diff --git a/.gitignore b/.gitignore index 6060111..cfd4657 100644 --- a/.gitignore +++ b/.gitignore @@ -41,3 +41,6 @@ examples/.ipynb_checkpoints # Node node_modules package-lock.json + +# worktrees +worktrees diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9a0946a..603fd0d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,13 +4,13 @@ default_install_hook_types: repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.7.0 + rev: v0.15.19 hooks: - id: ruff args: ["--fix", "--exit-non-zero-on-fix"] - id: ruff-format - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v5.0.0 + rev: v6.0.0 hooks: - id: check-added-large-files - id: check-case-conflict @@ -21,18 +21,18 @@ repos: - id: end-of-file-fixer - id: trailing-whitespace - repo: https://github.com/alessandrojcm/commitlint-pre-commit-hook - rev: v9.18.0 + rev: v9.26.0 hooks: - id: commitlint stages: - commit-msg additional_dependencies: ['conventional-changelog-conventionalcommits'] - repo: https://github.com/pycqa/isort - rev: 5.13.2 + rev: 9.0.0a3 hooks: - id: isort - repo: https://github.com/codespell-project/codespell - rev: v2.3.0 + rev: v2.4.2 hooks: - id: codespell args: ["-x", ".codespellignorelines"] @@ -42,12 +42,12 @@ repos: - id: pydocstyle additional_dependencies: ["tomli"] - repo: https://github.com/PyCQA/flake8 - rev: 7.1.1 + rev: 7.3.0 hooks: - id: flake8 additional_dependencies: ['flake8-pyproject'] - repo: https://github.com/arkinmodi/add-license-header - rev: v2.2.0 + rev: v2.4.1 hooks: - id: add-license-header args: diff --git a/README.md b/README.md index 7601aa4..8c4a56a 100644 --- a/README.md +++ b/README.md @@ -114,9 +114,9 @@ The previous code will output the following dictionary: { 'file_1.c': ( 'The files \'reference_dir/file_1.c\' and \'compared_dir/file_1.c\' are different:\n' - 'Added the value(s) \'{"2": 0}\' in the \'[b]\' key.\n' - 'Changed the value of \'[a]\' from 1 to 2.\n' - 'Changed the value of \'[b][0]\' from 1 to 10.' + 'Added value at [\'b\'][2]: 0.\n' + 'Changed value at [\'a\']: 1 -> 2.\n' + 'Changed value at [\'b\'][0]: 1 -> 10.' ) } ``` @@ -136,9 +136,9 @@ Which will output the following ``AssertionError``: ```bash AssertionError: The files 'reference_dir/file_1.c' and 'compared_dir/file_1.c' are different: -Added the value(s) '{"2": 0}' in the '[b]' key. -Changed the value of '[a]' from 1 to 2. -Changed the value of '[b][0]' from 1 to 10. +Added value at ['b'][2]: 0. +Changed value at ['a']: 1 -> 2. +Changed value at ['b'][0]: 1 -> 10. ``` #### Advanced Configuration Options @@ -169,6 +169,21 @@ dir_content_diff.assert_equal_trees( ) ``` +Dictionary-based comparators such as JSON, YAML, XML and INI use keyword arguments for +their comparison options. The project-specific `tolerance` and `absolute_tolerance` +arguments control numeric comparisons. Useful DeepDiff options can also be passed as +keywords, for example `exclude_paths`, `exclude_regex_paths`, `include_paths`, +`ignore_order`, `ignore_string_case` and `max_diffs`. + +```python +specific_args = { + "sub_dir_1/sub_file_1.a": { + "exclude_paths": {"root['metadata']['timestamp']"}, + "ignore_order": True, + } +} +``` + Each comparator has different arguments that are detailed in the documentation. ##### File-specific Comparators diff --git a/dir_content_diff/base_comparators.py b/dir_content_diff/base_comparators.py index 998fc6c..abb670e 100644 --- a/dir_content_diff/base_comparators.py +++ b/dir_content_diff/base_comparators.py @@ -12,26 +12,57 @@ import configparser import filecmp import json +import math import re from abc import ABC from abc import abstractmethod from pathlib import Path from xml.etree import ElementTree -import dictdiffer import diff_pdf_visually import jsonpath_ng import yaml +from deepdiff import DeepDiff +from deepdiff.operator import BaseOperator from dicttoxml import dicttoxml from diff_pdf_visually import pdfdiff_pages from dir_content_diff.util import diff_msg_formatter -_ACTION_MAPPING = { - "add": "Added the value(s) '{value}' in the '{key}' key.", - "change": "Changed the value of '{key}' from {value[0]} to {value[1]}.", - "remove": "Removed the value(s) '{value}' from '{key}' key.", -} + +class _NumericToleranceOperator(BaseOperator): + """Numeric comparison operator implementing the existing tolerance kwargs.""" + + def __init__(self, tolerance, absolute_tolerance): + super().__init__() + self.tolerance = tolerance + self.absolute_tolerance = absolute_tolerance + + def match(self, level): + """Return whether both compared values are numeric.""" + return ( + isinstance(level.t1, (int, float)) + and isinstance(level.t2, (int, float)) + and not isinstance(level.t1, bool) + and not isinstance(level.t2, bool) + ) + + def give_up_diffing(self, level, diff_instance): + """Return whether two numeric values should be treated as equal.""" + first_is_nan = bool(level.t1 != level.t1) + second_is_nan = bool(level.t2 != level.t2) + if first_is_nan or second_is_nan: + return first_is_nan and second_is_nan + return math.isclose( + level.t1, + level.t2, + rel_tol=self.tolerance or 0, + abs_tol=self.absolute_tolerance or 0, + ) + + def normalize_value_for_hashing(self, parent, obj): + """Return unmodified values when set items are hashed.""" + return obj class BaseComparator(ABC): @@ -231,25 +262,24 @@ def __call__( else: filtered_diffs = self.filter(diffs, **filter_kwargs) if hasattr(filtered_diffs, "items"): + sorted_diffs = self.sort( + [ + self.format_diff(i, **format_diff_kwargs) + for i in filtered_diffs.items() + ], + **sort_kwargs, + ) formatted_diffs = self.concatenate( - self.sort( - [ - self.format_diff(i, **format_diff_kwargs) - for i in filtered_diffs.items() - ], - **sort_kwargs, - ), + sorted_diffs, **concat_kwargs, ) else: + sorted_diffs = self.sort( + [self.format_diff(i, **format_diff_kwargs) for i in filtered_diffs], + **sort_kwargs, + ) formatted_diffs = self.concatenate( - self.sort( - [ - self.format_diff(i, **format_diff_kwargs) - for i in filtered_diffs - ], - **sort_kwargs, - ), + sorted_diffs, **concat_kwargs, ) @@ -302,10 +332,20 @@ def diff(self, ref, comp, *args, **kwargs): class DictComparator(BaseComparator): """Comparator for dictionaries.""" + _MISSING_VALUE = object() + _DIFF_ACTION_CATEGORIES = { + "dictionary_item_added": "add", + "iterable_item_added": "add", + "dictionary_item_removed": "remove", + "iterable_item_removed": "remove", + "type_changes": "change", + "values_changed": "change", + } + _ACTION_MAPPING = { - "add": "Added the value(s) '{value}' in the '{key}' key.", - "change": "Changed the value of '{key}' from {value[0]} to {value[1]}.", - "remove": "Removed the value(s) '{value}' from '{key}' key.", + "add": "Added value at {key}: {value}.", + "change": "Changed value at {key}: {value[0]} -> {value[1]}.", + "remove": "Removed value at {key}: {value}.", "missing_ref_entry": ( "The path '{key}' is missing in the reference dictionary, please fix the " "'replace_pattern' argument." @@ -316,39 +356,87 @@ class DictComparator(BaseComparator): ), } - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self._format_mapping = { - "add": self._format_add_value, - "remove": self._format_remove_value, - "change": self._format_change_value, - } - @staticmethod - def _format_key(key): - if isinstance(key, str): - key = key.split(".") - if key == [""]: - key = [] - return "".join(f"[{k}]" for k in key) + def _format_report_value(value): + try: + return json.dumps(value, default=str, sort_keys=True) + except TypeError: + return json.dumps(value, default=str) @staticmethod - def _format_add_value(value): - return json.dumps(dict(sorted(value))) + def _format_report_path(path): + """Format a path for human-readable reports.""" + if isinstance(path, str) and path.startswith("root["): + path = path[4:] + return path - @staticmethod - def _format_remove_value(value): - return json.dumps(dict(sorted(value))) + @classmethod + def _format_action(cls, action, path, value): + """Format a normalized diff action.""" + formatted_path = cls._format_report_path(path) + if action == "change": + old_value, new_value = value + if old_value is cls._MISSING_VALUE: + return f"{formatted_path}: {cls._format_report_value(new_value)}" + return ( + f"Changed value at {formatted_path}: " + f"{cls._format_report_value(old_value)} -> " + f"{cls._format_report_value(new_value)}." + ) + if action == "add": + return ( + f"Added value at {formatted_path}: {cls._format_report_value(value)}." + ) + if action == "remove": + return ( + f"Removed value at {formatted_path}: {cls._format_report_value(value)}." + ) + raise ValueError(f"Unexpected dictionary diff action: {action!r}") + + @classmethod + def _value_change_values(cls, values): + """Return explicit old/new values from a DeepDiff value-change item.""" + if hasattr(values, "items") and "old_value" in values and "new_value" in values: + return values["old_value"], values["new_value"] + return cls._MISSING_VALUE, values + + @classmethod + def _iter_deepdiff_actions(cls, category, value): + """Yield normalized action tuples for known DeepDiff report categories.""" + action = cls._DIFF_ACTION_CATEGORIES.get(category) + if action is None or not hasattr(value, "items"): + return + + for path, diff_value in value.items(): + if action == "change": + diff_value = cls._value_change_values(diff_value) + yield action, path, diff_value + + @classmethod + def _format_deepdiff_category(cls, category, value): + """Format a known grouped DeepDiff category.""" + return "\n".join( + cls._format_action(action, path, diff_value) + for action, path, diff_value in cls._iter_deepdiff_actions(category, value) + ) - @staticmethod - def _format_change_value(value): - value = list(value) - for num, i in enumerate(value): - if isinstance(i, str): - value[num] = f"'{i}'" + def filter(self, differences, **kwargs): + """Expand grouped categories into formatted diff elements.""" + filtered_differences = super().filter(differences, **kwargs) + expanded_differences = [] + for category, value in filtered_differences.items(): + if category == "format_errors": + expanded_differences.extend( + (category, action, key, error_value) + for action, key, error_value in value + ) + elif category in self._DIFF_ACTION_CATEGORIES and hasattr(value, "items"): + expanded_differences.extend( + self._iter_deepdiff_actions(category, value) + ) else: - value[num] = str(i) - return value + expanded_differences.append((category, value)) + return expanded_differences def format_data(self, data, ref=None, replace_pattern=None, **kwargs): """Format the loaded data.""" @@ -388,38 +476,77 @@ def format_data(self, data, ref=None, replace_pattern=None, **kwargs): ) return data - def diff(self, ref, comp, *args, **kwargs): + def diff( + self, + ref, + comp, + *args, + tolerance=None, + absolute_tolerance=None, + custom_operators=None, + **kwargs, + ): """Compare 2 dictionaries. - This function calls :func:`dictdiffer.diff` to compare the dictionaries, read the doc of - this function for details on args and kwargs. + This function compares dictionaries and returns a machine-readable diff report. Keyword Args: tolerance (float): Relative threshold to consider when comparing two float numbers. absolute_tolerance (float): Absolute threshold to consider when comparing two float numbers. - ignore (set[list]): Set of keys that should not be checked. - path_limit (list[str]): List of path limit tuples or :class:`dictdiffer.utils.PathLimit` - object to limit the diff recursion depth. + custom_operators (list): Additional custom operators passed to DeepDiff. + **kwargs: Additional keyword arguments are passed to :class:`deepdiff.diff.DeepDiff`. """ + if args: + raise TypeError( + "DictComparator.diff does not accept positional comparison " + "arguments. Use keyword arguments instead, for example " + "'tolerance', 'absolute_tolerance', 'exclude_paths', " + "'exclude_regex_paths', 'include_paths', or 'ignore_order'." + ) errors = self.current_state.get("format_errors", []) + custom_operators = list(custom_operators or []) + custom_operators.insert( + 0, _NumericToleranceOperator(tolerance, absolute_tolerance) + ) - if len(args) > 5: - dot_notation = args[5] - args = args[:5] + args[6:] - else: - dot_notation = kwargs.pop("dot_notation", False) - kwargs["dot_notation"] = dot_notation - errors.extend(list(dictdiffer.diff(ref, comp, *args, **kwargs))) - return errors + kwargs.setdefault("verbose_level", 2) + kwargs.setdefault( + "threshold_to_diff_deeper", 0 + ) # Expand all differences, even if they are small + kwargs.setdefault("zip_ordered_iterables", True) + kwargs.setdefault("ignore_nan_inequality", True) + kwargs.setdefault("ignore_private_variables", False) + + report = DeepDiff( + ref, + comp, + custom_operators=custom_operators, + **kwargs, + ).to_dict() + if errors: + report["format_errors"] = errors + return report def format_diff(self, difference): """Format one element difference.""" - action, key, value = difference - return self._ACTION_MAPPING[action].format( - key=self._format_key(key), - value=self._format_mapping[action](value), - ) + if len(difference) == 4 and difference[0] == "format_errors": + _, action, key, error_value = difference + return self._ACTION_MAPPING[action].format(key=key, value=error_value) + + if len(difference) == 3 and difference[0] in {"add", "change", "remove"}: + action, path, value = difference + return self._format_action(action, path, value) + + category, value = difference + if category == "format_errors": + return "\n".join( + self._ACTION_MAPPING[action].format(key=key, value=error_value) + for action, key, error_value in value + ) + if category in self._DIFF_ACTION_CATEGORIES and hasattr(value, "items"): + return self._format_deepdiff_category(category, value) + return f"{category}: {self._format_report_value(value)}" class JsonComparator(DictComparator): diff --git a/dir_content_diff/core.py b/dir_content_diff/core.py index 4ddb8a1..71ef04f 100644 --- a/dir_content_diff/core.py +++ b/dir_content_diff/core.py @@ -412,7 +412,7 @@ def compare_trees( _, result = _compare_single_file( ref_file, comp_path, relative_path, config, formatted_data_path ) - if result is not False: + if result: different_files[relative_path] = result except Exception as exc: # pragma: no cover LOGGER.error("File comparison failed for %s: %s", relative_path, exc) diff --git a/docs/source/conf.py b/docs/source/conf.py index cd36bdb..e40d7bc 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -88,7 +88,7 @@ } intersphinx_mapping = { - "dictdiffer": ("https://dictdiffer.readthedocs.io/en/latest/", None), + "deepdiff": ("https://zepworks.com/deepdiff/current/", None), "morph_tool": ("https://morph-tool.readthedocs.io/en/latest/", None), "morphio": ("https://morphio.readthedocs.io/en/latest/", None), "numpy": ("https://numpy.org/doc/stable/", None), diff --git a/pyproject.toml b/pyproject.toml index 158f953..1d5e875 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ dynamic = ["version", "optional-dependencies"] dependencies = [ "attrs>=21.3.0", "click>=8", - "dictdiffer>=0.8", + "deepdiff>=8.6", "dicttoxml>=1.7.12", "diff_pdf_visually>=1.7", "jsonpath-ng>=1.5", @@ -91,3 +91,9 @@ testpaths = [ markers = [ "comparators_missing_deps: marks tests for missing dependencies", ] + +[dependency-groups] +dev = [ + "ipython>=8.39.0", + "pdbpp>=0.12.1", +] diff --git a/tests/conftest.py b/tests/conftest.py index 9cf1f1e..533da30 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -94,47 +94,37 @@ def dict_diff(): """The diff that should be reported for the JSON and YAML files.""" diff = ( r"""The files '\S*' and '\S*' are different:\n""" - r"""Added the value\(s\) '{"__dict_key_2__": \[1, 2, 3\], "__dict_key_3__": \[1, 2, 3\], """ - r""""__dict_key_4__": \[1, 2, 3\]}' in the '\[simple_dict\]' key\.\n""" - r"""Added the value\(s\) '{"__nested_dict_key_2__": "nested_dict_val_2", """ - r""""nested_dict_key_1": "__nested_dict_val_1__"}' in the '\[nested_dict\]""" - r"""\[sub_nested_dict\]' key\.\n""" - r"""Added the value\(s\) '{"__nested_dict_key_2__": "nested_dict_val_2"}' in the """ - r"""'\[nested_list\]\[3\]\[1\]' key\.\n""" - r"""Added the value\(s\) '{"simple_list_test": \[\["dict_key_1", \[1, 4, 3\]\], """ - r"""\["__dict_key_2__", \[1, 2, 3\]\]\]}' in the '' key\.\n""" - r"""Changed the value of '\[int_value\]' from 1 to 2\.\n""" - r"""Changed the value of '\[nested_dict\]\[dict_key\]\[1\]' from 2 to 4\.\n""" - r"""Changed the value of '\[nested_dict\]\[sub_nested_dict\]\[nested_list_key\]""" - r"""\[0\]' from 1 to 2\.\n""" - r"""Changed the value of '\[nested_dict\]\[sub_nested_dict\]\[nested_list_key\]""" - r"""\[1\]' from 2.5 to 2.50001\.\n""" - r"""Changed the value of '\[nested_dict\]\[sub_nested_dict\]\[nested_list_key\]""" - r"""\[2\]' from 'str_val' to '__str_val__'\.\n""" - r"""Changed the value of '\[nested_list\]\[0\]' from 1 to 2\.\n""" - r"""Changed the value of '\[nested_list\]\[1\]' from 2.5 to 2.50001\.\n""" - r"""Changed the value of '\[nested_list\]\[2\]' from 'str_val' to '__str_val__'\.\n""" - r"""Changed the value of '\[nested_list\]\[3\]\[0\]' from 'nested_list_val' to """ - r"""'__nested_list_val__'\.\n""" - r"""Changed the value of '\[nested_list\]\[3\]\[1\]\[nested_dict_key_1\]' from """ - r"""'nested_dict_val_1' to '__nested_dict_val_1__'\.\n""" - r"""Changed the value of '\[nested_list\]\[3\]\[1\]\[nested_list_key\]\[0\]' from 1 """ - r"""to 2\.\n""" - r"""Changed the value of '\[nested_list\]\[3\]\[1\]\[nested_list_key\]\[1\]' from """ - r"""2.5 to 2.50001\.\n""" - r"""Changed the value of '\[nested_list\]\[3\]\[1\]\[nested_list_key\]\[2\]' from """ - r"""'str_val' to '__str_val__'\.\n""" - r"""Changed the value of '\[simple_dict\]\[dict_key_1\]\[1\]' from 2 to 4\.\n""" - r"""Changed the value of '\[simple_list\]\[0\]' from 1 to 2\.\n""" - r"""Changed the value of '\[simple_list\]\[1\]' from 2.5 to 2.50001\.\n""" - r"""Changed the value of '\[simple_list\]\[2\]' from 'str_val' to '__str_val__'\.\n""" - r"""Removed the value\(s\) '{"dict_key_2": \[1, 2, 3\]}' from '\[simple_dict\]' """ - r"""key\.\n""" - r"""Removed the value\(s\) '{"nested_dict_key": "nested_dict_val"}' from '\[""" - r"""nested_dict\]\[sub_nested_dict\]' key\.\n""" - r"""Removed the value\(s\) '{"nested_dict_key_2": "nested_dict_val_2"}' from """ - r"""'\[nested_list\]\[3\]\[1\]' key\.\n""" - r"""Removed the value\(s\) '{"nested_dict_test": 0}' from '' key\.""" + r"""Added value at \['nested_dict'\]\['sub_nested_dict'\]\['__nested_dict_key_2__'\]: "nested_dict_val_2"\.\n""" + r"""Added value at \['nested_dict'\]\['sub_nested_dict'\]\['nested_dict_key_1'\]: "__nested_dict_val_1__"\.\n""" + r"""Added value at \['nested_list'\]\[3\]\[1\]\['__nested_dict_key_2__'\]: "nested_dict_val_2"\.\n""" + r"""Added value at \['simple_dict'\]\['__dict_key_2__'\]: \[1, 2, 3\]\.\n""" + r"""Added value at \['simple_dict'\]\['__dict_key_3__'\]: \[1, 2, 3\]\.\n""" + r"""Added value at \['simple_dict'\]\['__dict_key_4__'\]: \[1, 2, 3\]\.\n""" + r"""Added value at \['simple_list_test'\]: \[\["dict_key_1", \[1, 4, 3\]\], """ + r"""\["__dict_key_2__", \[1, 2, 3\]\]\]\.\n""" + r"""Changed value at \['int_value'\]: 1 -> 2\.\n""" + r"""Changed value at \['nested_dict'\]\['dict_key'\]\[1\]: 2 -> 4\.\n""" + r"""Changed value at \['nested_dict'\]\['sub_nested_dict'\]\['nested_list_key'\]\[0\]: 1 -> 2\.\n""" + r"""Changed value at \['nested_dict'\]\['sub_nested_dict'\]\['nested_list_key'\]\[1\]: 2.5 -> 2.50001\.\n""" + r"""Changed value at \['nested_dict'\]\['sub_nested_dict'\]\['nested_list_key'\]""" + r"""\[2\]: "str_val" -> "__str_val__"\.\n""" + r"""Changed value at \['nested_list'\]\[0\]: 1 -> 2\.\n""" + r"""Changed value at \['nested_list'\]\[1\]: 2.5 -> 2.50001\.\n""" + r"""Changed value at \['nested_list'\]\[2\]: "str_val" -> "__str_val__"\.\n""" + r"""Changed value at \['nested_list'\]\[3\]\[0\]: "nested_list_val" -> "__nested_list_val__"\.\n""" + r"""Changed value at \['nested_list'\]\[3\]\[1\]\['nested_dict_key_1'\]: """ + r""""nested_dict_val_1" -> "__nested_dict_val_1__"\.\n""" + r"""Changed value at \['nested_list'\]\[3\]\[1\]\['nested_list_key'\]\[0\]: 1 -> 2\.\n""" + r"""Changed value at \['nested_list'\]\[3\]\[1\]\['nested_list_key'\]\[1\]: 2.5 -> 2.50001\.\n""" + r"""Changed value at \['nested_list'\]\[3\]\[1\]\['nested_list_key'\]\[2\]: "str_val" -> "__str_val__"\.\n""" + r"""Changed value at \['simple_dict'\]\['dict_key_1'\]\[1\]: 2 -> 4\.\n""" + r"""Changed value at \['simple_list'\]\[0\]: 1 -> 2\.\n""" + r"""Changed value at \['simple_list'\]\[1\]: 2.5 -> 2.50001\.\n""" + r"""Changed value at \['simple_list'\]\[2\]: "str_val" -> "__str_val__"\.\n""" + r"""Removed value at \['nested_dict'\]\['sub_nested_dict'\]\['nested_dict_key'\]: "nested_dict_val"\.\n""" + r"""Removed value at \['nested_dict_test'\]: 0\.\n""" + r"""Removed value at \['nested_list'\]\[3\]\[1\]\['nested_dict_key_2'\]: "nested_dict_val_2"\.\n""" + r"""Removed value at \['simple_dict'\]\['dict_key_2'\]: \[1, 2, 3\]\.""" ) return diff @@ -148,10 +138,7 @@ def base_diff(): @pytest.fixture def xml_diff(dict_diff): """The diff that should be reported for the XML files.""" - diff = dict_diff.replace("'\\[", "'\\[root\\]\\[").replace( - " '' key", " '\\[root\\]' key" - ) - return diff + return dict_diff.replace(r"value at \[", r"value at \['root'\]\[") @pytest.fixture @@ -159,12 +146,12 @@ def ini_diff(): """The diff that should be reported for the INI files.""" diff = ( r"The files '\S*/file\.ini' and '\S*/file\.ini' are different:\n" - r"Changed the value of '\[section1\]\[attr1\]' from 'val1' to 'val2'\.\n" - r"Changed the value of '\[section1\]\[attr2\]' from 1 to 2.\n" - r"Changed the value of '\[section2\]\[attr3\]\[1\]' from 2 to 3.\n" - r"Changed the value of '\[section2\]\[attr3\]\[3\]' from 'b' to 'c'.\n" - r"Changed the value of '\[section2\]\[attr4\]\[a\]' from 1 to 4.\n" - r"Changed the value of '\[section2\]\[attr4\]\[b\]\[1\]' from 2 to 3." + r"""Changed value at \['section1'\]\['attr1'\]: "val1" -> "val2"\.\n""" + r"""Changed value at \['section1'\]\['attr2'\]: 1 -> 2\.\n""" + r"""Changed value at \['section2'\]\['attr3'\]\[1\]: 2 -> 3\.\n""" + r"""Changed value at \['section2'\]\['attr3'\]\[3\]: "b" -> "c"\.\n""" + r"""Changed value at \['section2'\]\['attr4'\]\['a'\]: 1 -> 4\.\n""" + r"""Changed value at \['section2'\]\['attr4'\]\['b'\]\[1\]: 2 -> 3\.""" ) return diff diff --git a/tests/test_base.py b/tests/test_base.py index 1d5a571..bdd3adf 100644 --- a/tests/test_base.py +++ b/tests/test_base.py @@ -17,10 +17,10 @@ import copy import json import logging +import math import re import shutil -import dictdiffer import pytest import dir_content_diff @@ -230,8 +230,8 @@ def format_diff(self, difference, mark_formatted=False): assert kwargs_msg in no_format_diff assert diff == no_format_diff.replace(kwargs_msg, "") assert len(re.findall("### FORMATTED", diff)) == 0 - assert len(re.findall("### FORMATTED", formatted_diff)) == 25 - assert len(re.findall("### FORMATTED", formatted_diff_default)) == 25 + assert len(re.findall("### FORMATTED", formatted_diff)) == 28 + assert len(re.findall("### FORMATTED", formatted_diff_default)) == 28 assert kwargs_msg in diff_default assert diff_default.replace(kwargs_msg, "") == diff @@ -367,6 +367,37 @@ def concatenate(self, differences, eol=None): assert concat_eol_diff == concat_eol_diff_default assert diff == concat_diff_default.replace(kwargs_msg_n, "\n") + def test_base_comparator_does_not_flatten_formatted_differences(self, tmp_path): + """Test base comparator concatenation keeps embedded newlines untouched.""" + + class ComparatorWithMultilineDiff( + dir_content_diff.base_comparators.BaseComparator + ): + """Compare data with multiline formatted diff entries.""" + + def diff(self, ref, comp, *args, **kwargs): + return ["first\nsecond", "third"] + + def concatenate(self, differences, eol=None): + if not eol: + eol = "\n" + return eol.join(differences) + + ref_file = tmp_path / "ref.txt" + res_file = tmp_path / "res.txt" + ref_file.write_text("ref", encoding="utf-8") + res_file.write_text("res", encoding="utf-8") + + diff = dir_content_diff.compare_files( + ref_file, + res_file, + ComparatorWithMultilineDiff(), + concat_kwargs={"eol": "#EOL#"}, + ) + + assert "first\nsecond#EOL#third" in diff + assert "first#EOL#second#EOL#third" not in diff + def test_report_kwargs(self, ref_tree, res_tree_diff): """Test the report_kwargs method.""" @@ -547,6 +578,239 @@ def test_format_data(self): for i in patterns[("string", "NEW VALUE")] ] + def test_numeric_tolerance(self): + """Test numeric tolerance semantics.""" + comparator = dir_content_diff.base_comparators.JsonComparator() + + assert ( + comparator.diff({"value": 100.0}, {"value": 111.0}, tolerance=0.1) == {} + ) + assert comparator.diff( + {"value": 100.0}, + {"value": 112.0}, + tolerance=0.1, + ) == { + "values_changed": { + "root['value']": {"new_value": 112.0, "old_value": 100.0} + } + } + + assert ( + comparator.diff( + {"value": 0.0}, + {"value": 0.05}, + absolute_tolerance=0.1, + ) + == {} + ) + assert comparator.diff( + {"value": 0.0}, + {"value": 0.11}, + absolute_tolerance=0.1, + ) == { + "values_changed": { + "root['value']": {"new_value": 0.11, "old_value": 0.0} + } + } + + assert ( + comparator.diff( + {"value": 100.0}, + {"value": 111.0}, + tolerance=0.1, + absolute_tolerance=0.01, + ) + == {} + ) + assert comparator.diff({"value": 1}, {"value": 1.05}, tolerance=0.1) == {} + assert comparator.diff({"value": 1}, {"value": 1.2}, tolerance=0.1) == { + "type_changes": { + "root['value']": { + "new_type": float, + "new_value": 1.2, + "old_type": int, + "old_value": 1, + } + } + } + assert comparator.diff({"value": True}, {"value": 1}) == { + "type_changes": { + "root['value']": { + "new_type": int, + "new_value": 1, + "old_type": bool, + "old_value": True, + } + } + } + assert comparator.diff( + {"value": 1.0}, + {"value": 1.0000000000001}, + tolerance=0, + ) == { + "values_changed": { + "root['value']": { + "new_value": 1.0000000000001, + "old_value": 1.0, + } + } + } + + def test_diff_rejects_positional_args(self): + """Test dictionary comparisons require keyword arguments.""" + comparator = dir_content_diff.base_comparators.JsonComparator() + + with pytest.raises(TypeError, match="does not accept positional"): + comparator.diff({"value": 1}, {"value": 2}, None) + + def test_positional_diff_args_are_reported(self, ref_tree, res_tree_equal): + """Test positional config args fail with a clear report.""" + res = compare_trees( + ref_tree, + res_tree_equal, + specific_args={"file.json": {"args": [None]}}, + ) + + report = res["file.json"] + assert "Exception raised: (TypeError)" in report + assert "does not accept positional comparison arguments" in report + assert "takes 3 positional arguments" not in report + + def test_keyword_tolerance_is_applied(self): + """Test tolerance remains a first-class keyword argument.""" + comparator = dir_content_diff.base_comparators.JsonComparator() + + assert ( + comparator.diff( + {"value": 100.0}, + {"value": 105.0}, + tolerance=0.1, + ) + == {} + ) + assert comparator.diff( + {"value": 100.0}, + {"value": 105.0}, + tolerance=0, + ) == { + "values_changed": { + "root['value']": {"new_value": 105.0, "old_value": 100.0} + } + } + + def test_deepdiff_exclude_paths_kwarg(self): + """Test useful DeepDiff kwargs are passed through.""" + comparator = dir_content_diff.base_comparators.JsonComparator() + + assert comparator.diff( + {"value": 1, "generated": "old"}, + {"value": 2, "generated": "new"}, + exclude_paths={"root['generated']"}, + ) == {"values_changed": {"root['value']": {"new_value": 2, "old_value": 1}}} + + def test_deepdiff_ignore_order_kwarg(self): + """Test DeepDiff list-order controls can be used.""" + comparator = dir_content_diff.base_comparators.JsonComparator() + + assert ( + comparator.diff( + {"values": [1, 2, 3]}, + {"values": [3, 2, 1]}, + ignore_order=True, + ) + == {} + ) + + def test_numeric_equality_and_nan_handling(self): + """Test numeric equality and NaN handling.""" + comparator = dir_content_diff.base_comparators.JsonComparator() + nan = float("nan") + + assert comparator.diff({"value": 1}, {"value": 1.0}) == {} + assert comparator.diff({"value": nan}, {"value": nan}) == {} + + diff = comparator.diff({"value": nan}, {"value": 1.0}) + value_diff = diff["values_changed"]["root['value']"] + assert math.isnan(value_diff["old_value"]) + assert value_diff["new_value"] == 1.0 + + @pytest.mark.parametrize( + "difference, expected", + [ + ( + ( + "values_changed", + {"root['value']": {"new_value": "new", "old_value": "old"}}, + ), + """Changed value at ['value']: "old" -> "new".""", + ), + ( + ( + "type_changes", + { + "root['value']": { + "new_type": int, + "new_value": 1, + "old_type": bool, + "old_value": True, + } + }, + ), + "Changed value at ['value']: true -> 1.", + ), + ( + ("dictionary_item_added", {"root['value']": {"enabled": True}}), + """Added value at ['value']: {"enabled": true}.""", + ), + ( + ("iterable_item_added", {"root['values'][1]": "new"}), + """Added value at ['values'][1]: "new".""", + ), + ( + ("dictionary_item_removed", {"root['value']": {"enabled": False}}), + """Removed value at ['value']: {"enabled": false}.""", + ), + ( + ("iterable_item_removed", {"root['values'][1]": "old"}), + """Removed value at ['values'][1]: "old".""", + ), + ( + ("values_changed", {"root['value']": {"new_value": "new"}}), + """['value']: {"new_value": "new"}""", + ), + ( + ("repetition_change", {"root['values'][1]": {"old_repeat": 1}}), + """repetition_change: {"root['values'][1]": {"old_repeat": 1}}""", + ), + ], + ) + def test_format_deepdiff_categories(self, difference, expected): + """Test human-readable reports for DeepDiff categories.""" + comparator = dir_content_diff.base_comparators.JsonComparator() + + assert comparator.format_diff(difference) == expected + + def test_sort_uses_base_comparator_behavior(self): + """Test dictionary comparator sorting uses the base comparator behavior.""" + comparator = dir_content_diff.base_comparators.JsonComparator() + + assert ( + comparator.sort.__func__ + is dir_content_diff.base_comparators.BaseComparator.sort + ) + assert comparator.sort(["b", "a"]) == ["a", "b"] + + def test_tuple_values_use_native_iterable_report(self): + """Test tuple values use the native iterable report.""" + comparator = dir_content_diff.base_comparators.JsonComparator() + + assert comparator.diff({"k": (1,)}, {"k": (1, 2)}) == { + "iterable_item_added": {"root['k'][1]": 2} + } + assert comparator.diff({"k": (1, 2)}, {"k": (1, 3)}) == { + "values_changed": {"root['k'][1]": {"new_value": 3, "old_value": 2}} + } + class TestXmlComparator: """Test the XML comparator.""" @@ -976,7 +1240,6 @@ def test_nested_files(self, ref_with_nested_file, res_equal_with_nested_file): def test_specific_args(self, ref_tree, res_tree_equal): """Test specific args.""" specific_args = { - "file.yaml": {"args": [None, None, None, False, 0, False]}, "file.json": {"tolerance": 0}, } res = compare_trees(ref_tree, res_tree_equal, specific_args=specific_args) @@ -986,7 +1249,6 @@ def test_specific_args(self, ref_tree, res_tree_equal): def test_replace_pattern(self, ref_tree, res_tree_equal): """Test specific args.""" specific_args = { - "file.yaml": {"args": [None, None, None, False, 0, False]}, "file.json": { "format_data_kwargs": { "replace_pattern": {(".*val.*", "NEW_VAL"): ["*.[*]"]}, @@ -1004,8 +1266,8 @@ def test_replace_pattern(self, ref_tree, res_tree_equal): r"""The files '\S*/ref/file\.json' and '\S*/res/file\.json' are different:\n""" r"""Kwargs used for formatting data: """ r"""{'replace_pattern': {\('\.\*val\.\*', 'NEW_VAL'\): \['\*\.\[\*\]'\]}}\n""" - r"""Changed the value of '\[nested_list\]\[2\]' from 'str_val' to 'NEW_VAL'\.\n""" - r"""Changed the value of '\[simple_list\]\[2\]' from 'str_val' to 'NEW_VAL'\.""" + r"""Changed value at \['nested_list'\]\[2\]: "str_val" -> "NEW_VAL"\.\n""" + r"""Changed value at \['simple_list'\]\[2\]: "str_val" -> "NEW_VAL"\.""" ) assert re.match(pat, res["file.json"]) is not None @@ -1013,7 +1275,6 @@ def test_replace_pattern(self, ref_tree, res_tree_equal): def test_specific_comparator(self, ref_tree, res_tree_equal): """Test specific args.""" specific_args = { - "file.yaml": {"args": [None, None, None, False, 0, False]}, "file.json": {"comparator": dir_content_diff.DefaultComparator()}, } res = compare_trees(ref_tree, res_tree_equal, specific_args=specific_args) @@ -1024,7 +1285,6 @@ def test_specific_patterns(self, ref_tree, res_tree_equal, base_diff): """Test specific args.""" specific_args = { "all yaml files": { - "args": [None, None, None, False, 0, False], "patterns": [r".*\.yaml"], }, "all json files": { @@ -1240,7 +1500,6 @@ def test_specific_patterns(self, ref_tree, res_tree_diff, base_diff, dict_diff): """Test specific args.""" specific_args = { "all yaml files": { - "args": [None, None, None, False, 0, False], "patterns": [r".*\.yaml"], }, "all json files": { @@ -1304,36 +1563,6 @@ def test_nested_files(self, ref_with_nested_file, res_diff_with_nested_file): ) assert match is not None - def test_fix_dot_notation( - self, ref_tree, res_tree_diff, pdf_diff, dict_diff, xml_diff, ini_diff - ): - """Test that the dot notation is properly fixed.""" - specific_args = {"file.yaml": {"args": [None, None, None, False, 0, True]}} - res = compare_trees(ref_tree, res_tree_diff, specific_args=specific_args) - - assert len(res) == 5 - match_res_0 = re.match(pdf_diff, res["file.pdf"]) - match_res_1 = re.match( - dict_diff.replace( - r"are different:\n", - r"are different:\nArgs used for computing differences: " - r"\[None, None, None, False, 0, True\]\n", - ), - res["file.yaml"], - ) - match_res_2 = re.match(dict_diff, res["file.json"]) - match_res_3 = re.match(xml_diff, res["file.xml"]) - match_res_4 = re.match(ini_diff, res["file.ini"]) - - for match_i in [ - match_res_0, - match_res_1, - match_res_2, - match_res_3, - match_res_4, - ]: - assert match_i is not None - def test_format_inside_diff(self, ref_tree, res_tree_diff, dict_diff): """Test formatting the result inside the diff method.""" @@ -1346,13 +1575,12 @@ def load(self, path, *args, **kwargs): return data def diff(self, ref, comp, *args, **kwargs): - diffs = list( - dictdiffer.diff(ref, comp, *args, dot_notation=False, **kwargs) - ) - # Format here instead of overriding the default format method comparator = dir_content_diff.base_comparators.JsonComparator() - formatted = [comparator.format_diff(i) for i in diffs] + diffs = comparator.diff(ref, comp, **kwargs) + formatted = [ + comparator.format_diff(i) for i in comparator.filter(diffs) + ] return formatted @@ -1368,16 +1596,18 @@ def diff(self, ref, comp, *args, **kwargs): class TestProgrammaticUse: """Test specific comparators that could be use programmatically.""" + def test_equal_tree_raw_diffs(self, ref_tree, res_tree_equal): + """Test equal raw diff reports are not returned as differences.""" + assert compare_trees(ref_tree, res_tree_equal, return_raw_diffs=True) == {} + def test_diff_tree(self, ref_tree, res_tree_diff, pdf_diff, dict_diff): """Test with different trees.""" res = compare_trees(ref_tree, res_tree_diff, return_raw_diffs=True) res_json = res["file.json"] - - assert len(res_json) == 25 - assert len(list(filter(lambda x: x[0] == "change", res_json))) == 17 - assert len(list(filter(lambda x: x[0] == "add", res_json))) == 4 - assert len(list(filter(lambda x: x[0] == "remove", res_json))) == 4 + assert len(res_json["values_changed"]) == 17 + assert len(res_json["dictionary_item_added"]) == 7 + assert len(res_json["dictionary_item_removed"]) == 4 class TestBaseFunctions: diff --git a/tests/test_parallel_execution.py b/tests/test_parallel_execution.py index 0c0c63c..3d2b830 100644 --- a/tests/test_parallel_execution.py +++ b/tests/test_parallel_execution.py @@ -138,7 +138,6 @@ def test_parallel_assert_equal_trees(self, ref_tree, res_tree_equal): def test_parallel_with_specific_args(self, ref_tree, res_tree_equal): """Test parallel execution with specific args.""" specific_args = { - "file.yaml": {"args": [None, None, None, False, 0, False]}, "file.json": {"tolerance": 0}, }