Skip to content

Commit 4eb599f

Browse files
committed
Adding "include_paths", affected_root_keys, get_root_key, better
reporting of encoding errors
1 parent 41907f7 commit 4eb599f

13 files changed

+139
-17
lines changed

deepdiff/deephash.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -88,11 +88,11 @@ def prepare_string_for_hashing(
8888
err = er
8989
if not encoded:
9090
obj_decoded = obj.decode('utf-8', errors='ignore')
91-
start = min(err.start - 10, 0)
91+
start = max(err.start - 20, 0)
9292
start_prefix = ''
9393
if start > 0:
9494
start_prefix = '...'
95-
end = err.end + 10
95+
end = err.end + 20
9696
end_suffix = '...'
9797
if end >= len(obj):
9898
end = len(obj)
@@ -329,8 +329,13 @@ def _skip_this(self, obj, parent):
329329
skip = False
330330
if self.exclude_paths and parent in self.exclude_paths:
331331
skip = True
332-
if self.include_paths and parent not in self.include_paths:
333-
skip = True
332+
if self.include_paths and parent != 'root':
333+
if parent not in self.include_paths:
334+
skip = True
335+
for prefix in self.include_paths:
336+
if parent.startswith(prefix):
337+
skip = False
338+
break
334339
elif self.exclude_regex_paths and any(
335340
[exclude_regex_path.search(parent) for exclude_regex_path in self.exclude_regex_paths]):
336341
skip = True

deepdiff/diff.py

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -438,8 +438,13 @@ def _skip_this(self, level):
438438
skip = False
439439
if self.exclude_paths and level_path in self.exclude_paths:
440440
skip = True
441-
if self.include_paths and level_path not in self.include_paths:
442-
skip = True
441+
if self.include_paths and level_path != 'root':
442+
if level_path not in self.include_paths:
443+
skip = True
444+
for prefix in self.include_paths:
445+
if level_path.startswith(prefix):
446+
skip = False
447+
break
443448
elif self.exclude_regex_paths and any(
444449
[exclude_regex_path.search(level_path) for exclude_regex_path in self.exclude_regex_paths]):
445450
skip = True
@@ -1445,6 +1450,22 @@ def affected_paths(self):
14451450
result |= OrderedSet(value.keys())
14461451
return result
14471452

1453+
@property
1454+
def affected_root_keys(self):
1455+
"""
1456+
Get the list of root keys that were affected.
1457+
A root key counts as affected whether its value was changed, added or removed.
1458+
"""
1459+
result = OrderedSet()
1460+
for key in REPORT_KEYS:
1461+
value = self.tree.get(key)
1462+
if value:
1463+
if isinstance(value, PrettyOrderedSet):
1464+
result |= OrderedSet([i.get_root_key() for i in value])
1465+
else:
1466+
result |= OrderedSet([i.get_root_key() for i in value.keys()])
1467+
return result
1468+
14481469

14491470
if __name__ == "__main__": # pragma: no cover
14501471
import doctest

deepdiff/helper.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import uuid
66
import logging
77
import warnings
8+
import string
89
import time
910
from ast import literal_eval
1011
from decimal import Decimal, localcontext
@@ -93,6 +94,8 @@ class np_type:
9394
py4 = py_major_version == 4
9495

9596

97+
NUMERICS = frozenset(string.digits)
98+
9699
# we used to use OrderedDictPlus when dictionaries in Python were not ordered.
97100
dict_ = dict
98101

@@ -254,9 +257,14 @@ def add_root_to_paths(paths):
254257
if path.startswith('root'):
255258
result.add(path)
256259
else:
257-
result.add(f"root.{path}")
258-
result.add(f"root[{path}]")
259-
result.add(f"root['{path}']")
260+
if path.isdigit():
261+
result.add(f"root['{path}']")
262+
result.add(f"root[{path}]")
263+
elif path[0].isdigit():
264+
result.add(f"root['{path}']")
265+
else:
266+
result.add(f"root.{path}")
267+
result.add(f"root['{path}']")
260268
return result
261269

262270

deepdiff/model.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -621,6 +621,21 @@ def all_down(self):
621621
def _format_result(root, result):
622622
return None if result is None else "{}{}".format(root, result)
623623

624+
def get_root_key(self, use_t2=False):
625+
"""
626+
Get the path's root key value for this change
627+
628+
For example if the path to the element that is reported to have a change in value is root['X'][0]
629+
then get_root_key should return 'X'
630+
"""
631+
root_level = self.all_up
632+
if(use_t2):
633+
next_rel = root_level.t2_child_rel
634+
else:
635+
next_rel = root_level.t1_child_rel or root_level.t2_child_rel # next relationship object to get a formatted param from
636+
637+
return next_rel.param
638+
624639
def path(self, root="root", force=None, get_parent_too=False, use_t2=False, output_format='str'):
625640
"""
626641
A python syntax string describing how to descend to this level, assuming the top level object is called root.

deepdiff/serialization.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import io
44
import os
55
import json
6+
import uuid
67
import logging
78
import re # NOQA
89
import builtins # NOQA
@@ -70,6 +71,7 @@ class UnsupportedFormatErr(TypeError):
7071
'datetime.time',
7172
'datetime.timedelta',
7273
'decimal.Decimal',
74+
'uuid.UUID',
7375
'ordered_set.OrderedSet',
7476
'collections.namedtuple',
7577
'collections.OrderedDict',
@@ -502,6 +504,7 @@ def _save_content(content, path, file_type, keep_backup=True):
502504
type: lambda x: x.__name__,
503505
bytes: lambda x: x.decode('utf-8'),
504506
datetime.datetime: lambda x: x.isoformat(),
507+
uuid.UUID: lambda x: str(x),
505508
}
506509

507510

docs/exclude_paths.rst

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,21 @@ Example
4444
{}
4545

4646

47+
When a path is passed via include_paths, all the children of that path are included too.
48+
49+
Example
50+
>>> t1 = {
51+
... "foo": {"bar": "potato"},
52+
... "ingredients": ["no meat", "no eggs", "no dairy"]
53+
... }
54+
>>> t2 = {
55+
... "foo": {"bar": "banana"},
56+
... "ingredients": ["bread", "cheese"]
57+
... }
58+
>>> DeepDiff(t1, t2, include_paths="foo")
59+
{'values_changed': {"root['foo']['bar']": {'new_value': 'banana', 'old_value': 'potato'}}}
60+
61+
4762
.. _exclude_regex_paths_label:
4863

4964
Exclude Regex Paths

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ def get_reqs(filename):
2626
long_description = file.read()
2727

2828

29-
setup(name='deepdiff6',
29+
setup(name='deepdiff',
3030
version=version,
3131
description='Deep Difference and Search of any Python object/data.',
3232
url='https://github.com/seperman/deepdiff',

tests/test_cache.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ def test_cache_deeply_nested_a1(self, nested_a_t1, nested_a_t2, nested_a_result,
2626
diff_of_diff = DeepDiff(nested_a_result, diff.to_dict(), ignore_order=False)
2727
assert not diff_of_diff
2828
assert nested_a_affected_paths == diff.affected_paths
29+
assert [0, 1] == diff.affected_root_keys
2930

3031
@pytest.mark.slow
3132
def test_cache_deeply_nested_a2(self, nested_a_t1, nested_a_t2, nested_a_result):

tests/test_diff_text.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1335,9 +1335,28 @@ def test_include_path3_with_just_key_names(self):
13351335
"ingredients": ["no meat", "no eggs", "no dairy"]
13361336
}
13371337
t2 = {"for life": "vegan"}
1338-
ddiff = DeepDiff(t2, t1, include_paths={"for_life"})
1338+
ddiff = DeepDiff(t1, t2, include_paths={"for_life"})
13391339
assert {} == ddiff
13401340

1341+
def test_include_path4_nested(self):
1342+
t1 = {
1343+
"foo": {"bar": "potato"},
1344+
"ingredients": ["no meat", "no eggs", "no dairy"]
1345+
}
1346+
t2 = {
1347+
"foo": {"bar": "banana"},
1348+
"ingredients": ["bread", "cheese"]
1349+
}
1350+
ddiff = DeepDiff(t1, t2, include_paths="foo")
1351+
assert {
1352+
'values_changed': {
1353+
"root['foo']['bar']": {
1354+
'new_value': 'banana',
1355+
'old_value': 'potato'
1356+
}
1357+
}
1358+
} == ddiff
1359+
13411360
def test_skip_path4(self):
13421361
t1 = {
13431362
"for life": "vegan",
@@ -1423,6 +1442,7 @@ def exclude_obj_callback_strict(obj, path):
14231442
result = {'values_changed': {"root['x']": {'new_value': 12, 'old_value': 10}}}
14241443
assert result == ddiff
14251444
assert {"root['x']"} == ddiff.affected_paths
1445+
assert {"x"} == ddiff.affected_root_keys
14261446

14271447
def test_skip_str_type_in_dictionary(self):
14281448
t1 = {1: {2: "a"}}

tests/test_hash.py

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -568,16 +568,18 @@ def test_skip_str_type_in_dict_on_list(self):
568568
assert 1 in t1_hash
569569
assert t1_hash[dic1] == t2_hash[dic2]
570570

571-
def test_skip_path(self):
571+
def test_skip_path_in_hash(self):
572572
dic1 = {1: "a"}
573573
t1 = [dic1, 2]
574574
dic2 = {}
575575
t2 = [dic2, 2]
576576
t1_hash = DeepHashPrep(t1, exclude_paths=['root[0]'])
577577
t2_hash = DeepHashPrep(t2, exclude_paths='root[0]')
578+
t2_hash_again = DeepHashPrep(t2, include_paths='1')
578579
assert 1 not in t1_hash
579580
assert 2 in t1_hash
580581
assert t1_hash[2] == t2_hash[2]
582+
assert t1_hash[2] == t2_hash_again[2]
581583

582584
def test_skip_path2(self):
583585

@@ -596,6 +598,23 @@ def test_skip_path2(self):
596598
t2_hash = DeepHashPrep(t2, exclude_paths=exclude_paths)
597599
assert t1_hash[t1] == t2_hash[t2]
598600

601+
def test_hash_include_path_nested(self):
602+
603+
obj10 = {'a': 1, 'b': 'f', 'e': "1111", 'foo': {'bar': 'baz'}}
604+
obj11 = {'c': 1, 'd': 'f', 'e': 'Cool'}
605+
606+
obj20 = {'a': 1, 'b': 'f', 'e': 'Cool', 'foo': {'bar': 'baz'}}
607+
obj21 = {'c': 1, 'd': 'f', 'e': "2222"}
608+
609+
t1 = [obj10, obj11]
610+
t2 = [obj20, obj21]
611+
612+
include_paths = ["root[0]['foo']['bar']"]
613+
614+
t1_hash = DeepHashPrep(t1, include_paths=include_paths)
615+
t2_hash = DeepHashPrep(t2, include_paths=include_paths)
616+
assert t1_hash[t1] == t2_hash[t2]
617+
599618
def test_skip_regex_path(self):
600619
dic1 = {1: "a"}
601620
t1 = [dic1, 2]
@@ -805,18 +824,24 @@ def test_combine_hashes_lists(self, items, prefix, expected):
805824
"Please either pass ignore_encoding_errors=True or pass the encoding via encodings=['utf-8', '...'].")
806825

807826
EXPECTED_MESSAGE2 = (
808-
"'utf-8' codec can't decode byte 0xbc in position 0: invalid start byte in 'p of flo...'. "
827+
"'utf-8' codec can't decode byte 0xbc in position 0: invalid start byte in ' cup of flour'. "
809828
"Please either pass ignore_encoding_errors=True or pass the encoding via encodings=['utf-8', '...'].")
810829

830+
EXPECTED_MESSAGE3 = (
831+
"'utf-8' codec can't decode byte 0xc3 in position 34: invalid continuation byte in '...up of potatos. Then ( cup of flour'. Please either pass ignore_encoding_errors=True or "
832+
"pass the encoding via encodings=['utf-8', '...']."
833+
)
834+
811835
@pytest.mark.parametrize('test_num, item, encodings, ignore_encoding_errors, expected_result, expected_message', [
812836
(1, b'\xc3\x28', None, False, UnicodeDecodeError, EXPECTED_MESSAGE1),
813837
(2, b'\xc3\x28', ['utf-8'], False, UnicodeDecodeError, EXPECTED_MESSAGE1),
814838
(3, b'\xc3\x28', ['utf-8'], True, {b'\xc3(': '640da73f0d9b268a0a7ae884d77063d1193f43a651352f9032d99a8fe1705546'}, None),
815839
(4, b"\xbc cup of flour", ['utf-8'], False, UnicodeDecodeError, EXPECTED_MESSAGE2),
816840
(5, b"\xbc cup of flour", ['utf-8'], True, {b'\xbc cup of flour': '86ac12eb5e35db88cf93baca1d62098023b2d93d634e75fb4e37657e514f3d51'}, None),
817841
(6, b"\xbc cup of flour", ['utf-8', 'latin-1'], False, {b'\xbc cup of flour': 'cfc354ae2232a8983bf59b2004f44fcb4036f57df1d08b9cde9950adea3f8d3e'}, None),
842+
(7, b"First have a cup of potatos. Then \xc3\x28 cup of flour", None, False, UnicodeDecodeError, EXPECTED_MESSAGE3),
818843
])
819-
def test_encodings(self, test_num, item, encodings, ignore_encoding_errors, expected_result, expected_message):
844+
def test_hash_encodings(self, test_num, item, encodings, ignore_encoding_errors, expected_result, expected_message):
820845
if UnicodeDecodeError == expected_result:
821846
with pytest.raises(expected_result) as exc_info:
822847
DeepHash(item, encodings=encodings, ignore_encoding_errors=ignore_encoding_errors)

tests/test_helper.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
cartesian_product_of_shape, literal_eval_extended,
1010
not_found, OrderedSetPlus, diff_numpy_array, cartesian_product_numpy,
1111
get_truncate_datetime, datetime_normalize,
12-
detailed__dict__, ENUM_IGNORE_KEYS,
12+
detailed__dict__, ENUM_IGNORE_KEYS, add_root_to_paths,
1313
)
1414

1515

@@ -288,3 +288,12 @@ def test_datetime_normalize(self, truncate_datetime, obj, expected):
288288
def test_detailed__dict__(self, obj, ignore_keys, expected):
289289
result = detailed__dict__(obj, ignore_private_variables=True, ignore_keys=ignore_keys)
290290
assert expected == result, f"test_detailed__dict__ failed for {obj}"
291+
292+
@pytest.mark.parametrize('test_num, value, expected', [
293+
(1, ['ab'], {'root.ab', "root['ab']"}),
294+
(2, ['11'], {"root['11']", 'root[11]'}),
295+
(3, ['1a'], {"root['1a']"}),
296+
])
297+
def test_add_root_to_paths(self, test_num, value, expected):
298+
result = add_root_to_paths(value)
299+
assert expected == result, f"test_add_root_to_paths #{test_num} failed."

tests/test_ignore_order.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1130,7 +1130,7 @@ class TestDecodingErrorIgnoreOrder:
11301130
"Please either pass ignore_encoding_errors=True or pass the encoding via encodings=['utf-8', '...'].")
11311131

11321132
EXPECTED_MESSAGE2 = (
1133-
"'utf-8' codec can't decode byte 0xbc in position 0: Can not produce a hash for root: invalid start byte in 'p of flo...'. "
1133+
"'utf-8' codec can't decode byte 0xbc in position 0: Can not produce a hash for root: invalid start byte in ' cup of flour'. "
11341134
"Please either pass ignore_encoding_errors=True or pass the encoding via encodings=['utf-8', '...'].")
11351135

11361136
@pytest.mark.parametrize('test_num, item, encodings, ignore_encoding_errors, expected_result, expected_message', [

tests/test_serialization.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -317,7 +317,7 @@ def test_pretty_form_method(self, expected, verbose_level):
317317

318318
@pytest.mark.parametrize('test_num, value', [
319319
(1, {'10': None}),
320-
(2, {"type_changes": {"root": {"old_type": None, "new_type": list, "new_value": ["你好", 2, 3, 5]}}})
320+
(2, {"type_changes": {"root": {"old_type": None, "new_type": list, "new_value": ["你好", 2, 3, 5]}}}),
321321
])
322322
def test_json_dumps_and_loads(self, test_num, value):
323323
serialized = json_dumps(value)

0 commit comments

Comments
 (0)