Skip to content

Commit 2178878

Browse files
committed
Fixed subtracting a delta when iterable_compare_func is used. Better handling
of force-adding a delta to an object: we switch between an empty list and an empty dictionary when needed, and we find the closest matching list item when removing items from an iterable with force=True.
1 parent 39c3a3d commit 2178878

File tree

6 files changed

+283
-67
lines changed

6 files changed

+283
-67
lines changed

deepdiff/delta.py

Lines changed: 113 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ def __init__(
7171
diff=None,
7272
delta_path=None,
7373
delta_file=None,
74+
delta_diff=None,
7475
flat_dict_list=None,
7576
deserializer=pickle_load,
7677
log_errors=True,
@@ -81,6 +82,7 @@ def __init__(
8182
verify_symmetry=None,
8283
bidirectional=False,
8384
always_include_values=False,
85+
iterable_compare_func_was_used=None,
8486
force=False,
8587
):
8688
if hasattr(deserializer, '__code__') and 'safe_to_import' in set(deserializer.__code__.co_varnames):
@@ -114,6 +116,8 @@ def _deserializer(obj, safe_to_import=None):
114116
with open(delta_path, 'rb') as the_file:
115117
content = the_file.read()
116118
self.diff = _deserializer(content, safe_to_import=safe_to_import)
119+
elif delta_diff:
120+
self.diff = delta_diff
117121
elif delta_file:
118122
try:
119123
content = delta_file.read()
@@ -128,7 +132,10 @@ def _deserializer(obj, safe_to_import=None):
128132
self.mutate = mutate
129133
self.raise_errors = raise_errors
130134
self.log_errors = log_errors
131-
self._numpy_paths = self.diff.pop('_numpy_paths', False)
135+
self._numpy_paths = self.diff.get('_numpy_paths', False)
136+
# When we create the delta from a list of flat dictionaries, details such as iterable_compare_func_was_used get lost.
137+
# That's why we allow iterable_compare_func_was_used to be explicitly set.
138+
self._iterable_compare_func_was_used = self.diff.get('_iterable_compare_func_was_used', iterable_compare_func_was_used)
132139
self.serializer = serializer
133140
self.deserializer = deserializer
134141
self.force = force
@@ -198,7 +205,17 @@ def _do_verify_changes(self, path, expected_old_value, current_old_value):
198205
self._raise_or_log(VERIFICATION_MSG.format(
199206
path_str, expected_old_value, current_old_value, VERIFY_BIDIRECTIONAL_MSG))
200207

201-
def _get_elem_and_compare_to_old_value(self, obj, path_for_err_reporting, expected_old_value, elem=None, action=None, forced_old_value=None):
208+
def _get_elem_and_compare_to_old_value(
209+
self,
210+
obj,
211+
path_for_err_reporting,
212+
expected_old_value,
213+
elem=None,
214+
action=None,
215+
forced_old_value=None,
216+
next_element=None,
217+
):
218+
# if forced_old_value is not None:
202219
try:
203220
if action == GET:
204221
current_old_value = obj[elem]
@@ -208,9 +225,21 @@ def _get_elem_and_compare_to_old_value(self, obj, path_for_err_reporting, expect
208225
raise DeltaError(INVALID_ACTION_WHEN_CALLING_GET_ELEM.format(action))
209226
except (KeyError, IndexError, AttributeError, TypeError) as e:
210227
if self.force:
211-
_forced_old_value = {} if forced_old_value is None else forced_old_value
228+
if forced_old_value is None:
229+
if next_element is None or isinstance(next_element, str):
230+
_forced_old_value = {}
231+
else:
232+
_forced_old_value = []
233+
else:
234+
_forced_old_value = forced_old_value
212235
if action == GET:
213-
obj[elem] = _forced_old_value
236+
if isinstance(obj, list):
237+
if isinstance(elem, int) and elem < len(obj):
238+
obj[elem] = _forced_old_value
239+
else:
240+
obj.append(_forced_old_value)
241+
else:
242+
obj[elem] = _forced_old_value
214243
elif action == GETATTR:
215244
setattr(obj, elem, _forced_old_value)
216245
return _forced_old_value
@@ -277,6 +306,11 @@ def _set_new_value(self, parent, parent_to_obj_elem, parent_to_obj_action,
277306
parent, obj, path, parent_to_obj_elem,
278307
parent_to_obj_action, elements,
279308
to_type=list, from_type=tuple)
309+
if elem != 0 and self.force and isinstance(obj, list) and len(obj) == 0:
310+
# it must have been a dictionary
311+
obj = {}
312+
self._simple_set_elem_value(obj=parent, path_for_err_reporting=path, elem=parent_to_obj_elem,
313+
value=obj, action=parent_to_obj_action)
280314
self._simple_set_elem_value(obj=obj, path_for_err_reporting=path, elem=elem,
281315
value=new_value, action=action)
282316

@@ -356,6 +390,9 @@ def _do_item_added(self, items, sort=True, insert=False):
356390
else:
357391
items = items.items()
358392

393+
# if getattr(self, 'DEBUG', None):
394+
# import pytest; pytest.set_trace()
395+
359396
for path, new_value in items:
360397
elem_and_details = self._get_elements_and_details(path)
361398
if elem_and_details:
@@ -404,14 +441,21 @@ def _get_elements_and_details(self, path):
404441
try:
405442
elements = _path_to_elements(path)
406443
if len(elements) > 1:
407-
parent = self.get_nested_obj(obj=self, elements=elements[:-2])
444+
elements_subset = elements[:-2]
445+
if len(elements_subset) != len(elements):
446+
next_element = elements[-2][0]
447+
next2_element = elements[-1][0]
448+
else:
449+
next_element = None
450+
parent = self.get_nested_obj(obj=self, elements=elements_subset, next_element=next_element)
408451
parent_to_obj_elem, parent_to_obj_action = elements[-2]
409452
obj = self._get_elem_and_compare_to_old_value(
410453
obj=parent, path_for_err_reporting=path, expected_old_value=None,
411-
elem=parent_to_obj_elem, action=parent_to_obj_action)
454+
elem=parent_to_obj_elem, action=parent_to_obj_action, next_element=next2_element)
412455
else:
413456
parent = parent_to_obj_elem = parent_to_obj_action = None
414-
obj = self.get_nested_obj(obj=self, elements=elements[:-1])
457+
obj = self
458+
# obj = self.get_nested_obj(obj=self, elements=elements[:-1])
415459
elem, action = elements[-1]
416460
except Exception as e:
417461
self._raise_or_log(UNABLE_TO_GET_ITEM_MSG.format(path, e))
@@ -458,6 +502,57 @@ def _do_values_or_type_changed(self, changes, is_type_change=False, verify_chang
458502
self._do_verify_changes(path, expected_old_value, current_old_value)
459503

460504
def _do_item_removed(self, items):
    """
    Handle removing items.

    ``items`` maps a path string to the value that is expected to live at
    that path. Paths are processed in reverse sorted order so that, within a
    list, a bigger index is deleted before a smaller one.

    When the value found at the path is missing or differs from the expected
    one, the target is a list, and no iterable_compare_func was used, the
    closest index holding the expected value is deleted instead. If nothing
    can be located, the path is skipped.
    """
    ordered_items = sorted(items.items(), key=self._sort_key_for_item_added, reverse=True)
    for path, expected_old_value in ordered_items:
        details = self._get_elements_and_details(path)
        if not details:
            continue  # pragma: no cover. Due to cPython peephole optimizer, this line doesn't get covered. https://github.com/nedbat/coveragepy/issues/198
        elements, parent, parent_to_obj_elem, parent_to_obj_action, obj, elem, action = details

        needs_search = False
        current_old_value = not_found
        try:
            if action == GET:
                current_old_value = obj[elem]
                needs_search = current_old_value != expected_old_value
            elif action == GETATTR:
                current_old_value = getattr(obj, elem)
                needs_search = current_old_value != expected_old_value
        except (KeyError, IndexError, AttributeError, TypeError):
            # The path does not resolve on the current object.
            needs_search = True

        can_search = isinstance(obj, list) and not self._iterable_compare_func_was_used
        if needs_search and can_search:
            # The lookup returns None when the expected value is not in the list.
            elem = self._find_closest_iterable_element_for_index(obj, elem, expected_old_value)
            if elem is not None:
                current_old_value = expected_old_value

        if current_old_value is not_found or elem is None:
            continue

        self._del_elem(parent, parent_to_obj_elem, parent_to_obj_action,
                       obj, elements, path, elem, action)
        self._do_verify_changes(path, expected_old_value, current_old_value)
542+
543+
def _find_closest_iterable_element_for_index(self, obj, elem, expected_old_value):
544+
closest_elem = None
545+
closest_distance = float('inf')
546+
for index, value in enumerate(obj):
547+
dist = abs(index - elem)
548+
if dist > closest_distance:
549+
break
550+
if value == expected_old_value and dist < closest_distance:
551+
closest_elem = index
552+
closest_distance = dist
553+
return closest_elem
554+
555+
def _do_item_removedOLD(self, items):
461556
"""
462557
Handle removing items.
463558
"""
@@ -695,10 +790,9 @@ def _from_flat_dicts(flat_dict_list):
695790
Create the delta's diff object from the flat_dict_list
696791
"""
697792
result = {}
698-
699-
DEFLATTENING_NEW_ACTION_MAP = {
700-
'iterable_item_added': 'iterable_items_added_at_indexes',
701-
'iterable_item_removed': 'iterable_items_removed_at_indexes',
793+
FLATTENING_NEW_ACTION_MAP = {
794+
'unordered_iterable_item_added': 'iterable_items_added_at_indexes',
795+
'unordered_iterable_item_removed': 'iterable_items_removed_at_indexes',
702796
}
703797
for flat_dict in flat_dict_list:
704798
index = None
@@ -710,8 +804,8 @@ def _from_flat_dicts(flat_dict_list):
710804
raise ValueError("Flat dict need to include the 'action'.")
711805
if path is None:
712806
raise ValueError("Flat dict need to include the 'path'.")
713-
if action in DEFLATTENING_NEW_ACTION_MAP:
714-
action = DEFLATTENING_NEW_ACTION_MAP[action]
807+
if action in FLATTENING_NEW_ACTION_MAP:
808+
action = FLATTENING_NEW_ACTION_MAP[action]
715809
index = path.pop()
716810
if action in {'attribute_added', 'attribute_removed'}:
717811
root_element = ('root', GETATTR)
@@ -729,8 +823,8 @@ def _from_flat_dicts(flat_dict_list):
729823
result[action][path_str] = set()
730824
result[action][path_str].add(value)
731825
elif action in {
732-
'dictionary_item_added', 'dictionary_item_removed', 'iterable_item_added',
733-
'iterable_item_removed', 'attribute_removed', 'attribute_added'
826+
'dictionary_item_added', 'dictionary_item_removed',
827+
'attribute_removed', 'attribute_added', 'iterable_item_added', 'iterable_item_removed',
734828
}:
735829
result[action][path_str] = value
736830
elif action == 'values_changed':
@@ -843,10 +937,12 @@ def to_flat_dicts(self, include_action_in_path=False, report_type_changes=True):
843937
]
844938

845939
FLATTENING_NEW_ACTION_MAP = {
846-
'iterable_items_added_at_indexes': 'iterable_item_added',
847-
'iterable_items_removed_at_indexes': 'iterable_item_removed',
940+
'iterable_items_added_at_indexes': 'unordered_iterable_item_added',
941+
'iterable_items_removed_at_indexes': 'unordered_iterable_item_removed',
848942
}
849943
for action, info in self.diff.items():
944+
if action.startswith('_'):
945+
continue
850946
if action in FLATTENING_NEW_ACTION_MAP:
851947
new_action = FLATTENING_NEW_ACTION_MAP[action]
852948
for path, index_to_value in info.items():

deepdiff/path.py

Lines changed: 36 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ def _path_to_elements(path, root_element=DEFAULT_FIRST_ELEMENT):
115115
return tuple(elements)
116116

117117

118-
def _get_nested_obj(obj, elements):
118+
def _get_nested_obj(obj, elements, next_element=None):
119119
for (elem, action) in elements:
120120
if action == GET:
121121
obj = obj[elem]
@@ -124,21 +124,50 @@ def _get_nested_obj(obj, elements):
124124
return obj
125125

126126

127-
def _get_nested_obj_and_force(obj, elements):
128-
for (elem, action) in elements:
127+
def _guess_type(elements, elem, index, next_element):
128+
# If we are not at the last elements
129+
if index < len(elements) - 1:
130+
# We assume it is a nested dictionary not a nested list
131+
return {}
132+
if isinstance(next_element, int):
133+
return []
134+
return {}
135+
136+
137+
def _get_nested_obj_and_force(obj, elements, next_element=None):
    """
    Walk ``obj`` along ``elements`` and return the object reached, creating
    missing containers for GET steps along the way.

    Parameters
    ----------
    obj :
        The object to start from.
    elements : iterable of (elem, action) tuples
        Each step is either an item lookup (GET) or an attribute lookup (GETATTR).
    next_element :
        The path element that follows ``elements``. It is passed to
        ``_guess_type`` to choose between creating an empty list or an empty
        dictionary.

    Missing attributes are not created: a GETATTR step uses plain ``getattr``
    and lets its exception propagate.
    """
    prev_elem = None
    prev_action = None
    prev_obj = obj
    for index, (elem, action) in enumerate(elements):
        _prev_obj = obj
        if action == GET:
            try:
                obj = obj[elem]
                prev_obj = _prev_obj
            except KeyError:
                obj[elem] = _guess_type(elements, elem, index, next_element)
                obj = obj[elem]
                prev_obj = _prev_obj
            except IndexError:
                if isinstance(obj, list) and isinstance(elem, int) and elem >= len(obj):
                    # Pad the list with None up to the requested index, then
                    # add the new container at that index.
                    obj.extend([None] * (elem - len(obj)))
                    obj.append(_guess_type(elements, elem, index, next_element))
                    obj = obj[-1]
                    prev_obj = _prev_obj
                elif isinstance(obj, list) and len(obj) == 0 and prev_elem:
                    # We ran into an empty list that should have been a dictionary
                    # We need to change it from an empty list to a dictionary
                    obj = {elem: _guess_type(elements, elem, index, next_element)}
                    if prev_action == GET:
                        prev_obj[prev_elem] = obj
                    else:
                        setattr(prev_obj, prev_elem, obj)
                    obj = obj[elem]
        elif action == GETATTR:
            obj = getattr(obj, elem)
            prev_obj = _prev_obj
        prev_elem = elem
        prev_action = action
    return obj
143172

144173

@@ -245,9 +274,10 @@ def stringify_element(param, quote_str=None):
245274
new_param = []
246275
for char in param:
247276
if char in {'"', "'"}:
277+
import pytest; pytest.set_trace()
248278
new_param.append('\\')
249279
new_param.append(char)
250-
param = ''.join(new_param)
280+
result = '"' + ''.join(new_param) + '"'
251281
elif has_quote:
252282
result = f'"{param}"'
253283
elif has_double_quote:

deepdiff/serialization.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,9 @@ def _to_delta_dict(self, directed=True, report_repetition_required=True, always_
256256
# and will be omitted when counting distance. (Look inside the distance module.)
257257
result['_numpy_paths'] = self._numpy_paths
258258

259+
if self.iterable_compare_func:
260+
result['_iterable_compare_func_was_used'] = True
261+
259262
return deepcopy(dict(result))
260263

261264
def pretty(self):

docs/delta.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@ delta_path : String, default=None.
2121
delta_file : File Object, default=None.
2222
:ref:`delta_file_label` is the file object containing the delta data.
2323

24+
delta_diff : Delta diff, default=None.
25+
This is a slightly different diff than the output of DeepDiff. When a Delta object is instantiated from the DeepDiff output, it transforms the diff into a slightly different structure that is more suitable for delta. You can find that object via delta.diff.
26+
It is the same object that is serialized when you create a delta dump. If you already have the delta_diff object, you can pass it to Delta via the delta_diff parameter.
27+
2428
flat_dict_list : List of flat dictionaries, default=None,
2529
:ref:`flat_dict_list_label` can be used to load the delta object from a list of flat dictionaries.
2630

0 commit comments

Comments
 (0)