Skip to content

Commit ade098a

Browse files
committed
adding group_by_sort_key
1 parent 17f34a7 commit ade098a

File tree

2 files changed

+58
-2
lines changed

2 files changed

+58
-2
lines changed

deepdiff/diff.py

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@ def __init__(self,
130130
exclude_types=None,
131131
get_deep_distance=False,
132132
group_by=None,
133+
group_by_sort_key=None,
133134
hasher=None,
134135
hashes=None,
135136
ignore_encoding_errors=False,
@@ -170,7 +171,7 @@ def __init__(self,
170171
"ignore_private_variables, ignore_nan_inequality, number_to_string_func, verbose_level, "
171172
"view, hasher, hashes, max_passes, max_diffs, zip_ordered_iterables, "
172173
"cutoff_distance_for_pairs, cutoff_intersection_for_pairs, log_frequency_in_sec, cache_size, "
173-
"cache_tuning_sample_size, get_deep_distance, group_by, cache_purge_level, "
174+
"cache_tuning_sample_size, get_deep_distance, group_by, group_by_sort_key, cache_purge_level, "
174175
"math_epsilon, iterable_compare_func, _original_type, "
175176
"ignore_order_func, custom_operators, encodings, ignore_encoding_errors, "
176177
"_parameters and _shared_parameters.") % ', '.join(kwargs.keys()))
@@ -216,6 +217,14 @@ def __init__(self,
216217
self.hasher = hasher
217218
self.cache_tuning_sample_size = cache_tuning_sample_size
218219
self.group_by = group_by
220+
if callable(group_by_sort_key):
221+
self.group_by_sort_key = group_by_sort_key
222+
elif group_by_sort_key:
223+
def _group_by_sort_key(x):
224+
return x[group_by_sort_key]
225+
self.group_by_sort_key = _group_by_sort_key
226+
else:
227+
self.group_by_sort_key = None
219228
self.encodings = encodings
220229
self.ignore_encoding_errors = ignore_encoding_errors
221230

@@ -1607,11 +1616,20 @@ def _group_iterable_to_dict(self, item, group_by, item_name):
16071616
except KeyError:
16081617
logger.error("Unable to group {} by {}. The key is missing in {}".format(item_name, group_by, row))
16091618
raise
1610-
result[key] = row
1619+
if self.group_by_sort_key:
1620+
if key not in result:
1621+
result[key] = []
1622+
if row not in result[key]:
1623+
result[key].append(row)
1624+
else:
1625+
result[key] = row
16111626
else:
16121627
msg = "Unable to group {} by {} since the item {} is not a dictionary.".format(item_name, group_by, row)
16131628
logger.error(msg)
16141629
raise ValueError(msg)
1630+
if self.group_by_sort_key:
1631+
for key, row in result.items():
1632+
row.sort(key=self.group_by_sort_key)
16151633
return result
16161634
msg = "Unable to group {} by {}".format(item_name, group_by)
16171635
logger.error(msg)

tests/test_diff_text.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1710,6 +1710,44 @@ def test_group_by1(self):
17101710
'old_value': 'Blue'}}}
17111711
assert expected_grouped == diff
17121712

1713+
def test_group_by2_when_repeats(self):
1714+
t1 = [
1715+
{'id': 'AA', 'name': 'Joe', 'last_name': 'Nobody', 'int_id': 2},
1716+
{'id': 'BB', 'name': 'James', 'last_name': 'Blue', 'int_id': 20},
1717+
{'id': 'BB', 'name': 'Jimmy', 'last_name': 'Red', 'int_id': 3},
1718+
{'id': 'CC', 'name': 'Mike', 'last_name': 'Apple', 'int_id': 4},
1719+
]
1720+
1721+
t2 = [
1722+
{'id': 'AA', 'name': 'Joe', 'last_name': 'Nobody', 'int_id': 2},
1723+
{'id': 'BB', 'name': 'James', 'last_name': 'Brown', 'int_id': 20},
1724+
{'id': 'CC', 'name': 'Mike', 'last_name': 'Apple', 'int_id': 4},
1725+
]
1726+
1727+
diff = DeepDiff(t1, t2, group_by='id', group_by_sort_key='name')
1728+
expected_grouped = {
1729+
'values_changed': {
1730+
"root['BB'][0]['last_name']": {
1731+
'new_value': 'Brown',
1732+
'old_value': 'Blue'
1733+
}
1734+
},
1735+
'iterable_item_removed': {
1736+
"root['BB'][1]": {
1737+
'name': 'Jimmy',
1738+
'last_name': 'Red',
1739+
'int_id': 3
1740+
}
1741+
}
1742+
}
1743+
assert expected_grouped == diff
1744+
1745+
diff2 = DeepDiff(t1, t2, group_by='id', group_by_sort_key=lambda x: x['name'])
1746+
assert expected_grouped == diff2
1747+
1748+
diff3 = DeepDiff(t1, t2, group_by='id', group_by_sort_key=lambda x: x['name'])
1749+
assert expected_grouped == diff3
1750+
17131751
def test_group_by_key_missing(self):
17141752
t1 = [
17151753
{'id': 'AA', 'name': 'Joe', 'last_name': 'Nobody'},

0 commit comments

Comments
 (0)