Skip to content

Commit df0c984

Browse files
committed
Making orjson optional. Adding PrefixOrSuffixOperator
1 parent fdb08d4 commit df0c984

14 files changed

+139
-27
lines changed

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,10 @@ If you want to use DeepDiff from commandline:
4848

4949
`pip install "deepdiff[cli]"`
5050

51+
If you want to improve the performance of DeepDiff with certain processes such as json serialization:
52+
53+
`pip install "deepdiff[optimize]"`
54+
5155
### Importing
5256

5357
```python

deepdiff/commands.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,11 @@
1111
from deepdiff import Delta, DeepSearch, extract as deep_extract
1212
from deepdiff.serialization import load_path_content, save_content_to_path
1313

14+
try:
15+
import orjson
16+
except ImportError:
17+
orjson = None
18+
1419

1520
@click.group()
1621
def cli():
@@ -105,7 +110,13 @@ def diff(
105110
# printing into stdout
106111
sys.stdout.buffer.write(delta.dumps())
107112
else:
108-
pprint(diff, indent=2)
113+
try:
114+
if orjson:
115+
print(diff.to_json(option=orjson.OPT_INDENT_2))
116+
else:
117+
print(diff.to_json(indent=2))
118+
except Exception:
119+
pprint(diff, indent=2)
109120

110121

111122
@cli.command()

deepdiff/operator.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,14 @@ def match(self, level) -> bool:
2525

2626
def give_up_diffing(self, level, diff_instance) -> bool:
2727
raise NotImplementedError('Please implement the diff function.')
28+
29+
30+
class PrefixOrSuffixOperator:
    """Built-in custom operator that skips string pairs where one string
    is a prefix or a suffix of the other.

    Pass an instance in ``custom_operators``; for example "joe" and
    "joe's car" are then not reported as different.
    """

    def match(self, level) -> bool:
        """Return True only when both ``level.t1`` and ``level.t2`` are
        non-empty strings.

        The result is always a real ``bool``; a bare ``and`` chain would
        leak ``''`` or ``None`` to the caller instead of ``False``.
        """
        t1 = level.t1
        t2 = level.t2
        return isinstance(t1, str) and isinstance(t2, str) and bool(t1) and bool(t2)

    def give_up_diffing(self, level, diff_instance) -> bool:
        """Return True when one string is a prefix or a suffix of the other.

        Nothing is reported through ``diff_instance``, so a True result
        simply leaves the pair out of the diff results.
        """
        t1 = level.t1
        t2 = level.t2
        # Check both directions, since either side may be the longer string.
        return (
            t1.startswith(t2) or t2.startswith(t1)
            or t1.endswith(t2) or t2.endswith(t1)
        )

deepdiff/serialization.py

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import io
44
import os
55
import json
6-
import orjson
76
import uuid
87
import logging
98
import re # NOQA
@@ -26,6 +25,11 @@
2625
except ImportError: # pragma: no cover.
2726
import csv
2827
clevercsv = None # pragma: no cover.
28+
try:
29+
import orjson
30+
except ImportError: # pragma: no cover.
31+
orjson = None
32+
2933
from copy import deepcopy
3034
from functools import partial
3135
from collections.abc import Mapping
@@ -556,15 +560,17 @@ def object_hook(self, obj):
556560
def json_dumps(item, default_mapping=None, **kwargs):
557561
"""
558562
Dump json with extra details that are not normally json serializable
559-
560-
Note: I tried to replace json with orjson for its speed. It does work
561-
but the output it makes is a byte object and Postgres couldn't directly use it without
562-
encoding to str. So I switched back to json.
563563
"""
564-
return orjson.dumps(
565-
item,
566-
default=json_convertor_default(default_mapping=default_mapping),
567-
**kwargs).decode(encoding='utf-8')
564+
if orjson:
565+
return orjson.dumps(
566+
item,
567+
default=json_convertor_default(default_mapping=default_mapping),
568+
**kwargs).decode(encoding='utf-8')
569+
else:
570+
return json.dumps(
571+
item,
572+
default=json_convertor_default(default_mapping=default_mapping),
573+
**kwargs)
568574

569575

570576
json_loads = partial(json.loads, cls=JSONDecoder)

docs/custom.rst

Lines changed: 45 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -128,21 +128,61 @@ For example you could use the level object to further determine if the 2 objects
128128
Custom Operators
129129
----------------
130130

131-
Whether two objects are different or not are largely depend on the context. For example, apple and banana are the same
131+
Whether two objects are different or not largely depends on the context. For example, apples and bananas are the same
132132
if you are considering whether they are fruits or not.
133133

134134
In that case, you can pass *custom_operators* for the job.
135135

136-
In fact, custom operators give you a lot of power. In the following examples we explore use cases from making DeepDiff
137-
report the L2 Distance of items, to only include certain paths in diffing all the way to making DeepDiff stop diffing
138-
as soon as the first diff is reported.
136+
Custom operators give you a lot of power. In the following examples, we explore various use cases such as:
137+
138+
- Making DeepDiff report the L2 Distance of items
139+
- Only including specific paths in diffing
140+
- Making DeepDiff stop diffing once we find the first diff.
141+
142+
You can use one of the predefined custom operators that come with DeepDiff. Or you can define one yourself.
143+
144+
145+
Built-In Custom Operators
146+
147+
148+
PrefixOrSuffixOperator
149+
......................
150+
151+
152+
This operator will skip pairs of strings where one string is a prefix or a suffix of the other.
153+
154+
For example when this operator is used, the two strings of "joe" and "joe's car" will not be reported as different.
155+
156+
>>> from deepdiff import DeepDiff
157+
>>> from deepdiff.operator import PrefixOrSuffixOperator
158+
>>> t1 = {
159+
... "key1": ["foo", "bar's food", "jack", "joe"]
160+
... }
161+
>>> t2 = {
162+
... "key1": ["foo", "bar", "jill", "joe'car"]
163+
... }
164+
>>>
165+
>>> DeepDiff(t1, t2)
166+
{'values_changed': {"root['key1'][1]": {'new_value': 'bar', 'old_value': "bar's food"}, "root['key1'][2]": {'new_value': 'jill', 'old_value': 'jack'}, "root['key1'][3]": {'new_value': "joe'car", 'old_value': 'joe'}}}
167+
>>> DeepDiff(t1, t2, custom_operators=[
168+
... PrefixOrSuffixOperator()
169+
... ])
170+
>>>
171+
{'values_changed': {"root['key1'][2]": {'new_value': 'jill', 'old_value': 'jack'}}}
172+
173+
174+
175+
176+
Define A Custom Operator
177+
------------------------
178+
139179

140180
To define a custom operator, you just need to inherit from *BaseOperator* and
141181

142182
* implement a give_up_diffing method
143183
* give_up_diffing(level: DiffLevel, diff_instance: DeepDiff) -> boolean
144184

145-
If it returns True, then we will give up diffing the 2 objects.
185+
If it returns True, then we will give up diffing the two objects.
146186
You may or may not use the diff_instance.custom_report_result within this function
147187
to report any diff. If you decide not to report anything, and this
148188
function returns True, then the objects are basically skipped in the results.

docs/index.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,10 @@ If you want to use DeepDiff from commandline::
7676

7777
pip install "deepdiff[cli]"
7878

79+
If you want to improve the performance of DeepDiff with certain processes such as json serialization::
80+
81+
pip install "deepdiff[optimize]"
82+
7983
Read about DeepDiff optimizations at :ref:`optimizations_label`
8084

8185
Importing

docs/optimizations.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,15 @@ Optimizations
88
If you are dealing with large nested objects and ignore_order=True, chances are DeepDiff takes a while to calculate the diff. Here are some tips that may help you with optimizations and progress report.
99

1010

11+
Optimized Libraries
12+
-------------------
13+
14+
If you dump DeepDiff or Delta objects as json, you can improve the performance by installing orjson.
15+
DeepDiff will automatically use orjson instead of Python's built-in json library to do json serialization.
16+
17+
pip install "deepdiff[optimize]"
18+
19+
1120
Max Passes
1221
----------
1322

requirements-dev-3.7.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
wheel==0.38.1
21
-r requirements.txt
32
-r requirements-cli.txt
43
bump2version==1.0.1
@@ -8,3 +7,4 @@ numpy==1.21.6
87
pytest==7.1.2
98
python-dotenv==0.20.0
109
python-dateutil==2.8.2
10+
wheel==0.38.1

requirements-dev.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
wheel==0.38.1
21
-r requirements.txt
32
-r requirements-cli.txt
43
bump2version==1.0.1
@@ -14,3 +13,5 @@ Sphinx==5.3.0
1413
sphinx-sitemap==2.2.1
1514
flake8==6.0.0
1615
python-dateutil==2.8.2
16+
orjson==3.8.3
17+
wheel==0.38.1

requirements-optimize.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
orjson

requirements.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1 @@
11
ordered-set>=4.0.2,<4.2.0
2-
orjson

setup.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ def get_reqs(filename):
2121

2222
reqs = get_reqs("requirements.txt")
2323
cli_reqs = get_reqs("requirements-cli.txt")
24+
optimize_reqs = get_reqs("requirements-optimize.txt")
2425

2526
with open('README.md') as file:
2627
long_description = file.read()
@@ -45,6 +46,7 @@ def get_reqs(filename):
4546
python_requires='>=3.7',
4647
extras_require={
4748
"cli": cli_reqs,
49+
"optimize": optimize_reqs,
4850
},
4951
classifiers=[
5052
"Intended Audience :: Developers",
@@ -54,6 +56,7 @@ def get_reqs(filename):
5456
"Programming Language :: Python :: 3.8",
5557
"Programming Language :: Python :: 3.9",
5658
"Programming Language :: Python :: 3.10",
59+
"Programming Language :: Python :: 3.11",
5760
"Programming Language :: Python :: Implementation :: PyPy",
5861
"Development Status :: 5 - Production/Stable",
5962
"License :: OSI Approved :: MIT License"

tests/test_command.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,14 @@
1111
class TestCommands:
1212

1313
@pytest.mark.parametrize('t1, t2, expected_in_stdout, expected_exit_code', [
14-
('t1.json', 't2.json', "'dictionary_item_added\': [root[0]", 0),
14+
('t1.json', 't2.json', '"dictionary_item_added": [\n "root[0]', 0),
1515
('t1_corrupt.json', 't2.json', "Expecting property name enclosed in double quotes", 1),
16-
('t1.json', 't2_json.csv', "'old_value\': \'value2\'", 0),
17-
('t2_json.csv', 't1.json', "'old_value\': \'value3\'", 0),
18-
('t1.csv', 't2.csv', "\'new_value\': \'James\'", 0),
16+
('t1.json', 't2_json.csv', '"old_value": "value2"', 0),
17+
('t2_json.csv', 't1.json', '"old_value": "value3"', 0),
18+
('t1.csv', 't2.csv', '"new_value": "James"', 0),
1919
('t1.toml', 't2.toml', "10.0.0.2", 0),
20-
('t1.pickle', 't2.pickle', "'new_value': 5, 'old_value': 1", 0),
21-
('t1.yaml', 't2.yaml', "'new_value': 61, 'old_value': 65", 0),
20+
('t1.pickle', 't2.pickle', '"new_value": 5,\n "old_value": 1', 0),
21+
('t1.yaml', 't2.yaml', '"new_value": 61,\n "old_value": 65', 0),
2222
])
2323
def test_diff_command(self, t1, t2, expected_in_stdout, expected_exit_code):
2424
t1 = os.path.join(FIXTURES_DIR, t1)
@@ -74,7 +74,7 @@ def test_command_group_by(self):
7474
diffed = runner.invoke(diff, [t1, t2, '--group-by', 'id'])
7575
assert 0 == diffed.exit_code
7676
assert 'values_changed' in diffed.output
77-
assert '\'new_value\': \'Chicken\'' in diffed.output
77+
assert '"new_value": "Chicken"' in diffed.output
7878

7979
def test_command_math_epsilon(self):
8080
t1 = os.path.join(FIXTURES_DIR, 'd_t1.yaml')
@@ -86,7 +86,7 @@ def test_command_math_epsilon(self):
8686

8787
diffed2 = runner.invoke(diff, [t1, t2, '--math-epsilon', '0.001'])
8888
assert 0 == diffed2.exit_code
89-
assert "{'values_changed': {'root[2][2]': {'new_value': 0.289, 'old_value': 0.288}}}\n" == diffed2.output
89+
assert '{\n "values_changed": {\n "root[2][2]": {\n "new_value": 0.289,\n "old_value": 0.288\n }\n }\n}\n' == diffed2.output
9090

9191
def test_command_grep(self):
9292
path = os.path.join(FIXTURES_DIR, 'd_t1.yaml')

tests/test_operators.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from typing import List
44
from deepdiff import DeepDiff
5-
from deepdiff.operator import BaseOperator
5+
from deepdiff.operator import BaseOperator, PrefixOrSuffixOperator
66

77

88
class TestOperators:
@@ -217,3 +217,26 @@ def give_up_diffing(self, level, diff_instance) -> bool:
217217

218218
expected = {'values_changed': {'root[0][1]': {'new_value': 3, 'old_value': 2}}}
219219
assert expected == ddiff
220+
221+
def test_prefix_or_suffix_diff(self):
222+
223+
t1 = {
224+
"key1": ["foo", "bar's food", "jack", "joe"]
225+
}
226+
t2 = {
227+
"key1": ["foo", "bar", "jill", "joe'car"]
228+
}
229+
230+
ddiff = DeepDiff(t1, t2, custom_operators=[
231+
PrefixOrSuffixOperator()
232+
])
233+
234+
expected = {'values_changed': {"root['key1'][2]": {'new_value': 'jill', 'old_value': 'jack'}}}
235+
assert expected == ddiff
236+
237+
ddiff2 = DeepDiff(t1, t2, ignore_order=True, custom_operators=[
238+
PrefixOrSuffixOperator()
239+
])
240+
241+
expected2 = {'values_changed': {"root['key1'][2]": {'new_value': 'jill', 'old_value': 'jack'}}}
242+
assert expected2 == ddiff2

0 commit comments

Comments
 (0)