Skip to content

Commit d7760be

Browse files
committed
Add optional orjson support
1 parent 8d594d4 commit d7760be

File tree

6 files changed

+89
-51
lines changed

6 files changed

+89
-51
lines changed

docs/guide/configuration.asciidoc

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,24 @@ es = Elasticsearch(
264264
)
265265
------------------------------------
266266

267+
If the `orjson` package is installed, you can use the faster ``OrjsonSerializer`` for the default mimetype (``application/json``):
268+
269+
[source,python]
270+
------------------------------------
271+
from elasticsearch import Elasticsearch, OrjsonSerializer
272+
273+
es = Elasticsearch(
274+
...,
275+
serializer=OrjsonSerializer()
276+
)
277+
------------------------------------
278+
279+
orjson is particularly fast when serializing vectors as it has native numpy support. This will be the default in a future release. Note that you can install orjson with the `orjson` extra:
280+
281+
[source,sh]
282+
--------------------------------------------
283+
$ python -m pip install elasticsearch[orjson]
284+
--------------------------------------------
267285

268286
[discrete]
269287
[[nodes]]

elasticsearch_serverless/__init__.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,11 @@
6363
)
6464
from .serializer import JSONSerializer, JsonSerializer
6565

66+
try:
67+
from .serializer import OrjsonSerializer
68+
except ModuleNotFoundError:
69+
OrjsonSerializer = None # type: ignore[assignment,misc]
70+
6671
# Only raise one warning per deprecation message so as not
6772
# to spam up the user if the same action is done multiple times.
6873
warnings.simplefilter("default", category=ElasticsearchWarning, append=True)
@@ -86,6 +91,8 @@
8691
"UnsupportedProductError",
8792
"ElasticsearchWarning",
8893
]
94+
if OrjsonSerializer is not None:
95+
__all__.append("OrjsonSerializer")
8996

9097
fixup_module_metadata(__name__, globals())
9198
del fixup_module_metadata

elasticsearch_serverless/serializer.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,13 @@
4141
"MapboxVectorTileSerializer",
4242
]
4343

44+
try:
45+
from elastic_transport import OrjsonSerializer as _OrjsonSerializer
46+
47+
__all__.append("OrjsonSerializer")
48+
except ModuleNotFoundError:
49+
_OrjsonSerializer = None # type: ignore[assignment,misc]
50+
4451

4552
class JsonSerializer(_JsonSerializer):
4653
mimetype: ClassVar[str] = "application/json"
@@ -73,6 +80,13 @@ def default(self, data: Any) -> Any:
7380
raise TypeError(f"Unable to serialize {data!r} (type: {type(data)})")
7481

7582

83+
if _OrjsonSerializer is not None:
84+
85+
class OrjsonSerializer(JsonSerializer, _OrjsonSerializer):
86+
def default(self, data: Any) -> Any:
87+
return JsonSerializer.default(self, data)
88+
89+
7690
class NdjsonSerializer(JsonSerializer, _NdjsonSerializer):
7791
mimetype: ClassVar[str] = "application/x-ndjson"
7892

noxfile.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def pytest_argv():
4949

5050
@nox.session(python=["3.9", "3.10", "3.11", "3.12"])
5151
def test(session):
52-
session.install(".[dev]", env=INSTALL_ENV)
52+
# The dev extra carries the test dependencies (orjson, numpy, pandas, ...).
session.install(".[dev]", env=INSTALL_ENV)
5353

5454
session.run(*pytest_argv(), *(session.posargs))
5555

@@ -86,7 +86,7 @@ def lint(session):
8686
session.run("python", "utils/license-headers.py", "check", *SOURCE_FILES)
8787

8888
# Workaround to make '-r' to still work despite uninstalling aiohttp below.
89-
session.install(".[async,requests]", env=INSTALL_ENV)
89+
session.install(".[async,requests,orjson]", env=INSTALL_ENV)
9090

9191
# Run mypy on the package and then the type examples separately for
9292
# the two different mypy use-cases, ourselves and our users.

pyproject.toml

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -44,12 +44,9 @@ dependencies = [
4444
]
4545

4646
[project.optional-dependencies]
47-
async = [
48-
"aiohttp>=3,<4",
49-
]
50-
requests = [
51-
"requests>=2.4.0, <3.0.0",
52-
]
47+
async = ["aiohttp>=3,<4"]
48+
requests = ["requests>=2.4.0, <3.0.0" ]
49+
orjson = ["orjson>=3"]
5350
dev = [
5451
"requests>=2, <3",
5552
"aiohttp",
@@ -66,6 +63,7 @@ dev = [
6663
"twine",
6764
"build",
6865
"nox",
66+
"orjson",
6967
"numpy",
7068
"pandas",
7169
"mapbox-vector-tile",

test_elasticsearch_serverless/test_serializer.py

Lines changed: 44 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
# specific language governing permissions and limitations
1717
# under the License.
1818

19-
import sys
2019
import uuid
2120
from datetime import datetime
2221
from decimal import Decimal
@@ -31,133 +30,135 @@
3130

3231
import re
3332

33+
# Import the serializers from the package under test, not the sibling `elasticsearch` distribution.
from elasticsearch_serverless.serializer import (
JSONSerializer,
OrjsonSerializer,
TextSerializer,
)
34+
3435
from elasticsearch_serverless import Elasticsearch
3536
from elasticsearch_serverless.exceptions import SerializationError
36-
from elasticsearch_serverless.serializer import JSONSerializer, TextSerializer
3737

3838
requires_numpy_and_pandas = pytest.mark.skipif(
39-
np is None or pd is None, reason="Test requires numpy or pandas to be available"
39+
np is None or pd is None, reason="Test requires numpy and pandas to be available"
4040
)
4141

4242

43-
def test_datetime_serialization():
44-
assert b'{"d":"2010-10-01T02:30:00"}' == JSONSerializer().dumps(
43+
@pytest.fixture(params=[JSONSerializer, OrjsonSerializer])
44+
def json_serializer(request: pytest.FixtureRequest):
45+
yield request.param()
46+
47+
48+
def test_datetime_serialization(json_serializer):
49+
assert b'{"d":"2010-10-01T02:30:00"}' == json_serializer.dumps(
4550
{"d": datetime(2010, 10, 1, 2, 30)}
4651
)
4752

4853

49-
def test_decimal_serialization():
50-
requires_numpy_and_pandas()
54+
def test_decimal_serialization(json_serializer):
55+
assert b'{"d":3.8}' == json_serializer.dumps({"d": Decimal("3.8")})
5156

52-
if sys.version_info[:2] == (2, 6):
53-
pytest.skip("Float rounding is broken in 2.6.")
54-
assert b'{"d":3.8}' == JSONSerializer().dumps({"d": Decimal("3.8")})
5557

56-
57-
def test_uuid_serialization():
58-
assert b'{"d":"00000000-0000-0000-0000-000000000003"}' == JSONSerializer().dumps(
58+
def test_uuid_serialization(json_serializer):
59+
assert b'{"d":"00000000-0000-0000-0000-000000000003"}' == json_serializer.dumps(
5960
{"d": uuid.UUID("00000000-0000-0000-0000-000000000003")}
6061
)
6162

6263

6364
@requires_numpy_and_pandas
64-
def test_serializes_numpy_bool():
65-
assert b'{"d":true}' == JSONSerializer().dumps({"d": np.bool_(True)})
65+
def test_serializes_numpy_bool(json_serializer):
66+
assert b'{"d":true}' == json_serializer.dumps({"d": np.bool_(True)})
6667

6768

6869
@requires_numpy_and_pandas
69-
def test_serializes_numpy_integers():
70-
ser = JSONSerializer()
70+
def test_serializes_numpy_integers(json_serializer):
7171
for np_type in (
7272
np.int_,
7373
np.int8,
7474
np.int16,
7575
np.int32,
7676
np.int64,
7777
):
78-
assert ser.dumps({"d": np_type(-1)}) == b'{"d":-1}'
78+
assert json_serializer.dumps({"d": np_type(-1)}) == b'{"d":-1}'
7979

8080
for np_type in (
8181
np.uint8,
8282
np.uint16,
8383
np.uint32,
8484
np.uint64,
8585
):
86-
assert ser.dumps({"d": np_type(1)}) == b'{"d":1}'
86+
assert json_serializer.dumps({"d": np_type(1)}) == b'{"d":1}'
8787

8888

8989
@requires_numpy_and_pandas
90-
def test_serializes_numpy_floats():
91-
ser = JSONSerializer()
90+
def test_serializes_numpy_floats(json_serializer):
9291
for np_type in (
9392
np.float32,
9493
np.float64,
9594
):
96-
assert re.search(rb'^{"d":1\.2[\d]*}$', ser.dumps({"d": np_type(1.2)}))
95+
assert re.search(
96+
rb'^{"d":1\.2[\d]*}$', json_serializer.dumps({"d": np_type(1.2)})
97+
)
9798

9899

99100
@requires_numpy_and_pandas
100-
def test_serializes_numpy_datetime():
101-
assert b'{"d":"2010-10-01T02:30:00"}' == JSONSerializer().dumps(
101+
def test_serializes_numpy_datetime(json_serializer):
102+
assert b'{"d":"2010-10-01T02:30:00"}' == json_serializer.dumps(
102103
{"d": np.datetime64("2010-10-01T02:30:00")}
103104
)
104105

105106

106107
@requires_numpy_and_pandas
107-
def test_serializes_numpy_ndarray():
108-
assert b'{"d":[0,0,0,0,0]}' == JSONSerializer().dumps(
108+
def test_serializes_numpy_ndarray(json_serializer):
109+
assert b'{"d":[0,0,0,0,0]}' == json_serializer.dumps(
109110
{"d": np.zeros((5,), dtype=np.uint8)}
110111
)
111112
# This isn't useful for Elasticsearch, just want to make sure it works.
112-
assert b'{"d":[[0,0],[0,0]]}' == JSONSerializer().dumps(
113+
assert b'{"d":[[0,0],[0,0]]}' == json_serializer.dumps(
113114
{"d": np.zeros((2, 2), dtype=np.uint8)}
114115
)
115116

116117

117118
@requires_numpy_and_pandas
118119
def test_serializes_numpy_nan_to_nan():
119-
assert b'{"d":NaN}' == JSONSerializer().dumps({"d": np.nan})
120+
assert b'{"d":NaN}' == JSONSerializer().dumps({"d": float("NaN")})
121+
# NaN is invalid JSON, and orjson silently converts it to null
122+
assert b'{"d":null}' == OrjsonSerializer().dumps({"d": float("NaN")})
120123

121124

122125
@requires_numpy_and_pandas
123-
def test_serializes_pandas_timestamp():
124-
assert b'{"d":"2010-10-01T02:30:00"}' == JSONSerializer().dumps(
125-
{"d": pd.Timestamp("2010-10-01T02:30:00")}
126-
)
126+
def test_serializes_pandas_timestamp(json_serializer):
127+
# dumps() needs the document to serialize; a pandas Timestamp must render as ISO-8601.
assert b'{"d":"2010-10-01T02:30:00"}' == json_serializer.dumps(
{"d": pd.Timestamp("2010-10-01T02:30:00")}
)
127128

128129

129130
@requires_numpy_and_pandas
130-
def test_serializes_pandas_series():
131-
assert b'{"d":["a","b","c","d"]}' == JSONSerializer().dumps(
131+
def test_serializes_pandas_series(json_serializer):
132+
assert b'{"d":["a","b","c","d"]}' == json_serializer.dumps(
132133
{"d": pd.Series(["a", "b", "c", "d"])}
133134
)
134135

135136

136137
@requires_numpy_and_pandas
137138
@pytest.mark.skipif(not hasattr(pd, "NA"), reason="pandas.NA is required")
138-
def test_serializes_pandas_na():
139-
assert b'{"d":null}' == JSONSerializer().dumps({"d": pd.NA})
139+
def test_serializes_pandas_na(json_serializer):
140+
assert b'{"d":null}' == json_serializer.dumps({"d": pd.NA})
140141

141142

142143
@requires_numpy_and_pandas
143144
@pytest.mark.skipif(not hasattr(pd, "NaT"), reason="pandas.NaT required")
144-
def test_raises_serialization_error_pandas_nat():
145+
def test_raises_serialization_error_pandas_nat(json_serializer):
145146
with pytest.raises(SerializationError):
146-
JSONSerializer().dumps({"d": pd.NaT})
147+
json_serializer.dumps({"d": pd.NaT})
147148

148149

149150
@requires_numpy_and_pandas
150-
def test_serializes_pandas_category():
151+
def test_serializes_pandas_category(json_serializer):
151152
cat = pd.Categorical(["a", "c", "b", "a"], categories=["a", "b", "c"])
152-
assert b'{"d":["a","c","b","a"]}' == JSONSerializer().dumps({"d": cat})
153+
assert b'{"d":["a","c","b","a"]}' == json_serializer.dumps({"d": cat})
153154

154155
cat = pd.Categorical([1, 2, 3], categories=[1, 2, 3])
155-
assert b'{"d":[1,2,3]}' == JSONSerializer().dumps({"d": cat})
156+
assert b'{"d":[1,2,3]}' == json_serializer.dumps({"d": cat})
156157

157158

158-
def test_json_raises_serialization_error_on_dump_error():
159+
def test_json_raises_serialization_error_on_dump_error(json_serializer):
159160
with pytest.raises(SerializationError):
160-
JSONSerializer().dumps(object())
161+
json_serializer.dumps(object())
161162

162163

163164
def test_raises_serialization_error_on_load_error():

0 commit comments

Comments
 (0)