Skip to content

Commit 739a22e

Browse files
Merge #974
974: Allow passing a custom serializer for documents r=sanders41 a=sanders41 # Pull Request `@LaundroMat` FYI in case you want to test this to see if it solves your issue. ## Related issue Fixes #973 ## What does this PR do? - Allows passing a custom JSONEncoder to serialize documents with types that the default encoder can't handle. ## PR checklist Please check if your PR fulfills the following requirements: - [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)? - [x] Have you read the contributing guidelines? - [x] Have you made sure that the title is accurate and descriptive of the changes? Thank you so much for contributing to Meilisearch! Co-authored-by: Paul Sanders <[email protected]>
2 parents b3d914e + 7f7d066 commit 739a22e

File tree

4 files changed

+237
-18
lines changed

4 files changed

+237
-18
lines changed

README.md

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,34 @@ index.update_filterable_attributes([
169169
])
170170
```
171171

172+
#### Custom Serializer for documents <!-- omit in toc -->
173+
174+
If your documents contain fields that the Python JSON serializer does not know how to handle, you
175+
can use your own custom serializer.
176+
177+
```py
178+
from datetime import datetime
179+
from json import JSONEncoder
180+
from uuid import UUID, uuid4
181+
182+
183+
class CustomEncoder(JSONEncoder):
184+
def default(self, o):
185+
if isinstance(o, (UUID, datetime)):
186+
return str(o)
187+
188+
# Let the base class default method raise the TypeError
189+
return super().default(o)
190+
191+
192+
documents = [
193+
{"id": uuid4(), "title": "test 1", "when": datetime.now()},
194+
{"id": uuid4(), "title": "Test 2", "when": datetime.now()},
195+
]
196+
index = empty_index()
197+
index.add_documents(documents, serializer=CustomEncoder)
198+
```
199+
172200
You only need to perform this operation once.
173201

174202
Note that Meilisearch will rebuild your index whenever you update `filterableAttributes`. Depending on the size of your dataset, this might take time. You can track the process using the [task](https://www.meilisearch.com/docs/reference/api/tasks#get-tasks).
@@ -205,7 +233,6 @@ index.search(
205233

206234
This package guarantees compatibility with [version v1.x of Meilisearch](https://github.com/meilisearch/meilisearch/releases/latest), but some features may not be present. Please check the [issues](https://github.com/meilisearch/meilisearch-python/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22+label%3Aenhancement) for more info.
207235

208-
209236
## 💡 Learn more
210237

211238
The following sections in our main documentation website may interest you:

meilisearch/_httprequests.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import json
44
from functools import lru_cache
5-
from typing import Any, Callable, List, Mapping, Optional, Sequence, Tuple, Union
5+
from typing import Any, Callable, List, Mapping, Optional, Sequence, Tuple, Type, Union
66

77
import requests
88

@@ -39,6 +39,8 @@ def send_request(
3939
]
4040
] = None,
4141
content_type: Optional[str] = None,
42+
*,
43+
serializer: Optional[Type[json.JSONEncoder]] = None,
4244
) -> Any:
4345
if content_type:
4446
self.headers["Content-Type"] = content_type
@@ -58,11 +60,10 @@ def send_request(
5860
data=body,
5961
)
6062
else:
63+
data = json.dumps(body, cls=serializer) if body else "" if body == "" else "null"
64+
6165
request = http_method(
62-
request_path,
63-
timeout=self.config.timeout,
64-
headers=self.headers,
65-
data=json.dumps(body) if body else "" if body == "" else "null",
66+
request_path, timeout=self.config.timeout, headers=self.headers, data=data
6667
)
6768
return self.__validate(request)
6869

@@ -81,8 +82,10 @@ def post(
8182
Union[Mapping[str, Any], Sequence[Mapping[str, Any]], List[str], str]
8283
] = None,
8384
content_type: Optional[str] = "application/json",
85+
*,
86+
serializer: Optional[Type[json.JSONEncoder]] = None,
8487
) -> Any:
85-
return self.send_request(requests.post, path, body, content_type)
88+
return self.send_request(requests.post, path, body, content_type, serializer=serializer)
8689

8790
def patch(
8891
self,
@@ -108,8 +111,10 @@ def put(
108111
]
109112
] = None,
110113
content_type: Optional[str] = "application/json",
114+
*,
115+
serializer: Optional[Type[json.JSONEncoder]] = None,
111116
) -> Any:
112-
return self.send_request(requests.put, path, body, content_type)
117+
return self.send_request(requests.put, path, body, content_type, serializer=serializer)
113118

114119
def delete(
115120
self,

meilisearch/index.py

Lines changed: 70 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,19 @@
11
from __future__ import annotations
22

33
from datetime import datetime
4-
from typing import Any, Dict, Generator, List, Mapping, MutableMapping, Optional, Sequence, Union
4+
from typing import (
5+
TYPE_CHECKING,
6+
Any,
7+
Dict,
8+
Generator,
9+
List,
10+
Mapping,
11+
MutableMapping,
12+
Optional,
13+
Sequence,
14+
Type,
15+
Union,
16+
)
517
from urllib import parse
618
from warnings import warn
719

@@ -26,6 +38,9 @@
2638
from meilisearch.models.task import Task, TaskInfo, TaskResults
2739
from meilisearch.task import TaskHandler
2840

41+
if TYPE_CHECKING:
42+
from json import JSONEncoder
43+
2944

3045
# pylint: disable=too-many-public-methods, too-many-lines
3146
class Index:
@@ -403,6 +418,8 @@ def add_documents(
403418
self,
404419
documents: Sequence[Mapping[str, Any]],
405420
primary_key: Optional[str] = None,
421+
*,
422+
serializer: Optional[Type[JSONEncoder]] = None,
406423
) -> TaskInfo:
407424
"""Add documents to the index.
408425
@@ -412,6 +429,9 @@ def add_documents(
412429
List of documents. Each document should be a dictionary.
413430
primary_key (optional):
414431
The primary-key used in index. Ignored if already set up.
432+
serializer (optional):
433+
A custom JSONEncoder to handle serializing fields that the built-in json.dumps
434+
cannot handle, for example UUID and datetime.
415435
416436
Returns
417437
-------
@@ -425,14 +445,16 @@ def add_documents(
425445
An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
426446
"""
427447
url = self._build_url(primary_key)
428-
add_document_task = self.http.post(url, documents)
448+
add_document_task = self.http.post(url, documents, serializer=serializer)
429449
return TaskInfo(**add_document_task)
430450

431451
def add_documents_in_batches(
432452
self,
433453
documents: Sequence[Mapping[str, Any]],
434454
batch_size: int = 1000,
435455
primary_key: Optional[str] = None,
456+
*,
457+
serializer: Optional[Type[JSONEncoder]] = None,
436458
) -> List[TaskInfo]:
437459
"""Add documents to the index in batches.
438460
@@ -444,6 +466,9 @@ def add_documents_in_batches(
444466
The number of documents that should be included in each batch. Default = 1000
445467
primary_key (optional):
446468
The primary-key used in index. Ignored if already set up.
469+
serializer (optional):
470+
A custom JSONEncoder to handle serializing fields that the built-in json.dumps
471+
cannot handle, for example UUID and datetime.
447472
448473
Returns
449474
-------
@@ -461,7 +486,7 @@ def add_documents_in_batches(
461486
tasks: List[TaskInfo] = []
462487

463488
for document_batch in self._batch(documents, batch_size):
464-
task = self.add_documents(document_batch, primary_key)
489+
task = self.add_documents(document_batch, primary_key, serializer=serializer)
465490
tasks.append(task)
466491

467492
return tasks
@@ -470,6 +495,8 @@ def add_documents_json(
470495
self,
471496
str_documents: str,
472497
primary_key: Optional[str] = None,
498+
*,
499+
serializer: Optional[Type[JSONEncoder]] = None,
473500
) -> TaskInfo:
474501
"""Add string documents from JSON file to the index.
475502
@@ -479,6 +506,9 @@ def add_documents_json(
479506
String of document from a JSON file.
480507
primary_key (optional):
481508
The primary-key used in index. Ignored if already set up.
509+
serializer (optional):
510+
A custom JSONEncoder to handle serializing fields that the built-in json.dumps
511+
cannot handle, for example UUID and datetime.
482512
483513
Returns
484514
-------
@@ -491,7 +521,9 @@ def add_documents_json(
491521
MeilisearchApiError
492522
An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
493523
"""
494-
return self.add_documents_raw(str_documents, primary_key, "application/json")
524+
return self.add_documents_raw(
525+
str_documents, primary_key, "application/json", serializer=serializer
526+
)
495527

496528
def add_documents_csv(
497529
self,
@@ -556,6 +588,8 @@ def add_documents_raw(
556588
primary_key: Optional[str] = None,
557589
content_type: Optional[str] = None,
558590
csv_delimiter: Optional[str] = None,
591+
*,
592+
serializer: Optional[Type[JSONEncoder]] = None,
559593
) -> TaskInfo:
560594
"""Add string documents to the index.
561595
@@ -570,6 +604,9 @@ def add_documents_raw(
570604
csv_delimiter:
571605
One ASCII character used to customize the delimiter for CSV.
572606
Note: The csv delimiter can only be used with the Content-Type text/csv.
607+
serializer (optional):
608+
A custom JSONEncoder to handle serializing fields that the built-in json.dumps
609+
cannot handle, for example UUID and datetime.
573610
574611
Returns
575612
-------
@@ -583,11 +620,15 @@ def add_documents_raw(
583620
An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
584621
"""
585622
url = self._build_url(primary_key=primary_key, csv_delimiter=csv_delimiter)
586-
response = self.http.post(url, str_documents, content_type)
623+
response = self.http.post(url, str_documents, content_type, serializer=serializer)
587624
return TaskInfo(**response)
588625

589626
def update_documents(
590-
self, documents: Sequence[Mapping[str, Any]], primary_key: Optional[str] = None
627+
self,
628+
documents: Sequence[Mapping[str, Any]],
629+
primary_key: Optional[str] = None,
630+
*,
631+
serializer: Optional[Type[JSONEncoder]] = None,
591632
) -> TaskInfo:
592633
"""Update documents in the index.
593634
@@ -597,6 +638,9 @@ def update_documents(
597638
List of documents. Each document should be a dictionary.
598639
primary_key (optional):
599640
The primary-key used in index. Ignored if already set up
641+
serializer (optional):
642+
A custom JSONEncoder to handle serializing fields that the built-in json.dumps
643+
cannot handle, for example UUID and datetime.
600644
601645
Returns
602646
-------
@@ -610,7 +654,7 @@ def update_documents(
610654
An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
611655
"""
612656
url = self._build_url(primary_key)
613-
response = self.http.put(url, documents)
657+
response = self.http.put(url, documents, serializer=serializer)
614658
return TaskInfo(**response)
615659

616660
def update_documents_ndjson(
@@ -644,6 +688,8 @@ def update_documents_json(
644688
self,
645689
str_documents: str,
646690
primary_key: Optional[str] = None,
691+
*,
692+
serializer: Optional[Type[JSONEncoder]] = None,
647693
) -> TaskInfo:
648694
"""Update documents as a json string in the index.
649695
@@ -653,6 +699,9 @@ def update_documents_json(
653699
String of document from a JSON file.
654700
primary_key (optional):
655701
The primary-key used in index. Ignored if already set up
702+
serializer (optional):
703+
A custom JSONEncoder to handle serializing fields that the built-in json.dumps
704+
cannot handle, for example UUID and datetime.
656705
657706
Returns
658707
-------
@@ -665,7 +714,9 @@ def update_documents_json(
665714
MeilisearchApiError
666715
An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
667716
"""
668-
return self.update_documents_raw(str_documents, primary_key, "application/json")
717+
return self.update_documents_raw(
718+
str_documents, primary_key, "application/json", serializer=serializer
719+
)
669720

670721
def update_documents_csv(
671722
self,
@@ -703,6 +754,8 @@ def update_documents_raw(
703754
primary_key: Optional[str] = None,
704755
content_type: Optional[str] = None,
705756
csv_delimiter: Optional[str] = None,
757+
*,
758+
serializer: Optional[Type[JSONEncoder]] = None,
706759
) -> TaskInfo:
707760
"""Update documents as a string in the index.
708761
@@ -717,6 +770,9 @@ def update_documents_raw(
717770
csv_delimiter:
718771
One ASCII character used to customize the delimiter for CSV.
719772
Note: The csv delimiter can only be used with the Content-Type text/csv.
773+
serializer (optional):
774+
A custom JSONEncoder to handle serializing fields that the built-in json.dumps
775+
cannot handle, for example UUID and datetime.
720776
721777
Returns
722778
-------
@@ -730,14 +786,15 @@ def update_documents_raw(
730786
An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
731787
"""
732788
url = self._build_url(primary_key=primary_key, csv_delimiter=csv_delimiter)
733-
response = self.http.put(url, str_documents, content_type)
789+
response = self.http.put(url, str_documents, content_type, serializer=serializer)
734790
return TaskInfo(**response)
735791

736792
def update_documents_in_batches(
737793
self,
738794
documents: Sequence[Mapping[str, Any]],
739795
batch_size: int = 1000,
740796
primary_key: Optional[str] = None,
797+
serializer: Optional[Type[JSONEncoder]] = None,
741798
) -> List[TaskInfo]:
742799
"""Update documents to the index in batches.
743800
@@ -749,6 +806,9 @@ def update_documents_in_batches(
749806
The number of documents that should be included in each batch. Default = 1000
750807
primary_key (optional):
751808
The primary-key used in index. Ignored if already set up.
809+
serializer (optional):
810+
A custom JSONEncoder to handle serializing fields that the built-in json.dumps
811+
cannot handle, for example UUID and datetime.
752812
753813
Returns
754814
-------
@@ -766,7 +826,7 @@ def update_documents_in_batches(
766826
tasks = []
767827

768828
for document_batch in self._batch(documents, batch_size):
769-
update_task = self.update_documents(document_batch, primary_key)
829+
update_task = self.update_documents(document_batch, primary_key, serializer=serializer)
770830
tasks.append(update_task)
771831

772832
return tasks

0 commit comments

Comments
 (0)