Skip to content

New format handling CSV NDJSON #329

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Oct 11, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 19 additions & 7 deletions meilisearch/_httprequests.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,26 @@ def send_request(
http_method: Callable,
path: str,
body: Optional[Union[Dict[str, Any], List[Dict[str, Any]], List[str]]] = None,
content_type: Optional[str] = None,
) -> Any:
if content_type:
self.headers['Content-Type'] = content_type
try:
request_path = self.config.url + '/' + path
request = http_method(
request_path,
timeout=self.config.timeout,
headers=self.headers,
data=json.dumps(body) if body else "null"
)
if not content_type:
request = http_method(
request_path,
timeout=self.config.timeout,
headers=self.headers,
data=json.dumps(body) if body else "null"
)
else:
request = http_method(
request_path,
timeout=self.config.timeout,
headers=self.headers,
data=body
)
return self.__validate(request)

except requests.exceptions.Timeout as err:
Expand All @@ -46,8 +57,9 @@ def post(
self,
path: str,
body: Optional[Union[Dict[str, Any], List[Dict[str, Any]], List[str]]] = None,
content_type: Optional[str] = None,
) -> Any:
return self.send_request(requests.post, path, body)
return self.send_request(requests.post, path, body, content_type)

def put(
self,
Expand Down
122 changes: 122 additions & 0 deletions meilisearch/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,128 @@ def add_documents_in_batches(

return update_ids

def add_documents_json(
    self,
    str_documents: str,
    primary_key: Optional[str] = None,
) -> Dict[str, int]:
    """Send a raw JSON payload of documents to the index.

    Thin convenience wrapper: forwards to ``add_documents_raw`` with the
    document type fixed to ``'json'``.

    Parameters
    ----------
    str_documents:
        Documents as a string, as read from a JSON file.
    primary_key (optional):
        The primary-key used in index. Ignored if already set up.

    Returns
    -------
    update:
        Whatever ``add_documents_raw`` returns; a dictionary containing an
        update id to track the action:
        https://docs.meilisearch.com/reference/api/updates.html#get-an-update-status

    Raises
    ------
    MeiliSearchApiError
        An error containing details about why MeiliSearch can't process your request. MeiliSearch error codes are described here: https://docs.meilisearch.com/errors/#meilisearch-errors
    """
    update = self.add_documents_raw(
        str_documents,
        primary_key=primary_key,
        doc_type='json',
    )
    return update

def add_documents_csv(
    self,
    str_documents: str,
    primary_key: Optional[str] = None,
) -> Dict[str, int]:
    """Send a raw CSV payload of documents to the index.

    Thin convenience wrapper: forwards to ``add_documents_raw`` with the
    document type fixed to ``'csv'``.

    Parameters
    ----------
    str_documents:
        Documents as a string, as read from a CSV file.
    primary_key (optional):
        The primary-key used in index. Ignored if already set up.

    Returns
    -------
    update:
        Whatever ``add_documents_raw`` returns; a dictionary containing an
        update id to track the action:
        https://docs.meilisearch.com/reference/api/updates.html#get-an-update-status

    Raises
    ------
    MeiliSearchApiError
        An error containing details about why MeiliSearch can't process your request. MeiliSearch error codes are described here: https://docs.meilisearch.com/errors/#meilisearch-errors
    """
    update = self.add_documents_raw(
        str_documents,
        primary_key=primary_key,
        doc_type='csv',
    )
    return update

def add_documents_ndjson(
    self,
    str_documents: str,
    primary_key: Optional[str] = None,
) -> Dict[str, int]:
    """Send a raw NDJSON payload of documents to the index.

    Thin convenience wrapper: forwards to ``add_documents_raw`` with the
    document type fixed to ``'jsonl'``.

    Parameters
    ----------
    str_documents:
        Documents as a string, as read from a NDJSON file.
    primary_key (optional):
        The primary-key used in index. Ignored if already set up.

    Returns
    -------
    update:
        Whatever ``add_documents_raw`` returns; a dictionary containing an
        update id to track the action:
        https://docs.meilisearch.com/reference/api/updates.html#get-an-update-status

    Raises
    ------
    MeiliSearchApiError
        An error containing details about why MeiliSearch can't process your request. MeiliSearch error codes are described here: https://docs.meilisearch.com/errors/#meilisearch-errors
    """
    update = self.add_documents_raw(
        str_documents,
        primary_key=primary_key,
        doc_type='jsonl',
    )
    return update

def add_documents_raw(
    self,
    str_documents: str,
    primary_key: Optional[str] = None,
    doc_type: Optional[str] = None,
) -> Dict[str, int]:
    """Add string documents to the index.

    The payload is posted as-is to the documents route of this index, with
    a ``Content-Type`` chosen from ``doc_type``.

    Parameters
    ----------
    str_documents:
        String of document.
    primary_key (optional):
        The primary-key used in index. Ignored if already set up.
    doc_type:
        The type of document. Type available: 'csv', 'json', 'jsonl'

    Returns
    -------
    update:
        Dictionary containing an update id to track the action:
        https://docs.meilisearch.com/reference/api/updates.html#get-an-update-status

    Raises
    ------
    ValueError
        If ``doc_type`` is missing or is not one of the supported types.
    MeiliSearchApiError
        An error containing details about why MeiliSearch can't process your request. MeiliSearch error codes are described here: https://docs.meilisearch.com/errors/#meilisearch-errors
    """
    content_types = {
        'json': 'application/json',
        'jsonl': 'application/x-ndjson',
        'csv': 'text/csv',
    }
    content_type = content_types.get(doc_type)
    if content_type is None:
        # Previously an unknown or omitted doc_type left ``content_type``
        # unassigned and crashed with UnboundLocalError at the return;
        # fail early with an actionable message before any request is sent.
        supported = ', '.join(repr(name) for name in content_types)
        raise ValueError(
            f'Unsupported doc_type {doc_type!r}; expected one of: {supported}'
        )

    url = f'{self.config.paths.index}/{self.uid}/{self.config.paths.document}'
    if primary_key is not None:
        # urlencode escapes the key so it is safe inside the query string.
        query = urllib.parse.urlencode({'primaryKey': primary_key})
        url = f'{url}?{query}'
    return self.http.post(url, str_documents, content_type)

def update_documents(
self,
documents: List[Dict[str, Any]],
Expand Down