Add csv delimiter

alallema · alallema · commit c3b668c801b2 · 2023-03-29T12:43:36.000+02:00
diff --git a/datasets/songs_custom_delimiter.csv b/datasets/songs_custom_delimiter.csv
@@ -0,0 +1,21 @@
+id;title;album;artist;genre;country;released;duration;released-timestamp;duration-float
+702481615;Armatage Shanks;Dookie: The Ultimate Critical Review;Green Day;Rock;Europe;2005;;1104537600;
+888221515;Old Folks;Six Classic Albums Plus Bonus Tracks;Harold Land;Jazz;Europe;2013;6:36;1356998400;6.36
+1382413601;คำขอร้อง;สำเนียงคนจันทร์ / เอาเถอะถ้าเห็นเขาดีกว่า;อิทธิพล บำรุงกุล;"Folk; World; & Country";Thailand;;;;
+190889300;Track 1;Summer Breeze;Dreas;Funk / Soul;US;2008;18:56;1199145600;18.56
+813645611;Slave (Alternative Version);Honky Château;Elton John;Rock;Europe;;2:53;;2.5300000000000002
+394018506;Sex & Geld;Trackz Für Den Index;Mafia Clikk;Hip Hop;Germany;2006;5:02;1136073600;5.02
+1522481803;Pisciaunella;Don Pepp U Pacce;Giovanni Russo (2);"Folk; World; & Country";Italy;1980;;315532800;
+862296713;不知;Kiss 2001 Hong Kong Live Concert;Various;Electronic;Hong Kong;2002-04-13;;1018656000;
+467946423;Rot;Be Quick Or Be Dead Vol. 3;Various;Electronic;Serbia;2013-06-20;1:00;1371686400;1
+1323854803;"Simulation Project 1; ツキハナ「Moonflower」";Unlimited Dream Company;Amun Dragoon;Electronic;US;2018-04-10;2:44;1523318400;2.44
+235115704;Doctor Vine;The Big F;The Big F;Rock;US;1989;5:29;599616000;5.29
+249025232;"Ringel; Ringel; Reihe";Kinderlieder ABC - Der Bielefelder Kinderchor Singt 42 Lieder Von A-Z;Der Bielefelder Kinderchor;Children's;Germany;1971;;31536000;
+710094000;Happy Safari = Safari Feliz;Safari Swings Again = El Safari Sigue En Su Swing;Bert Kaempfert & His Orchestra;Jazz;Argentina;1977;2:45;220924800;2.45
+538632700;Take Me Up;Spring;Various;Electronic;US;2000;3:06;946684800;3.06
+1556505508;Doin To Me ( Radio Version );Say My Name;Netta Dogg;Hip Hop;US;2005;;1104537600;
+1067031900;Concerto For Balloon & Orchestra / Concerto For Synthesizer & Orchestra;Concerto For Balloon & Orchestra And Three Overtures;Stanyan String & Wind Ensemble;Classical;US;1977;;220924800;
+137251914;"I Love The Nightlife (Disco 'Round) (Real Rapino 7"" Mix)";The Adventures Of Priscilla: Queen Of The Desert - Original Motion Picture Soundtrack;Various;Stage & Screen;US;1994;3:31;757382400;3.31
+554983904;Walking On The Moon;Certifiable (Live In Buenos Aires);The Police;Rock;Malaysia;2008-11-00;;1225497600;
+557616002;Two Soldiers;Jerry Garcia / David Grisman;David Grisman;"Folk; World; & Country";US;2014-04-00;4:24;1396310400;4.24
+878936809;When You Gonna Learn;Live At Firenze 93;Jamiroquai;Funk / Soul;France;2004;13:01;1072915200;13.01
diff --git a/meilisearch/index.py b/meilisearch/index.py
@@ -434,6 +434,7 @@ def add_documents_csv(
         self,
         str_documents: str,
         primary_key: Optional[str] = None,
+        csv_delimiter: Optional[str] = None,
     ) -> TaskInfo:
         """Add string documents from a CSV file to the index.
 
@@ -443,6 +444,8 @@ def add_documents_csv(
             String of document from a CSV file.
         primary_key (optional):
             The primary-key used in index. Ignored if already set up.
+        csv_delimiter (optional):
+            One ASCII character used to customize the delimiter for CSV. Comma used by default.
 
         Returns
         -------
@@ -455,7 +458,7 @@ def add_documents_csv(
         MeilisearchApiError
             An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://docs.meilisearch.com/errors/#meilisearch-errors
         """
-        return self.add_documents_raw(str_documents, primary_key, "text/csv")
+        return self.add_documents_raw(str_documents, primary_key, "text/csv", csv_delimiter)
 
     def add_documents_ndjson(
         self,
@@ -489,6 +492,7 @@ def add_documents_raw(
         str_documents: str,
         primary_key: Optional[str] = None,
         content_type: Optional[str] = None,
+        csv_delimiter: Optional[str] = None,
     ) -> TaskInfo:
         """Add string documents to the index.
 
@@ -499,7 +503,10 @@ def add_documents_raw(
         primary_key (optional):
             The primary-key used in index. Ignored if already set up.
         type:
-            The type of document. Type available: 'csv', 'json', 'jsonl'
+            The type of document. Type available: 'csv', 'json', 'jsonl'.
+        csv_delimiter (optional):
+            One ASCII character used to customize the delimiter for CSV.
+            Note: The csv delimiter can only be used with the Content-Type text/csv.
 
         Returns
         -------
@@ -512,7 +519,7 @@ def add_documents_raw(
         MeilisearchApiError
             An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://docs.meilisearch.com/errors/#meilisearch-errors
         """
-        url = self._build_url(primary_key)
+        url = self._build_url(primary_key=primary_key, csv_delimiter=csv_delimiter)
         response = self.http.post(url, str_documents, content_type)
         return TaskInfo(**response)
 
@@ -601,6 +608,7 @@ def update_documents_csv(
         self,
         str_documents: str,
         primary_key: Optional[str] = None,
+        csv_delimiter: Optional[str] = None,
     ) -> TaskInfo:
         """Update documents as a csv string in the index.
 
@@ -609,7 +617,9 @@ def update_documents_csv(
         str_documents:
             String of document from a CSV file.
         primary_key (optional):
-            The primary-key used in index. Ignored if already set up
+            The primary-key used in index. Ignored if already set up.
+        csv_delimiter (optional):
+            One ASCII character used to customize the delimiter for CSV. Comma used by default.
 
         Returns
         -------
@@ -622,13 +632,14 @@ def update_documents_csv(
         MeilisearchApiError
             An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://docs.meilisearch.com/errors/#meilisearch-errors
         """
-        return self.update_documents_raw(str_documents, primary_key, "text/csv")
+        return self.update_documents_raw(str_documents, primary_key, "text/csv", csv_delimiter)
 
     def update_documents_raw(
         self,
         str_documents: str,
         primary_key: Optional[str] = None,
         content_type: Optional[str] = None,
+        csv_delimiter: Optional[str] = None,
     ) -> TaskInfo:
         """Update documents as a string in the index.
 
@@ -640,6 +651,9 @@ def update_documents_raw(
             The primary-key used in index. Ignored if already set up.
         type:
             The type of document. Type available: 'csv', 'json', 'jsonl'
+        csv_delimiter (optional):
+            One ASCII character used to customize the delimiter for CSV.
+            Note: The csv delimiter can only be used with the Content-Type text/csv.
 
         Returns
         -------
@@ -652,7 +666,7 @@ def update_documents_raw(
         MeilisearchApiError
             An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://docs.meilisearch.com/errors/#meilisearch-errors
         """
-        url = self._build_url(primary_key)
+        url = self._build_url(primary_key=primary_key, csv_delimiter=csv_delimiter)
         response = self.http.put(url, str_documents, content_type)
         return TaskInfo(**response)
 
@@ -1530,8 +1544,20 @@ def __settings_url_for(self, sub_route: str) -> str:
     def _build_url(
         self,
         primary_key: Optional[str] = None,
+        csv_delimiter: Optional[str] = None,
     ) -> str:
+        """Build the documents route URL, with optional query parameters.
+
+        ``primaryKey`` and ``csvDelimiter`` are appended as a URL-encoded query
+        string only when the matching argument is truthy (neither None nor "").
+        """
-        if primary_key is None:
+        parameters = {}
+        if primary_key:
+            parameters["primaryKey"] = primary_key
+        if csv_delimiter:
+            parameters["csvDelimiter"] = csv_delimiter
+        # Check the collected parameters, not the raw arguments: an empty string is
+        # skipped above and must not leave a dangling "?" on the URL.
+        if not parameters:
             return f"{self.config.paths.index}/{self.uid}/{self.config.paths.document}"
-        primary_key = parse.urlencode({"primaryKey": primary_key})
-        return f"{self.config.paths.index}/{self.uid}/{self.config.paths.document}?{primary_key}"
+        return f"{self.config.paths.index}/{self.uid}/{self.config.paths.document}?{parse.urlencode(parameters)}"
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -70,16 +70,25 @@ def small_movies_json_file():
 @fixture(scope="session")
 def songs_csv():
     """
-    Runs once per session. Provides the content of songs.csv from read..
+    Runs once per session. Provides the content of songs.csv as UTF-8 encoded bytes.
     """
     with open("./datasets/songs.csv", encoding="utf-8") as song_csv_file:
         return song_csv_file.read().encode("utf-8")
 
 
+@fixture(scope="session")
+def songs_csv_custom_separator():
+    """
+    Runs once per session. Provides songs_custom_delimiter.csv (";"-delimited) as UTF-8 bytes.
+    """
+    with open("./datasets/songs_custom_delimiter.csv", encoding="utf-8") as song_csv_file:
+        return song_csv_file.read().encode("utf-8")
+
+
 @fixture(scope="session")
 def songs_ndjson():
     """
-    Runs once per session. Provides the content of songs.ndjson from read..
+    Runs once per session. Provides the content of songs.ndjson as UTF-8 encoded bytes.
     """
     with open("./datasets/songs.ndjson", encoding="utf-8") as song_ndjson_file:
         return song_ndjson_file.read().encode("utf-8")
diff --git a/tests/index/test_index_document_meilisearch.py b/tests/index/test_index_document_meilisearch.py
@@ -197,6 +197,20 @@ def test_add_documents_csv(empty_index, songs_csv):
     assert index.get_primary_key() == "id"
 
 
+def test_add_documents_csv_with_delimiter(empty_index, songs_csv_custom_separator):
+    """Tests adding new documents from a ";"-delimited csv string to a clean index."""
+    index = empty_index("csv-delimiter")
+    response = index.add_documents_csv(songs_csv_custom_separator, csv_delimiter=";")
+    assert isinstance(response, TaskInfo)
+    assert response.task_uid is not None
+    task = index.wait_for_task(response.task_uid)
+    assert task.status == "succeeded"
+    assert task.details["receivedDocuments"] == 20
+    documents = index.get_documents().results
+    assert documents[1].country == "Europe"
+    assert documents[4].artist == "Elton John"
+
+
 def test_update_documents_csv(index_with_documents, songs_csv):
     """Tests updating a single document with csv string."""
     index = index_with_documents()
@@ -208,6 +222,20 @@ def test_update_documents_csv(index_with_documents, songs_csv):
     assert index.get_primary_key() == "id"
 
 
+def test_update_documents_csv_with_delimiter(index_with_documents, songs_csv_custom_separator):
+    """Tests updating documents with a ";"-delimited csv string."""
+    index = index_with_documents()
+    response = index.update_documents_csv(songs_csv_custom_separator, csv_delimiter=";")
+    assert isinstance(response, TaskInfo)
+    assert response.task_uid is not None
+    task = index.wait_for_task(response.task_uid)
+    assert task.status == "succeeded"
+    assert task.details["receivedDocuments"] == 20
+    document = index.get_document("813645611")
+    assert document.country == "Europe"
+    assert document.artist == "Elton John"
+
+
 def test_add_documents_json(empty_index, small_movies_json_file):
     """Tests adding new documents to a clean index."""
     index = empty_index()
diff --git a/tests/index/test_index_search_meilisearch.py b/tests/index/test_index_search_meilisearch.py
@@ -2,7 +2,7 @@
 
 
 def test_basic_search(index_with_documents):
-    """Tests search with an simple query."""
+    """Tests search with a simple query."""
     response = index_with_documents().search("How to Train Your Dragon")
     assert isinstance(response, dict)
     assert response["hits"][0]["id"] == "166428"
@@ -356,7 +356,7 @@ def test_phrase_search(index_with_documents):
 
 
 def test_basic_search_on_nested_documents(index_with_documents, nested_movies):
-    """Tests search with an simple query on nested fields."""
+    """Tests search with a simple query on nested fields."""
     response = index_with_documents("nested_fields_index", nested_movies).search("An awesome")
     assert isinstance(response, dict)
     assert response["hits"][0]["id"] == 5