Skip to content

Commit 5e98900

Browse files
committed
Merge branch 'main' into add-composite-embeds-add-pool-hf
2 parents f75e2dc + 2b0bd13 commit 5e98900

File tree

8 files changed

+579
-155
lines changed

8 files changed

+579
-155
lines changed

README.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,27 @@ JSON output:
143143
}
144144
```
145145

146+
#### Hybrid Search <!-- omit in toc -->
147+
148+
Hybrid search combines traditional keyword search with semantic search to return more relevant results. To use this feature, an embedder must be configured in your index settings.
149+
150+
```python
151+
# Using hybrid search with the search method
152+
index.search(
153+
'action movie',
154+
{
155+
"hybrid": {"semanticRatio": 0.5, "embedder": "default"}
156+
}
157+
)
158+
```
159+
160+
The `semanticRatio` parameter (a number from 0 to 1, inclusive) controls the balance between keyword search and semantic search:
161+
- 0: Only keyword search
162+
- 1: Only semantic search
163+
- Values in between: A mix of both approaches
164+
165+
The `embedder` parameter specifies which of the embedders configured in your index settings is used for the semantic part of the search.
166+
146167
#### Custom Search With Filters <!-- omit in toc -->
147168

148169
If you want to enable filtering, you must add your attributes to the `filterableAttributes` index setting.

meilisearch/index.py

Lines changed: 88 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -24,20 +24,23 @@
2424
from meilisearch.config import Config
2525
from meilisearch.errors import version_error_hint_message
2626
from meilisearch.models.document import Document, DocumentsResults
27-
from meilisearch.models.index import (
28-
CompositeEmbedder,
27+
from meilisearch.models.embedders import (
2928
Embedders,
30-
Faceting,
29+
EmbedderType,
3130
HuggingFaceEmbedder,
32-
IndexStats,
33-
LocalizedAttributes,
3431
OllamaEmbedder,
3532
OpenAiEmbedder,
33+
RestEmbedder,
34+
UserProvidedEmbedder,
35+
CompositeEmbedder,
36+
)
37+
from meilisearch.models.index import (
38+
Faceting,
39+
IndexStats,
40+
LocalizedAttributes,
3641
Pagination,
3742
ProximityPrecision,
38-
RestEmbedder,
3943
TypoTolerance,
40-
UserProvidedEmbedder,
4144
)
4245
from meilisearch.models.task import Task, TaskInfo, TaskResults
4346
from meilisearch.task import TaskHandler
@@ -278,14 +281,21 @@ def get_stats(self) -> IndexStats:
278281
def search(self, query: str, opt_params: Optional[Mapping[str, Any]] = None) -> Dict[str, Any]:
279282
"""Search in the index.
280283
284+
https://www.meilisearch.com/docs/reference/api/search
285+
281286
Parameters
282287
----------
283288
query:
284289
String containing the searched word(s)
285290
opt_params (optional):
286291
Dictionary containing optional query parameters.
287-
Note: The vector parameter is only available in Meilisearch >= v1.13.0
288-
https://www.meilisearch.com/docs/reference/api/search#search-in-an-index
292+
Common parameters include:
293+
- hybrid: Dict with 'semanticRatio' and 'embedder' fields for hybrid search
294+
- vector: Array of numbers for vector search
295+
- retrieveVectors: Boolean to include vector data in search results
296+
- filter: Filter queries by an attribute's value
297+
- limit: Maximum number of documents returned
298+
- offset: Number of documents to skip
289299
290300
Returns
291301
-------
@@ -299,7 +309,9 @@ def search(self, query: str, opt_params: Optional[Mapping[str, Any]] = None) ->
299309
"""
300310
if opt_params is None:
301311
opt_params = {}
312+
302313
body = {"q": query, **opt_params}
314+
303315
return self.http.post(
304316
f"{self.config.paths.index}/{self.uid}/{self.config.paths.search}",
305317
body=body,
@@ -956,15 +968,7 @@ def get_settings(self) -> Dict[str, Any]:
956968
)
957969

958970
if settings.get("embedders"):
959-
embedders: dict[
960-
str,
961-
OpenAiEmbedder
962-
| HuggingFaceEmbedder
963-
| OllamaEmbedder
964-
| RestEmbedder
965-
| CompositeEmbedder
966-
| UserProvidedEmbedder,
967-
] = {}
971+
embedders: dict[str, EmbedderType] = {}
968972
for k, v in settings["embedders"].items():
969973
if v.get("source") == "openAi":
970974
embedders[k] = OpenAiEmbedder(**v)
@@ -992,6 +996,26 @@ def update_settings(self, body: MutableMapping[str, Any]) -> TaskInfo:
992996
----------
993997
body:
994998
Dictionary containing the settings of the index.
999+
Supported settings include:
1000+
- 'rankingRules': List of ranking rules
1001+
- 'distinctAttribute': Attribute for deduplication
1002+
- 'searchableAttributes': Attributes that can be searched
1003+
- 'displayedAttributes': Attributes to display in search results
1004+
- 'stopWords': Words ignored in search queries
1005+
- 'synonyms': Dictionary of synonyms
1006+
- 'filterableAttributes': Attributes that can be used for filtering
1007+
- 'sortableAttributes': Attributes that can be used for sorting
1008+
- 'typoTolerance': Settings for typo tolerance
1009+
- 'pagination': Settings for pagination
1010+
- 'faceting': Settings for faceting
1011+
- 'dictionary': List of custom dictionary words
1012+
- 'separatorTokens': List of separator tokens
1013+
- 'nonSeparatorTokens': List of non-separator tokens
1014+
- 'embedders': Dictionary of embedder configurations for AI-powered search
1015+
- 'searchCutoffMs': Maximum search time in milliseconds
1016+
- 'proximityPrecision': Precision for proximity ranking
1017+
- 'localizedAttributes': Settings for localized attributes
1018+
9951019
More information:
9961020
https://www.meilisearch.com/docs/reference/api/settings#update-settings
9971021
@@ -1004,7 +1028,8 @@ def update_settings(self, body: MutableMapping[str, Any]) -> TaskInfo:
10041028
Raises
10051029
------
10061030
MeilisearchApiError
1007-
An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
1031+
An error containing details about why Meilisearch can't process your request.
1032+
Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
10081033
"""
10091034
if body.get("embedders"):
10101035
for _, v in body["embedders"].items():
@@ -1883,10 +1908,13 @@ def reset_non_separator_tokens(self) -> TaskInfo:
18831908
def get_embedders(self) -> Embedders | None:
18841909
"""Get embedders of the index.
18851910
1911+
Retrieves the current embedder configuration from Meilisearch.
1912+
18861913
Returns
18871914
-------
1888-
settings:
1889-
The embedders settings of the index.
1915+
Embedders:
1916+
The embedders settings of the index, or None if no embedders are configured.
1917+
Contains a dictionary of embedder configurations, where keys are embedder names.
18901918
18911919
Raises
18921920
------
@@ -1898,38 +1926,37 @@ def get_embedders(self) -> Embedders | None:
18981926
if not response:
18991927
return None
19001928

1901-
embedders: dict[
1902-
str,
1903-
OpenAiEmbedder
1904-
| HuggingFaceEmbedder
1905-
| OllamaEmbedder
1906-
| RestEmbedder
1907-
| CompositeEmbedder
1908-
| UserProvidedEmbedder,
1909-
] = {}
1929+
embedders: dict[str, EmbedderType] = {}
19101930
for k, v in response.items():
1911-
if v.get("source") == "openAi":
1931+
source = v.get("source")
1932+
if source == "openAi":
19121933
embedders[k] = OpenAiEmbedder(**v)
1913-
elif v.get("source") == "ollama":
1914-
embedders[k] = OllamaEmbedder(**v)
1915-
elif v.get("source") == "huggingFace":
1934+
elif source == "huggingFace":
19161935
embedders[k] = HuggingFaceEmbedder(**v)
1917-
elif v.get("source") == "rest":
1936+
elif source == "ollama":
1937+
embedders[k] = OllamaEmbedder(**v)
1938+
elif source == "rest":
19181939
embedders[k] = RestEmbedder(**v)
1919-
elif v.get("source") == "composite":
1940+
elif source == "composite":
19201941
embedders[k] = CompositeEmbedder(**v)
1942+
elif source == "userProvided":
1943+
embedders[k] = UserProvidedEmbedder(**v)
19211944
else:
1945+
# Default to UserProvidedEmbedder for unknown sources
19221946
embedders[k] = UserProvidedEmbedder(**v)
19231947

19241948
return Embedders(embedders=embedders)
19251949

19261950
def update_embedders(self, body: Union[MutableMapping[str, Any], None]) -> TaskInfo:
19271951
"""Update embedders of the index.
19281952
1953+
Updates the embedder configuration for the index. The embedder configuration
1954+
determines how Meilisearch generates vector embeddings for documents.
1955+
19291956
Parameters
19301957
----------
19311958
body: dict
1932-
Dictionary containing the embedders.
1959+
Dictionary containing the embedders configuration.
19331960
19341961
Returns
19351962
-------
@@ -1940,13 +1967,30 @@ def update_embedders(self, body: Union[MutableMapping[str, Any], None]) -> TaskI
19401967
Raises
19411968
------
19421969
MeilisearchApiError
1943-
An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
1970+
An error containing details about why Meilisearch can't process your request.
1971+
Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
19441972
"""
1973+
if body is not None and body.get("embedders"):
1974+
embedders: dict[str, EmbedderType] = {}
1975+
for k, v in body["embedders"].items():
1976+
source = v.get("source")
1977+
if source == "openAi":
1978+
embedders[k] = OpenAiEmbedder(**v)
1979+
elif source == "huggingFace":
1980+
embedders[k] = HuggingFaceEmbedder(**v)
1981+
elif source == "ollama":
1982+
embedders[k] = OllamaEmbedder(**v)
1983+
elif source == "rest":
1984+
embedders[k] = RestEmbedder(**v)
1985+
elif source == "composite":
1986+
embedders[k] = CompositeEmbedder(**v)
1987+
elif source == "userProvided":
1988+
embedders[k] = UserProvidedEmbedder(**v)
1989+
else:
1990+
# Default to UserProvidedEmbedder for unknown sources
1991+
embedders[k] = UserProvidedEmbedder(**v)
19451992

1946-
if body:
1947-
for _, v in body.items():
1948-
if "documentTemplateMaxBytes" in v and v["documentTemplateMaxBytes"] is None:
1949-
del v["documentTemplateMaxBytes"]
1993+
body = {"embedders": {k: v.model_dump(by_alias=True) for k, v in embedders.items()}}
19501994

19511995
task = self.http.patch(self.__settings_url_for(self.config.paths.embedders), body)
19521996

@@ -1955,6 +1999,8 @@ def update_embedders(self, body: Union[MutableMapping[str, Any], None]) -> TaskI
19551999
def reset_embedders(self) -> TaskInfo:
19562000
"""Reset embedders of the index to default values.
19572001
2002+
Removes all embedder configurations from the index.
2003+
19582004
Returns
19592005
-------
19602006
task_info:

0 commit comments

Comments
 (0)