Skip to content

Commit 9903f95

Browse files
committed
docs
1 parent 5e98900 commit 9903f95

File tree

2 files changed

+65
-11
lines changed

2 files changed

+65
-11
lines changed

meilisearch/models/embedders.py

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,17 @@ class Distribution(CamelBase):
2121
sigma: float
2222

2323

24-
class PoolingOpt(str, Enum):
25-
"""Pooling options for HuggingFaceEmbedder.
24+
class PoolingType(str, Enum):
25+
"""Pooling strategies for HuggingFaceEmbedder.
2626
27-
Parameters
27+
Attributes
2828
----------
29-
mean: float
30-
Mean value between 0 and 1
31-
sigma: float
32-
Sigma value between 0 and 1
29+
USE_MODEL : str
30+
Use the model's default pooling strategy.
31+
FORCE_MEAN : str
32+
Force mean pooling over the token embeddings.
33+
FORCE_CLS : str
34+
Use the [CLS] token embedding as the sentence representation.
3335
"""
3436

3537
USE_MODEL = "useModel"
@@ -96,6 +98,8 @@ class HuggingFaceEmbedder(CamelBase):
9698
Describes the natural distribution of search results
9799
binary_quantized: Optional[bool]
98100
Once set to true, irreversibly converts all vector dimensions to 1-bit values
101+
pooling: Optional[PoolingType]
102+
Configures how individual tokens are merged into a single embedding
99103
"""
100104

101105
source: str = "huggingFace"
@@ -107,7 +111,7 @@ class HuggingFaceEmbedder(CamelBase):
107111
document_template_max_bytes: Optional[int] = None # Default to 400
108112
distribution: Optional[Distribution] = None
109113
binary_quantized: Optional[bool] = None
110-
pooling: Optional[PoolingOpt] = None
114+
pooling: Optional[PoolingType] = None
111115

112116

113117
class OllamaEmbedder(CamelBase):
@@ -210,15 +214,27 @@ class UserProvidedEmbedder(CamelBase):
210214

211215

212216
class CompositeEmbedder(CamelBase):
217+
"""Composite embedder configuration.
218+
219+
Parameters
220+
----------
221+
source: str
222+
The embedder source, must be "composite"
223+
indexing_embedder: Union[OpenAiEmbedder, HuggingFaceEmbedder, OllamaEmbedder, RestEmbedder, UserProvidedEmbedder]
224+
The embedder used for embedding documents at indexing time (may be a remote embedder)
225+
search_embedder: Union[OpenAiEmbedder, HuggingFaceEmbedder, OllamaEmbedder, RestEmbedder, UserProvidedEmbedder]
226+
The embedder used for embedding queries at search time (may be a local embedder)
227+
"""
228+
213229
source: str = "composite"
214-
search_embedder: Union[
230+
indexing_embedder: Union[
215231
OpenAiEmbedder,
216232
HuggingFaceEmbedder,
217233
OllamaEmbedder,
218234
RestEmbedder,
219235
UserProvidedEmbedder,
220236
]
221-
indexing_embedder: Union[
237+
search_embedder: Union[
222238
OpenAiEmbedder,
223239
HuggingFaceEmbedder,
224240
OllamaEmbedder,

tests/settings/test_settings_embedders.py

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
# pylint: disable=redefined-outer-name
22

3-
from meilisearch.models.embedders import OpenAiEmbedder, UserProvidedEmbedder
3+
import pytest
4+
from meilisearch.models.embedders import (
5+
HuggingFaceEmbedder,
6+
OpenAiEmbedder,
7+
UserProvidedEmbedder,
8+
CompositeEmbedder,
9+
)
410

511

612
def test_get_default_embedders(empty_index):
@@ -183,3 +189,35 @@ def test_user_provided_embedder_format(empty_index):
183189
assert embedders.embedders["user_provided"].distribution.mean == 0.5
184190
assert embedders.embedders["user_provided"].distribution.sigma == 0.1
185191
assert embedders.embedders["user_provided"].binary_quantized is False
192+
193+
194+
@pytest.mark.usefixtures("enable_composite_embedders")
195+
def test_composite_embedder_format(empty_index):
196+
"""Tests that CompositeEmbedder has the required fields and proper format."""
197+
index = empty_index()
198+
199+
hf_default = HuggingFaceEmbedder().model_dump(by_alias=True, exclude_none=True)
200+
201+
# create composite embedder
202+
composite_embedder = {
203+
"default": {
204+
"source": "composite",
205+
"searchEmbedder": hf_default,
206+
"indexingEmbedder": hf_default,
207+
}
208+
}
209+
210+
response = index.update_embedders(composite_embedder)
211+
index.wait_for_task(response.task_uid)
212+
embedders = index.get_embedders()
213+
print(embedders)
214+
assert embedders.embedders["composite"].source == "composite"
215+
216+
assert isinstance(embedders.embedders["composite"], CompositeEmbedder)
217+
assert isinstance(embedders.embedders["composite"].search_embedder, HuggingFaceEmbedder)
218+
assert isinstance(embedders.embedders["composite"].indexing_embedder, HuggingFaceEmbedder)
219+
220+
assert not hasattr(embedders.embedders["composite"].search_embedder, "document_template")
221+
assert not hasattr(embedders.embedders["composite"].search_embedder, "document_template_max_bytes")
222+
assert hasattr(embedders.embedders["composite"].indexing_embedder, "document_template")
223+

0 commit comments

Comments
 (0)