elastic
diff --git a/‎elasticsearch/helpers/vectorstore/__init__.py
Lines changed: 16 additions & 16 deletions b/‎elasticsearch/helpers/vectorstore/__init__.py
Lines changed: 16 additions & 16 deletions
diff --git a/‎elasticsearch/helpers/vectorstore/_async/_utils.py
Lines changed: 8 additions & 20 deletions b/‎elasticsearch/helpers/vectorstore/_async/_utils.py
Lines changed: 8 additions & 20 deletions
diff --git a/‎elasticsearch/helpers/vectorstore/_async/embedding_service.py
Lines changed: 17 additions & 25 deletions b/‎elasticsearch/helpers/vectorstore/_async/embedding_service.py
Lines changed: 17 additions & 25 deletions
diff --git a/‎elasticsearch/helpers/vectorstore/_async/strategies.py
Lines changed: 26 additions & 24 deletions b/‎elasticsearch/helpers/vectorstore/_async/strategies.py
Lines changed: 26 additions & 24 deletions
@@ -20,43 +20,43 @@
     AsyncEmbeddingService,
 )
 from elasticsearch.helpers.vectorstore._async.strategies import (
-    AsyncBM25,
-    AsyncDenseVector,
-    AsyncDenseVectorScriptScore,
+    AsyncBM25Strategy,
+    AsyncDenseVectorScriptScoreStrategy,
+    AsyncDenseVectorStrategy,
     AsyncRetrievalStrategy,
-    AsyncSparseVector,
+    AsyncSparseVectorStrategy,
 )
 from elasticsearch.helpers.vectorstore._async.vectorstore import AsyncVectorStore
 from elasticsearch.helpers.vectorstore._sync.embedding_service import (
     ElasticsearchEmbeddings,
     EmbeddingService,
 )
 from elasticsearch.helpers.vectorstore._sync.strategies import (
-    BM25,
-    DenseVector,
-    DenseVectorScriptScore,
+    BM25Strategy,
+    DenseVectorScriptScoreStrategy,
+    DenseVectorStrategy,
     RetrievalStrategy,
-    SparseVector,
+    SparseVectorStrategy,
 )
 from elasticsearch.helpers.vectorstore._sync.vectorstore import VectorStore
 from elasticsearch.helpers.vectorstore._utils import DistanceMetric
 
 __all__ = [
-    "BM25",
-    "DenseVector",
-    "DenseVectorScriptScore",
+    "BM25Strategy",
+    "DenseVectorStrategy",
+    "DenseVectorScriptScoreStrategy",
     "ElasticsearchEmbeddings",
     "EmbeddingService",
     "RetrievalStrategy",
-    "SparseVector",
+    "SparseVectorStrategy",
     "VectorStore",
-    "AsyncBM25",
-    "AsyncDenseVector",
-    "AsyncDenseVectorScriptScore",
+    "AsyncBM25Strategy",
+    "AsyncDenseVectorStrategy",
+    "AsyncDenseVectorScriptScoreStrategy",
     "AsyncElasticsearchEmbeddings",
     "AsyncEmbeddingService",
     "AsyncRetrievalStrategy",
-    "AsyncSparseVector",
+    "AsyncSparseVectorStrategy",
     "AsyncVectorStore",
     "DistanceMetric",
 ]
@@ -15,33 +15,21 @@
 #  specific language governing permissions and limitations
 #  under the License.
 
-from elasticsearch import (
-    AsyncElasticsearch,
-    BadRequestError,
-    ConflictError,
-    NotFoundError,
-)
+from elasticsearch import AsyncElasticsearch, BadRequestError, NotFoundError
 
 
 async def model_must_be_deployed(client: AsyncElasticsearch, model_id: str) -> None:
+    """
+    :raises NotFoundError: if the model is neither downloaded nor deployed.
+    :raises ConflictError: if the model is downloaded but not yet deployed.
+    """
+    doc = {"text_field": f"test if the model '{model_id}' is deployed"}
     try:
-        dummy = {"x": "y"}
-        await client.ml.infer_trained_model(model_id=model_id, docs=[dummy])
-    except NotFoundError as err:
-        raise err
-    except ConflictError as err:
-        raise NotFoundError(
-            f"model '{model_id}' not found, please deploy it first",
-            meta=err.meta,
-            body=err.body,
-        ) from err
+        await client.ml.infer_trained_model(model_id=model_id, docs=[doc])
     except BadRequestError:
-        # This error is expected because we do not know the expected document
-        # shape and just use a dummy doc above.
+        # The model is deployed but expects a different input field name.
         pass
 
-    return None
-
 
 async def model_is_deployed(es_client: AsyncElasticsearch, model_id: str) -> bool:
     try:
 
@@ -16,32 +16,29 @@
 #  under the License.
 
 from abc import ABC, abstractmethod
-from typing import List, Optional
+from typing import List
 
 from elasticsearch import AsyncElasticsearch
+from elasticsearch._version import __versionstr__ as lib_version
 
 
 class AsyncEmbeddingService(ABC):
     @abstractmethod
     async def embed_documents(self, texts: List[str]) -> List[List[float]]:
         """Generate embeddings for a list of documents.
 
-        Args:
-            texts: A list of document strings to generate embeddings for.
+        :param texts: A list of document strings to generate embeddings for.
 
-        Returns:
-            A list of embeddings, one for each document in the input.
+        :return: A list of embeddings, one for each document in the input.
         """
 
     @abstractmethod
     async def embed_query(self, query: str) -> List[float]:
         """Generate an embedding for a single query text.
 
-        Args:
-            text: The query text to generate an embedding for.
+        :param query: The query text to generate an embedding for.
 
-        Returns:
-            The embedding for the input query text.
+        :return: The embedding for the input query text.
         """
 
 
@@ -56,31 +53,26 @@ class AsyncElasticsearchEmbeddings(AsyncEmbeddingService):
     def __init__(
         self,
         es_client: AsyncElasticsearch,
-        user_agent: str,
         model_id: str,
         input_field: str = "text_field",
-        num_dimensions: Optional[int] = None,
+        user_agent: str = f"elasticsearch-py-es/{lib_version}",
     ):
         """
-        Args:
-            agent_header: user agent header specific to the 3rd party integration.
-                Used for usage tracking in Elastic Cloud.
-            model_id: The model_id of the model deployed in the Elasticsearch cluster.
-            input_field: The name of the key for the input text field in the
-                document. Defaults to 'text_field'.
-            num_dimensions: The number of embedding dimensions. If None, then dimensions
-                will be infer from an example inference call.
-            es_client: Elasticsearch client connection. Alternatively specify the
-                Elasticsearch connection with the other es_* parameters.
+        :param user_agent: user agent header specific to the 3rd party integration.
+            Used for usage tracking in Elastic Cloud.
+        :param model_id: The model_id of the model deployed in the Elasticsearch cluster.
+        :param input_field: The name of the key for the input text field in the
+            document. Defaults to 'text_field'.
+        :param es_client: Elasticsearch client connection. The given user agent
+            header is applied to it via `options()` before it is used.
         """
         # Add integration-specific usage header for tracking usage in Elastic Cloud.
-        # client.options preserces existing (non-user-agent) headers.
+        # client.options preserves existing (non-user-agent) headers.
         es_client = es_client.options(headers={"User-Agent": user_agent})
 
-        self.client = es_client.ml
+        self.es_client = es_client
         self.model_id = model_id
         self.input_field = input_field
-        self._num_dimensions = num_dimensions
 
     async def embed_documents(self, texts: List[str]) -> List[List[float]]:
         result = await self._embedding_func(texts)
@@ -91,7 +83,7 @@ async def embed_query(self, text: str) -> List[float]:
         return result[0]
 
     async def _embedding_func(self, texts: List[str]) -> List[List[float]]:
-        response = await self.client.infer_trained_model(
+        response = await self.es_client.ml.infer_trained_model(
             model_id=self.model_id, docs=[{self.input_field: text} for text in texts]
         )
         return [doc["predicted_value"] for doc in response["inference_results"]]
@@ -39,15 +39,13 @@ def es_query(
         Returns the Elasticsearch query body for the given parameters.
         The store will execute the query.
 
-        Args:
-            query: The text query. Can be None if query_vector is given.
-            k: The total number of results to retrieve.
-            num_candidates: The number of results to fetch initially in knn search.
-            filter: List of filter clauses to apply to the query.
-            query_vector: The query vector. Can be None if a query string is given.
-
-        Returns:
-            Dict: The Elasticsearch query body.
+        :param query: The text query. Can be None if query_vector is given.
+        :param k: The total number of results to retrieve.
+        :param num_candidates: The number of results to fetch initially in knn search.
+        :param filter: List of filter clauses to apply to the query.
+        :param query_vector: The query vector. Can be None if a query string is given.
+
+        :return: The Elasticsearch query body.
         """
 
     @abstractmethod
@@ -61,11 +59,10 @@ def es_mappings_settings(
         Create the required index and do necessary preliminary work, like
         creating inference pipelines or checking if a required model was deployed.
 
-        Args:
-            client: Elasticsearch client connection.
-            index_name: The name of the Elasticsearch index to create.
-            metadata_mapping: Flat dictionary with field and field type pairs that
-                describe the schema of the metadata.
+        :param client: Elasticsearch client connection.
+        :param index_name: The name of the Elasticsearch index to create.
+        :param metadata_mapping: Flat dictionary with field and field type pairs that
+            describe the schema of the metadata.
         """
 
     async def before_index_creation(
@@ -74,22 +71,27 @@ async def before_index_creation(
         """
         Executes before the index is created. Used for setting up
         any required Elasticsearch resources like a pipeline.
+        Defaults to a no-op.
 
-        Args:
-            client: The Elasticsearch client.
-            text_field: The field containing the text data in the index.
-            vector_field: The field containing the vector representations in the index.
+        :param client: The Elasticsearch client.
+        :param text_field: The field containing the text data in the index.
+        :param vector_field: The field containing the vector representations in the index.
         """
         pass
 
     def needs_inference(self) -> bool:
         """
-        TODO
+        Some retrieval strategies index embedding vectors and allow search by embedding
+        vector, for example the `DenseVectorStrategy`. Mapping a user input query
+        string to an embedding vector is called inference. Inference can be applied
+        in Elasticsearch (using a `model_id`) or outside of Elasticsearch (using an
+        `EmbeddingService` defined on the `VectorStore`). In the latter case,
+        this method has to return True.
         """
         return False
 
 
-class AsyncSparseVector(AsyncRetrievalStrategy):
+class AsyncSparseVectorStrategy(AsyncRetrievalStrategy):
     """Sparse retrieval strategy using the `text_expansion` processor."""
 
     def __init__(self, model_id: str = ".elser_model_2"):
@@ -176,7 +178,7 @@ async def before_index_creation(
             )
 
 
-class AsyncDenseVector(AsyncRetrievalStrategy):
+class AsyncDenseVectorStrategy(AsyncRetrievalStrategy):
     """K-nearest-neighbors retrieval."""
 
     def __init__(
@@ -189,7 +191,7 @@ def __init__(
     ):
         if hybrid and not text_field:
             raise ValueError(
-                "to enable hybrid you have to specify a text_field (for BM25 matching)"
+                "to enable hybrid you have to specify a text_field (for BM25Strategy matching)"
             )
 
         self.distance = distance
@@ -304,7 +306,7 @@ def needs_inference(self) -> bool:
         return not self.model_id
 
 
-class AsyncDenseVectorScriptScore(AsyncRetrievalStrategy):
+class AsyncDenseVectorScriptScoreStrategy(AsyncRetrievalStrategy):
     """Exact nearest neighbors retrieval using the `script_score` query."""
 
     def __init__(self, distance: DistanceMetric = DistanceMetric.COSINE) -> None:
@@ -383,7 +385,7 @@ def needs_inference(self) -> bool:
         return True
 
 
-class AsyncBM25(AsyncRetrievalStrategy):
+class AsyncBM25Strategy(AsyncRetrievalStrategy):
     def __init__(
         self,
         k1: Optional[float] = None,