elastic
diff --git a/‎compiler/src/model/utils.ts
Lines changed: 1 addition & 1 deletion b/‎compiler/src/model/utils.ts
Lines changed: 1 addition & 1 deletion
diff --git a/‎output/openapi/elasticsearch-openapi.json
Lines changed: 2 additions & 0 deletions b/‎output/openapi/elasticsearch-openapi.json
Lines changed: 2 additions & 0 deletions
diff --git a/‎output/openapi/elasticsearch-serverless-openapi.json
Lines changed: 2 additions & 0 deletions b/‎output/openapi/elasticsearch-serverless-openapi.json
Lines changed: 2 additions & 0 deletions
diff --git a/‎output/schema/schema.json
Lines changed: 8 additions & 3 deletions b/‎output/schema/schema.json
Lines changed: 8 additions & 3 deletions
diff --git a/‎specification/inference/put/PutRequest.ts
Lines changed: 11 additions & 1 deletion b/‎specification/inference/put/PutRequest.ts
Lines changed: 11 additions & 1 deletion
@@ -667,7 +667,7 @@ export function hoistRequestAnnotations (
     } else if (tag === 'cluster_privileges') {
       const privileges = [
         'all', 'cancel_task', 'create_snapshot', 'grant_api_key', 'manage', 'manage_api_key', 'manage_ccr',
-        'manage_enrich', 'manage_ilm', 'manage_index_templates', 'manage_ingest_pipelines', 'manage_logstash_pipelines',
+        'manage_enrich', 'manage_ilm', 'manage_index_templates', 'manage_inference', 'manage_ingest_pipelines', 'manage_logstash_pipelines',
         'manage_ml', 'manage_oidc', 'manage_own_api_key', 'manage_pipeline', 'manage_rollup', 'manage_saml',
         'manage_security', 'manage_service_account', 'manage_slm', 'manage_token', 'manage_transform', 'manage_user_profile',
         'manage_watcher', 'monitor', 'monitor_ml', 'monitor_rollup', 'monitor_snapshot', 'monitor_text_structure',
 
@@ -23,10 +23,20 @@ import { RequestBase } from '@_types/Base'
 import { Id } from '@_types/common'
 
 /**
- * Create an inference endpoint
+ * Create an inference endpoint.
+ * When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
+ * After creating the endpoint, wait for the model deployment to complete before using it.
+ * To verify the deployment status, use the get trained model statistics API.
+ * Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`.
+ * Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
+ *
+ * IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.
+ * For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.
+ * However, if you do not plan to use these models through the inference APIs, or if you want to use non-NLP models, use the machine learning trained model APIs instead.
  * @rest_spec_name inference.put
  * @availability stack since=8.11.0 stability=stable visibility=public
  * @availability serverless stability=stable visibility=public
+ * @cluster_privileges manage_inference
  */
 export interface Request extends RequestBase {
   path_parts: {