Skip to content

Commit a29c031

Browse files
committed
Draft start
1 parent ef980f0 commit a29c031

File tree

3 files changed

+156
-0
lines changed

3 files changed

+156
-0
lines changed
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
/*
2+
* Licensed to Elasticsearch B.V. under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch B.V. licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
import { RateLimitSetting } from '@inference/_types/Services'
21+
import { RequestBase } from '@_types/Base'
22+
import { Id } from '@_types/common'
23+
24+
/**
25+
* Create a Watsonx inference endpoint.
26+
*
27+
* Creates an inference endpoint to perform an inference task with the `watsonxai` service.
28+
* You need an IBM Cloud Databases for Elasticsearch deployment to use the `watsonxai` inference service.
29+
* You can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.
30+
*
31+
* When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
32+
* After creating the endpoint, wait for the model deployment to complete before using it.
33+
* To verify the deployment status, use the get trained model statistics API.
34+
* Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`.
35+
* Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
36+
* @rest_spec_name inference.put_watsonx
37+
* @availability stack since=8.16.0 stability=stable visibility=public
38+
* @availability serverless stability=stable visibility=public
39+
* @cluster_privileges manage_inference
40+
* @doc_id inference-api-put-watsonx
41+
*/
42+
export interface Request extends RequestBase {
43+
urls: [
44+
{
45+
path: '/_inference/{task_type}/{watsonx_inference_id}'
46+
methods: ['PUT']
47+
}
48+
]
49+
path_parts: {
50+
/**
51+
* The task type.
52+
* The only valid task type for the model to perform is `text_embedding`.
53+
*/
54+
task_type: WatsonxTaskType
55+
/**
56+
* The unique identifier of the inference endpoint.
57+
*/
58+
watsonx_inference_id: Id
59+
}
60+
body: {
61+
/**
62+
* The type of service supported for the specified task type. In this case, `watsonxai`.
63+
*/
64+
service: ServiceType
65+
/**
66+
* Settings used to install the inference model. These settings are specific to the `watsonxai` service.
67+
*/
68+
service_settings: WatsonxServiceSettings
69+
}
70+
}
71+
72+
export enum WatsonxTaskType {
73+
text_embedding
74+
}
75+
76+
export enum ServiceType {
77+
watsonxai
78+
}
79+
80+
export class WatsonxServiceSettings {
81+
/**
82+
* A valid API key of your Watsonx account.
83+
* You can find your Watsonx API keys or you can create a new one on the API keys page.
84+
*
85+
* IMPORTANT: You need to provide the API key only once, during the inference model creation.
86+
* The get inference endpoint API does not retrieve your API key.
87+
* After creating the inference model, you cannot change the associated API key.
88+
* If you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.
89+
* @ext_doc_id watsonx-api-keys
90+
*/
91+
api_key: string
92+
/**
93+
* A version parameter that takes a version date in the format of `YYYY-MM-DD`.
94+
* For the active version date parameters, refer to the Watsonx documentation.
95+
* @ext_doc_id watsonx-api-version
96+
*/
97+
api_version: string
98+
/**
99+
* The name of the model to use for the inference task.
100+
* Refer to the IBM Embedding Models section in the Watsonx documentation for the list of available text embedding models.
101+
* @ext_doc_id watsonx-api-models
102+
*/
103+
model_id: string
104+
/**
105+
* The identifier of the IBM Cloud project to use for the inference task.
106+
*/
107+
project_id: string
108+
/**
109+
* This setting helps to minimize the number of rate limit errors returned from Watsonx.
110+
* By default, the `watsonxai` service sets the number of requests allowed per minute to 120.
111+
*/
112+
rate_limit?: RateLimitSetting
113+
/**
114+
* The URL of the inference endpoint that you created on Watsonx.
115+
*/
116+
url: string
117+
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
/*
2+
* Licensed to Elasticsearch B.V. under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch B.V. licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
import { InferenceEndpointInfo } from '@inference/_types/Services'
21+
22+
export class Response {
23+
body: InferenceEndpointInfo
24+
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# summary:
2+
description: Run `PUT _inference/text_embedding/watsonx-embeddings` to create a Watsonx inference endpoint that performs a text embedding task.
3+
# method_request: "PUT _inference/text_embedding/watsonx-embeddings"
4+
# type: "request"
5+
value: |-
6+
{
7+
"service": "watsonxai",
8+
"service_settings": {
9+
"api_key": "Watsonx-API-Key",
10+
"url": "Watsonx-URL",
11+
"model_id": "ibm/slate-30m-english-rtrvr",
12+
"project_id": "IBM-Cloud-ID",
13+
"api_version": "2024-03-14"
14+
}
15+
}

0 commit comments

Comments
 (0)