Skip to content

Commit f0b3dca

Browse files
authored
inference refactor (#4153)
1 parent ff586fc commit f0b3dca

File tree

25 files changed

+5962
-5429
lines changed

25 files changed

+5962
-5429
lines changed

output/openapi/elasticsearch-openapi.json

Lines changed: 370 additions & 140 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

output/openapi/elasticsearch-serverless-openapi.json

Lines changed: 370 additions & 140 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

output/schema/schema-serverless.json

Lines changed: 648 additions & 634 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

output/schema/schema.json

Lines changed: 2965 additions & 2951 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

output/typescript/types.ts

Lines changed: 373 additions & 371 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

specification/inference/_types/CommonTypes.ts

Lines changed: 1115 additions & 6 deletions
Large diffs are not rendered by default.

specification/inference/chat_completion_unified/UnifiedRequest.ts

Lines changed: 4 additions & 143 deletions
Original file line numberDiff line numberDiff line change
@@ -18,18 +18,17 @@
1818
*/
1919

2020
import { RequestChatCompletionBase } from '@inference/_types/CommonTypes'
21-
import { UserDefinedValue } from '@spec_utils/UserDefinedValue'
21+
import { RequestBase } from '@_types/Base'
2222
import { Id } from '@_types/common'
2323
import { Duration } from '@_types/Time'
24-
2524
/**
2625
* Perform chat completion inference
2726
* @rest_spec_name inference.chat_completion_unified
2827
* @availability stack since=8.18.0 stability=stable visibility=public
2928
* @availability serverless stability=stable visibility=public
3029
* @doc_id inference-api-chat-completion
3130
*/
32-
export interface Request extends RequestChatCompletionBase {
31+
export interface Request extends RequestBase {
3332
urls: [
3433
{
3534
path: '/_inference/chat_completion/{inference_id}/_stream'
@@ -49,144 +48,6 @@ export interface Request extends RequestChatCompletionBase {
4948
*/
5049
timeout?: Duration
5150
}
52-
}
53-
54-
/**
55-
* @codegen_names string, object
56-
*/
57-
export type CompletionToolType = string | CompletionToolChoice
58-
59-
/**
60-
* An object style representation of a single portion of a conversation.
61-
*/
62-
export interface ContentObject {
63-
/**
64-
* The text content.
65-
*/
66-
text: string
67-
/**
68-
* The type of content.
69-
*/
70-
type: string
71-
}
72-
73-
/**
74-
* The function that the model called.
75-
*/
76-
export interface ToolCallFunction {
77-
/**
78-
* The arguments to call the function with in JSON format.
79-
*/
80-
arguments: string
81-
/**
82-
* The name of the function to call.
83-
*/
84-
name: string
85-
}
86-
87-
/**
88-
* A tool call generated by the model.
89-
*/
90-
export interface ToolCall {
91-
/**
92-
* The identifier of the tool call.
93-
*/
94-
id: Id
95-
/**
96-
* The function that the model called.
97-
*/
98-
function: ToolCallFunction
99-
/**
100-
* The type of the tool call.
101-
*/
102-
type: string
103-
}
104-
105-
/**
106-
* @codegen_names string, object
107-
*/
108-
export type MessageContent = string | Array<ContentObject>
109-
110-
/**
111-
* An object representing part of the conversation.
112-
*/
113-
export interface Message {
114-
/**
115-
* The content of the message.
116-
*/
117-
content?: MessageContent
118-
/**
119-
* The role of the message author.
120-
*/
121-
role: string
122-
/**
123-
* The tool call that this message is responding to.
124-
*/
125-
tool_call_id?: Id
126-
/**
127-
* The tool calls generated by the model.
128-
*/
129-
tool_calls?: Array<ToolCall>
130-
}
131-
132-
/**
133-
* The tool choice function.
134-
*
135-
*/
136-
export interface CompletionToolChoiceFunction {
137-
/**
138-
* The name of the function to call.
139-
*/
140-
name: string
141-
}
142-
143-
/**
144-
* Controls which tool is called by the model.
145-
*/
146-
export interface CompletionToolChoice {
147-
/**
148-
* The type of the tool.
149-
*/
150-
type: string
151-
/**
152-
* The tool choice function.
153-
*/
154-
function: CompletionToolChoiceFunction
155-
}
156-
157-
/**
158-
* The completion tool function definition.
159-
*/
160-
export interface CompletionToolFunction {
161-
/**
162-
* A description of what the function does.
163-
* This is used by the model to choose when and how to call the function.
164-
*/
165-
description?: string
166-
/**
167-
* The name of the function.
168-
*/
169-
name: string
170-
/**
171-
* The parameters the function accepts. This should be formatted as a JSON object.
172-
*/
173-
parameters?: UserDefinedValue
174-
/**
175-
* Whether to enable schema adherence when generating the function call.
176-
*/
177-
strict?: boolean
178-
}
179-
180-
/**
181-
* A list of tools that the model can call.
182-
*/
183-
export interface CompletionTool {
184-
/**
185-
* The type of tool.
186-
*/
187-
type: string
188-
/**
189-
* The function definition.
190-
*/
191-
function: CompletionToolFunction
51+
/** @codegen_name chat_completion_request */
52+
body: RequestChatCompletionBase
19253
}

specification/inference/post_eis_chat_completion/PostEisChatCompletionRequest.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@
1818
*/
1919

2020
import { RequestChatCompletionBase } from '@inference/_types/CommonTypes'
21+
import { RequestBase } from '@_types/Base'
2122
import { Id } from '@_types/common'
22-
2323
/**
2424
* Perform a chat completion task through the Elastic Inference Service (EIS).
2525
*
@@ -30,7 +30,7 @@ import { Id } from '@_types/common'
3030
* @cluster_privileges manage_inference
3131
* @doc_id inference-api-post-eis-chat-completion
3232
*/
33-
export interface Request extends RequestChatCompletionBase {
33+
export interface Request extends RequestBase {
3434
urls: [
3535
{
3636
path: '/_inference/chat_completion/{eis_inference_id}/_stream'
@@ -43,4 +43,6 @@ export interface Request extends RequestChatCompletionBase {
4343
*/
4444
eis_inference_id: Id
4545
}
46+
/** @codegen_name chat_completion_request */
47+
body: RequestChatCompletionBase
4648
}

specification/inference/put_alibabacloud/PutAlibabaCloudRequest.ts

Lines changed: 7 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,12 @@
1818
*/
1919

2020
import {
21-
InferenceChunkingSettings,
22-
RateLimitSetting
23-
} from '@inference/_types/Services'
21+
AlibabaCloudServiceSettings,
22+
AlibabaCloudServiceType,
23+
AlibabaCloudTaskSettings,
24+
AlibabaCloudTaskType
25+
} from '@inference/_types/CommonTypes'
26+
import { InferenceChunkingSettings } from '@inference/_types/Services'
2427
import { RequestBase } from '@_types/Base'
2528
import { Id } from '@_types/common'
2629

@@ -66,7 +69,7 @@ export interface Request extends RequestBase {
6669
/**
6770
* The type of service supported for the specified task type. In this case, `alibabacloud-ai-search`.
6871
*/
69-
service: ServiceType
72+
service: AlibabaCloudServiceType
7073
/**
7174
* Settings used to install the inference model. These settings are specific to the `alibabacloud-ai-search` service.
7275
*/
@@ -78,77 +81,3 @@ export interface Request extends RequestBase {
7881
task_settings?: AlibabaCloudTaskSettings
7982
}
8083
}
81-
82-
export enum AlibabaCloudTaskType {
83-
completion,
84-
rerank,
85-
space_embedding,
86-
text_embedding
87-
}
88-
89-
export enum ServiceType {
90-
'alibabacloud-ai-search'
91-
}
92-
93-
export class AlibabaCloudServiceSettings {
94-
/**
95-
* A valid API key for the AlibabaCloud AI Search API.
96-
*/
97-
api_key: string
98-
/**
99-
* The name of the host address used for the inference task.
100-
* You can find the host address in the API keys section of the documentation.
101-
* @ext_doc_id alibabacloud-api-keys
102-
*/
103-
host: string
104-
/**
105-
* This setting helps to minimize the number of rate limit errors returned from AlibabaCloud AI Search.
106-
* By default, the `alibabacloud-ai-search` service sets the number of requests allowed per minute to `1000`.
107-
*/
108-
rate_limit?: RateLimitSetting
109-
/**
110-
* The name of the model service to use for the inference task.
111-
* The following service IDs are available for the `completion` task:
112-
*
113-
* * `ops-qwen-turbo`
114-
* * `qwen-turbo`
115-
* * `qwen-plus`
116-
* * `qwen-max` or `qwen-max-longcontext`
117-
*
118-
* The following service ID is available for the `rerank` task:
119-
*
120-
* * `ops-bge-reranker-larger`
121-
*
122-
* The following service ID is available for the `sparse_embedding` task:
123-
*
124-
* * `ops-text-sparse-embedding-001`
125-
*
126-
* The following service IDs are available for the `text_embedding` task:
127-
*
128-
* * `ops-text-embedding-001`
129-
* * `ops-text-embedding-zh-001`
130-
* * `ops-text-embedding-en-001`
131-
* * `ops-text-embedding-002`
132-
*/
133-
service_id: string
134-
/**
135-
* The name of the workspace used for the inference task.
136-
*/
137-
workspace: string
138-
}
139-
140-
export class AlibabaCloudTaskSettings {
141-
/**
142-
* For a `sparse_embedding` or `text_embedding` task, specify the type of input passed to the model.
143-
* Valid values are:
144-
*
145-
* * `ingest` for storing document embeddings in a vector database.
146-
* * `search` for storing embeddings of search queries run against a vector database to find relevant documents.
147-
*/
148-
input_type?: string
149-
/**
150-
* For a `sparse_embedding` task, it affects whether the token name will be returned in the response.
151-
* It defaults to `false`, which means only the token ID will be returned in the response.
152-
*/
153-
return_token?: boolean
154-
}

0 commit comments

Comments
 (0)