Skip to content

Inference refactor #4153

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
510 changes: 370 additions & 140 deletions output/openapi/elasticsearch-openapi.json

Large diffs are not rendered by default.

510 changes: 370 additions & 140 deletions output/openapi/elasticsearch-serverless-openapi.json

Large diffs are not rendered by default.

1,282 changes: 648 additions & 634 deletions output/schema/schema-serverless.json

Large diffs are not rendered by default.

5,916 changes: 2,965 additions & 2,951 deletions output/schema/schema.json

Large diffs are not rendered by default.

744 changes: 373 additions & 371 deletions output/typescript/types.ts

Large diffs are not rendered by default.

1,121 changes: 1,115 additions & 6 deletions specification/inference/_types/CommonTypes.ts

Large diffs are not rendered by default.

147 changes: 4 additions & 143 deletions specification/inference/chat_completion_unified/UnifiedRequest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,17 @@
*/

import { RequestChatCompletionBase } from '@inference/_types/CommonTypes'
import { UserDefinedValue } from '@spec_utils/UserDefinedValue'
import { RequestBase } from '@_types/Base'
import { Id } from '@_types/common'
import { Duration } from '@_types/Time'

/**
* Perform chat completion inference
* @rest_spec_name inference.chat_completion_unified
* @availability stack since=8.18.0 stability=stable visibility=public
* @availability serverless stability=stable visibility=public
* @doc_id inference-api-chat-completion
*/
export interface Request extends RequestChatCompletionBase {
export interface Request extends RequestBase {
urls: [
{
path: '/_inference/chat_completion/{inference_id}/_stream'
Expand All @@ -49,144 +48,6 @@ export interface Request extends RequestChatCompletionBase {
*/
timeout?: Duration
}
}

/**
* A tool selector that is either a plain string or a `CompletionToolChoice` object.
* @codegen_names string, object
*/
export type CompletionToolType = string | CompletionToolChoice

/**
* An object-style representation of a single portion of a conversation.
* This is the element type of the array form of `MessageContent`.
*/
export interface ContentObject {
/**
* The text content.
*/
text: string
/**
* The type of content.
*/
type: string
}

/**
* The function that the model called.
* Carried by the `function` field of a `ToolCall`.
*/
export interface ToolCallFunction {
/**
* The arguments to call the function with, as a JSON-formatted string.
*/
arguments: string
/**
* The name of the function to call.
*/
name: string
}

/**
* A tool call generated by the model.
* All three fields are required.
*/
export interface ToolCall {
/**
* The identifier of the tool call.
*/
id: Id
/**
* The function that the model called.
*/
function: ToolCallFunction
/**
* The type of the tool call.
*/
type: string
}

/**
* The content of a message, given either as a plain string or as an array of `ContentObject` items.
* @codegen_names string, object
*/
export type MessageContent = string | Array<ContentObject>

/**
* An object representing part of the conversation.
* Only `role` is required; `content`, `tool_call_id`, and `tool_calls` are optional.
*/
export interface Message {
/**
* The content of the message, as a string or an array of content objects.
*/
content?: MessageContent
/**
* The role of the message author.
*/
role: string
/**
* The tool call that this message is responding to.
*/
tool_call_id?: Id
/**
* The tool calls generated by the model.
*/
tool_calls?: Array<ToolCall>
}

/**
* The tool choice function.
* Identifies, by name, the function referenced from a `CompletionToolChoice`.
*/
export interface CompletionToolChoiceFunction {
/**
* The name of the function to call.
*/
name: string
}

/**
* Controls which tool is called by the model.
* This is the object form of `CompletionToolType`.
*/
export interface CompletionToolChoice {
/**
* The type of the tool.
*/
type: string
/**
* The tool choice function.
*/
function: CompletionToolChoiceFunction
}

/**
* The completion tool function definition.
* Only `name` is required; the remaining fields are optional.
*/
export interface CompletionToolFunction {
/**
* A description of what the function does.
* This is used by the model to choose when and how to call the function.
*/
description?: string
/**
* The name of the function.
*/
name: string
/**
* The parameters the function accepts. This should be formatted as a JSON object.
*/
parameters?: UserDefinedValue
/**
* Whether to enable schema adherence when generating the function call.
*/
strict?: boolean
}

/**
* A list of tools that the model can call.
*/
export interface CompletionTool {
/**
* The type of tool.
*/
type: string
/**
* The function definition.
*/
function: CompletionToolFunction
/** @codegen_name chat_completion_request */
body: RequestChatCompletionBase
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
*/

import { RequestChatCompletionBase } from '@inference/_types/CommonTypes'
import { RequestBase } from '@_types/Base'
import { Id } from '@_types/common'

/**
* Perform a chat completion task through the Elastic Inference Service (EIS).
*
Expand All @@ -30,7 +30,7 @@ import { Id } from '@_types/common'
* @cluster_privileges manage_inference
* @doc_id inference-api-post-eis-chat-completion
*/
export interface Request extends RequestChatCompletionBase {
export interface Request extends RequestBase {
urls: [
{
path: '/_inference/chat_completion/{eis_inference_id}/_stream'
Expand All @@ -43,4 +43,6 @@ export interface Request extends RequestChatCompletionBase {
*/
eis_inference_id: Id
}
/** @codegen_name chat_completion_request */
body: RequestChatCompletionBase
}
85 changes: 7 additions & 78 deletions specification/inference/put_alibabacloud/PutAlibabaCloudRequest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,12 @@
*/

import {
InferenceChunkingSettings,
RateLimitSetting
} from '@inference/_types/Services'
AlibabaCloudServiceSettings,
AlibabaCloudServiceType,
AlibabaCloudTaskSettings,
AlibabaCloudTaskType
} from '@inference/_types/CommonTypes'
import { InferenceChunkingSettings } from '@inference/_types/Services'
import { RequestBase } from '@_types/Base'
import { Id } from '@_types/common'

Expand Down Expand Up @@ -66,7 +69,7 @@ export interface Request extends RequestBase {
/**
* The type of service supported for the specified task type. In this case, `alibabacloud-ai-search`.
*/
service: ServiceType
service: AlibabaCloudServiceType
/**
* Settings used to install the inference model. These settings are specific to the `alibabacloud-ai-search` service.
*/
Expand All @@ -78,77 +81,3 @@ export interface Request extends RequestBase {
task_settings?: AlibabaCloudTaskSettings
}
}

/**
 * Task types available for the `alibabacloud-ai-search` inference service.
 */
export enum AlibabaCloudTaskType {
  completion,
  rerank,
  // Previously misspelled `space_embedding`; the settings documentation in this
  // file consistently names this task `sparse_embedding`. The member keeps its
  // ordinal position so its numeric value is unchanged.
  sparse_embedding,
  text_embedding
}

/**
* The service identifier accepted by this request's `service` field.
* The only member is `alibabacloud-ai-search`.
*/
export enum ServiceType {
'alibabacloud-ai-search'
}

/**
* Service settings for the `alibabacloud-ai-search` inference service:
* API key, host, service ID, and workspace are required; the rate limit is optional.
*/
export class AlibabaCloudServiceSettings {
/**
* A valid API key for the AlibabaCloud AI Search API.
*/
api_key: string
/**
* The name of the host address used for the inference task.
* You can find the host address in the API keys section of the documentation.
* @ext_doc_id alibabacloud-api-keys
*/
host: string
/**
* This setting helps to minimize the number of rate limit errors returned from AlibabaCloud AI Search.
* By default, the `alibabacloud-ai-search` service sets the number of requests allowed per minute to `1000`.
*/
rate_limit?: RateLimitSetting
// NOTE(review): the completion list previously contained the single entry
// "qwen-max ÷ qwen-max-longcontext". It is split into two service IDs below on the
// assumption that "÷" was a garbled separator — confirm against the AlibabaCloud docs.
/**
* The name of the model service to use for the inference task.
* The following service IDs are available for the `completion` task:
*
* * `ops-qwen-turbo`
* * `qwen-turbo`
* * `qwen-plus`
* * `qwen-max`
* * `qwen-max-longcontext`
*
* The following service ID is available for the `rerank` task:
*
* * `ops-bge-reranker-larger`
*
* The following service ID is available for the `sparse_embedding` task:
*
* * `ops-text-sparse-embedding-001`
*
* The following service IDs are available for the `text_embedding` task:
*
* * `ops-text-embedding-001`
* * `ops-text-embedding-zh-001`
* * `ops-text-embedding-en-001`
* * `ops-text-embedding-002`
*/
service_id: string
/**
* The name of the workspace used for the inference task.
*/
workspace: string
}

/**
* Optional task-level settings carried by the request's `task_settings` field.
* Both fields are optional and apply only to the embedding task types named in their descriptions.
*/
export class AlibabaCloudTaskSettings {
/**
* For a `sparse_embedding` or `text_embedding` task, specify the type of input passed to the model.
* Valid values are:
*
* * `ingest` for storing document embeddings in a vector database.
* * `search` for storing embeddings of search queries run against a vector database to find relevant documents.
*/
input_type?: string
/**
* For a `sparse_embedding` task, it affects whether the token name will be returned in the response.
* It defaults to `false`, which means only the token ID will be returned in the response.
*/
return_token?: boolean
}
Loading
Loading