
Adds custom InferenceEndpointInfo classes #4444

Merged · merged 4 commits on Jun 5, 2025
170 changes: 169 additions & 1 deletion specification/inference/_types/Services.ts
@@ -22,7 +22,21 @@ import { integer } from '@_types/Numeric'
import {
TaskType,
TaskTypeAlibabaCloudAI,
TaskTypeJinaAi
TaskTypeAmazonBedrock,
TaskTypeAnthropic,
TaskTypeAzureAIStudio,
TaskTypeAzureOpenAI,
TaskTypeCohere,
TaskTypeElasticsearch,
TaskTypeELSER,
TaskTypeGoogleAIStudio,
TaskTypeGoogleVertexAI,
TaskTypeHuggingFace,
TaskTypeJinaAi,
TaskTypeMistral,
TaskTypeOpenAI,
TaskTypeVoyageAI,
TaskTypeWatsonx
} from '../_types/TaskType'

/**
@@ -83,6 +97,160 @@ export class InferenceEndpointInfoAlibabaCloudAI extends InferenceEndpoint {
task_type: TaskTypeAlibabaCloudAI
}

export class InferenceEndpointInfoAmazonBedrock extends InferenceEndpoint {
/**
* The inference Id
*/
inference_id: string
/**
* The task type
*/
task_type: TaskTypeAmazonBedrock
}

export class InferenceEndpointInfoAnthropic extends InferenceEndpoint {
/**
* The inference Id
*/
inference_id: string
/**
* The task type
*/
task_type: TaskTypeAnthropic
}

export class InferenceEndpointInfoAzureAIStudio extends InferenceEndpoint {
/**
* The inference Id
*/
inference_id: string
/**
* The task type
*/
task_type: TaskTypeAzureAIStudio
}

export class InferenceEndpointInfoAzureOpenAI extends InferenceEndpoint {
/**
* The inference Id
*/
inference_id: string
/**
* The task type
*/
task_type: TaskTypeAzureOpenAI
}

export class InferenceEndpointInfoCohere extends InferenceEndpoint {
/**
* The inference Id
*/
inference_id: string
/**
* The task type
*/
task_type: TaskTypeCohere
}

export class InferenceEndpointInfoElasticsearch extends InferenceEndpoint {
/**
* The inference Id
*/
inference_id: string
/**
* The task type
*/
task_type: TaskTypeElasticsearch
}

export class InferenceEndpointInfoELSER extends InferenceEndpoint {
/**
* The inference Id
*/
inference_id: string
/**
* The task type
*/
task_type: TaskTypeELSER
}

export class InferenceEndpointInfoGoogleAIStudio extends InferenceEndpoint {
/**
* The inference Id
*/
inference_id: string
/**
* The task type
*/
task_type: TaskTypeGoogleAIStudio
}

export class InferenceEndpointInfoGoogleVertexAI extends InferenceEndpoint {
/**
* The inference Id
*/
inference_id: string
/**
* The task type
*/
task_type: TaskTypeGoogleVertexAI
}

export class InferenceEndpointInfoHuggingFace extends InferenceEndpoint {
/**
* The inference Id
*/
inference_id: string
/**
* The task type
*/
task_type: TaskTypeHuggingFace
}

export class InferenceEndpointInfoMistral extends InferenceEndpoint {
/**
* The inference Id
*/
inference_id: string
/**
* The task type
*/
task_type: TaskTypeMistral
}

export class InferenceEndpointInfoOpenAI extends InferenceEndpoint {
/**
* The inference Id
*/
inference_id: string
/**
* The task type
*/
task_type: TaskTypeOpenAI
}

export class InferenceEndpointInfoVoyageAI extends InferenceEndpoint {
/**
* The inference Id
*/
inference_id: string
/**
* The task type
*/
task_type: TaskTypeVoyageAI
}

export class InferenceEndpointInfoWatsonx extends InferenceEndpoint {
/**
* The inference Id
*/
inference_id: string
/**
* The task type
*/
task_type: TaskTypeWatsonx
}

/**
* Chunking configuration object
*/
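For orientation, a sketch of what a value of one of the new per-provider classes might look like on the wire (not taken from this diff; the fields other than inference_id and task_type are assumed to come from the inherited InferenceEndpoint, and all values are hypothetical):

// Illustrative sketch only: a possible response body shaped like
// InferenceEndpointInfoAmazonBedrock. The service and service_settings fields
// are assumed from the inherited InferenceEndpoint class, not from this PR.
const exampleEndpointInfo = {
  inference_id: 'my-bedrock-endpoint', // hypothetical endpoint name
  task_type: 'completion',             // must be a TaskTypeAmazonBedrock member
  service: 'amazonbedrock',            // assumed service identifier
  service_settings: {
    // provider credentials and model configuration would go here
  }
}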
68 changes: 68 additions & 0 deletions specification/inference/_types/TaskType.ts
@@ -39,3 +39,71 @@ export enum TaskTypeAlibabaCloudAI {
completion,
sparse_embedding
}

export enum TaskTypeAmazonBedrock {
text_embedding,
completion
}

export enum TaskTypeAnthropic {
completion
}

export enum TaskTypeAzureAIStudio {
text_embedding,
completion
}

export enum TaskTypeAzureOpenAI {
text_embedding,
completion
}

export enum TaskTypeCohere {
text_embedding,
rerank,
completion
}

export enum TaskTypeElasticsearch {
sparse_embedding,
text_embedding,
rerank
}

export enum TaskTypeELSER {
sparse_embedding
}

export enum TaskTypeGoogleAIStudio {
text_embedding,
completion
}

export enum TaskTypeGoogleVertexAI {
text_embedding,
rerank
}

export enum TaskTypeHuggingFace {
text_embedding
}

export enum TaskTypeMistral {
text_embedding
}

export enum TaskTypeOpenAI {
text_embedding,
chat_completion,
completion
}

export enum TaskTypeVoyageAI {
text_embedding,
rerank
}

export enum TaskTypeWatsonx {
text_embedding
}
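These enum member names double as the task type strings used in the create inference API path (PUT /_inference/{task_type}/{inference_id}). A rough sketch, with a hypothetical endpoint name, host, and assumed service settings:

// Rough sketch (not from this PR): creating an Amazon Bedrock endpoint.
// The task type segment of the URL ('completion') must be one of the
// TaskTypeAmazonBedrock members above; the endpoint name and the contents
// of service_settings are hypothetical placeholders.
async function createBedrockEndpoint(): Promise<void> {
  await fetch('http://localhost:9200/_inference/completion/my-bedrock-endpoint', {
    method: 'PUT',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      service: 'amazonbedrock',
      service_settings: {
        // access keys, region, and model id would go here
      }
    })
  })
}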
@@ -23,13 +23,10 @@ import { Id } from '@_types/common'
import { Duration } from '@_types/Time'
/**
* Perform chat completion inference
*
* The chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation.
* It only works with the `chat_completion` task type for `openai`, `elastic` and `googlevertexai` inference services.

* IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.
* For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.
*
*
* The chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation.
* It only works with the `chat_completion` task type for `openai` and `elastic` inference services.
*
* NOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.
* The Chat completion inference API and the Stream inference API differ in their response structure and capabilities.
* The Chat completion inference API provides more comprehensive customization options through more fields and function calling support.
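As a minimal sketch of the streaming behaviour described above (assuming the unified chat completion request format and a hypothetical endpoint name; not part of this diff):

// Minimal sketch (not from this PR): calling the chat_completion task type
// through the _stream endpoint. The path and body fields follow the unified
// chat completion request format; the endpoint name is a placeholder.
async function streamChatCompletion(): Promise<void> {
  const response = await fetch(
    'http://localhost:9200/_inference/chat_completion/my-openai-endpoint/_stream',
    {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        messages: [{ role: 'user', content: 'Say hello' }]
      })
    }
  )
  // The body arrives as server-sent events and should be consumed incrementally.
  console.log(response.status)
}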
@@ -17,9 +17,9 @@
* under the License.
*/

import { InferenceEndpointInfo } from '@inference/_types/Services'
import { InferenceEndpointInfoAmazonBedrock } from '@inference/_types/Services'

export class Response {
/** @codegen_name endpoint_info */
body: InferenceEndpointInfo
body: InferenceEndpointInfoAmazonBedrock
}
4 changes: 2 additions & 2 deletions specification/inference/put_anthropic/PutAnthropicResponse.ts
@@ -17,9 +17,9 @@
* under the License.
*/

import { InferenceEndpointInfo } from '@inference/_types/Services'
import { InferenceEndpointInfoAnthropic } from '@inference/_types/Services'

export class Response {
/** @codegen_name endpoint_info */
body: InferenceEndpointInfo
body: InferenceEndpointInfoAnthropic
}
@@ -17,9 +17,9 @@
* under the License.
*/

import { InferenceEndpointInfo } from '@inference/_types/Services'
import { InferenceEndpointInfoAzureAIStudio } from '@inference/_types/Services'

export class Response {
/** @codegen_name endpoint_info */
body: InferenceEndpointInfo
body: InferenceEndpointInfoAzureAIStudio
}
@@ -17,9 +17,9 @@
* under the License.
*/

import { InferenceEndpointInfo } from '@inference/_types/Services'
import { InferenceEndpointInfoAzureOpenAI } from '@inference/_types/Services'

export class Response {
/** @codegen_name endpoint_info */
body: InferenceEndpointInfo
body: InferenceEndpointInfoAzureOpenAI
}
4 changes: 2 additions & 2 deletions specification/inference/put_cohere/PutCohereResponse.ts
@@ -17,9 +17,9 @@
* under the License.
*/

import { InferenceEndpointInfo } from '@inference/_types/Services'
import { InferenceEndpointInfoCohere } from '@inference/_types/Services'

export class Response {
/** @codegen_name endpoint_info */
body: InferenceEndpointInfo
body: InferenceEndpointInfoCohere
}
@@ -17,9 +17,9 @@
* under the License.
*/

import { InferenceEndpointInfo } from '@inference/_types/Services'
import { InferenceEndpointInfoElasticsearch } from '@inference/_types/Services'

export class Response {
/** @codegen_name endpoint_info */
body: InferenceEndpointInfo
body: InferenceEndpointInfoElasticsearch
}
4 changes: 2 additions & 2 deletions specification/inference/put_elser/PutElserResponse.ts
@@ -17,9 +17,9 @@
* under the License.
*/

import { InferenceEndpointInfo } from '@inference/_types/Services'
import { InferenceEndpointInfoELSER } from '@inference/_types/Services'

export class Response {
/** @codegen_name endpoint_info */
body: InferenceEndpointInfo
body: InferenceEndpointInfoELSER
}
@@ -17,9 +17,9 @@
* under the License.
*/

import { InferenceEndpointInfo } from '@inference/_types/Services'
import { InferenceEndpointInfoGoogleAIStudio } from '@inference/_types/Services'

export class Response {
/** @codegen_name endpoint_info */
body: InferenceEndpointInfo
body: InferenceEndpointInfoGoogleAIStudio
}
@@ -17,9 +17,9 @@
* under the License.
*/

import { InferenceEndpointInfo } from '@inference/_types/Services'
import { InferenceEndpointInfoGoogleVertexAI } from '@inference/_types/Services'

export class Response {
/** @codegen_name endpoint_info */
body: InferenceEndpointInfo
body: InferenceEndpointInfoGoogleVertexAI
}
@@ -17,9 +17,9 @@
* under the License.
*/

import { InferenceEndpointInfo } from '@inference/_types/Services'
import { InferenceEndpointInfoHuggingFace } from '@inference/_types/Services'

export class Response {
/** @codegen_name endpoint_info */
body: InferenceEndpointInfo
body: InferenceEndpointInfoHuggingFace
}