Skip to content

Commit efe223c

Browse files
Update inference specification for Hugging Face's rerank task (#4417) (#4563)
(cherry picked from commit eba5dca)
1 parent d897f73 commit efe223c

File tree

12 files changed

+309
-35
lines changed

12 files changed

+309
-35
lines changed

output/openapi/elasticsearch-openapi.json

Lines changed: 45 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

output/openapi/elasticsearch-serverless-openapi.json

Lines changed: 45 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

output/schema/schema.json

Lines changed: 101 additions & 27 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

output/typescript/types.ts

Lines changed: 7 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

specification/inference/_types/CommonTypes.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -979,9 +979,22 @@ export class HuggingFaceServiceSettings {
979979
model_id?: string
980980
}
981981

982+
export class HuggingFaceTaskSettings {
983+
/**
984+
* For a `rerank` task, return the document text within the results.
985+
*/
986+
return_documents?: boolean
987+
/**
988+
* For a `rerank` task, the number of most relevant documents to return.
989+
* It defaults to the number of input documents.
990+
*/
991+
top_n?: integer
992+
}
993+
982994
export enum HuggingFaceTaskType {
983995
chat_completion,
984996
completion,
997+
rerank,
985998
text_embedding
986999
}
9871000

specification/inference/put/PutRequest.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ import { TaskType } from '@inference/_types/TaskType'
4040
* * ELSER (`sparse_embedding`)
4141
* * Google AI Studio (`completion`, `text_embedding`)
4242
* * Google Vertex AI (`rerank`, `text_embedding`)
43-
* * Hugging Face (`chat_completion`, `completion`, `text_embedding`)
43+
* * Hugging Face (`chat_completion`, `completion`, `rerank`, `text_embedding`)
4444
* * Mistral (`chat_completion`, `completion`, `text_embedding`)
4545
* * OpenAI (`chat_completion`, `completion`, `text_embedding`)
4646
* * VoyageAI (`text_embedding`, `rerank`)

specification/inference/put_hugging_face/PutHuggingFaceRequest.ts

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import { Id } from '@_types/common'
2222
import {
2323
HuggingFaceServiceSettings,
2424
HuggingFaceServiceType,
25+
HuggingFaceTaskSettings,
2526
HuggingFaceTaskType
2627
} from '@inference/_types/CommonTypes'
2728
import { InferenceChunkingSettings } from '@inference/_types/Services'
@@ -56,6 +57,16 @@ import { InferenceChunkingSettings } from '@inference/_types/Services'
5657
* * `Mistral-7B-Instruct-v0.2`
5758
* * `QwQ-32B`
5859
* * `Phi-3-mini-128k-instruct`
60+
*
61+
* For Elastic's `rerank` task:
62+
* The selected model must support the `sentence-ranking` task and expose the OpenAI API.
63+
* Hugging Face currently supports only dedicated (not serverless) endpoints for `rerank`.
64+
* After the endpoint is initialized, copy the full endpoint URL for use.
65+
* Tested models for the `rerank` task:
66+
*
67+
* * `bge-reranker-base`
68+
* * `jina-reranker-v1-turbo-en-GGUF`
69+
*
5970
* @rest_spec_name inference.put_hugging_face
6071
* @availability stack since=8.12.0 stability=stable visibility=public
6172
* @availability serverless stability=stable visibility=public
@@ -93,5 +104,10 @@ export interface Request extends RequestBase {
93104
* Settings used to install the inference model. These settings are specific to the `hugging_face` service.
94105
*/
95106
service_settings: HuggingFaceServiceSettings
107+
/**
108+
* Settings to configure the inference task.
109+
* These settings are specific to the task type you specified.
110+
*/
111+
task_settings?: HuggingFaceTaskSettings
96112
}
97113
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
summary: A rerank task
2+
description: Run `PUT _inference/rerank/hugging-face-rerank` to create an inference endpoint that performs a `rerank` task.
3+
# method_request: "PUT _inference/rerank/hugging-face-rerank"
4+
# type: "request"
5+
value: |-
6+
{
7+
"service": "hugging_face",
8+
"service_settings": {
9+
"api_key": "hugging-face-access-token",
10+
"url": "url-endpoint"
11+
},
12+
"task_settings": {
13+
"return_documents": true,
14+
"top_n": 3
15+
}
16+
}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
summary: Rerank task
2+
description: Run `POST _inference/rerank/bge-reranker-base-mkn` to perform reranking on the example input via Hugging Face.
3+
# method_request: "POST _inference/rerank/bge-reranker-base-mkn"
4+
# type: "request"
5+
value: |-
6+
{
7+
"input": ["luke", "like", "leia", "chewy","r2d2", "star", "wars"],
8+
"query": "star wars main character",
9+
"return_documents": false,
10+
"top_n": 2
11+
}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
summary: Rerank task
2+
description: Run `POST _inference/rerank/bge-reranker-base-mkn` to perform reranking on the example input via Hugging Face.
3+
# method_request: "POST _inference/rerank/bge-reranker-base-mkn"
4+
# type: "request"
5+
value: |-
6+
{
7+
"input": ["luke", "like", "leia", "chewy","r2d2", "star", "wars"],
8+
"query": "star wars main character",
9+
"return_documents": true,
10+
"top_n": 3
11+
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
summary: Rerank task
2+
description: >
3+
A successful response from `POST _inference/rerank/bge-reranker-base-mkn`.
4+
# type: "response"
5+
# response_code:
6+
value: |-
7+
{
8+
"rerank": [
9+
{
10+
"index": 6,
11+
"relevance_score": 0.50955844
12+
},
13+
{
14+
"index": 5,
15+
"relevance_score": 0.084341794
16+
}
17+
]
18+
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
summary: Rerank task
2+
description: >
3+
A successful response from `POST _inference/rerank/bge-reranker-base-mkn`.
4+
# type: "response"
5+
# response_code:
6+
value: |-
7+
{
8+
"rerank": [
9+
{
10+
"index": 6,
11+
"relevance_score": 0.50955844,
12+
"text": "wars"
13+
},
14+
{
15+
"index": 5,
16+
"relevance_score": 0.084341794,
17+
"text": "star"
18+
},
19+
{
20+
"index": 3,
21+
"relevance_score": 0.004520818,
22+
"text": "chewy"
23+
}
24+
]
25+
}

0 commit comments

Comments
 (0)