
Commit 1540c48

Add text-ranking pipeline tag (#1267)
Hello!

## Pull Request overview

* Add `text-ranking` pipeline tag
* Slightly update the docs for `sentence-similarity`

## Details

This PR adds a `text-ranking` pipeline tag for reranker models like:

* https://huggingface.co/models?author=cross-encoder
* https://huggingface.co/models?search=reranker

E.g.:

```python
from sentence_transformers import CrossEncoder

# 1. Load a pre-trained CrossEncoder model
model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L6-v2")

# 2a. Either: predict scores for a pair of sentences
scores = model.predict([
    ("How many people live in Berlin?", "Berlin had a population of 3,520,031 registered inhabitants in an area of 891.82 square kilometers."),
    ("How many people live in Berlin?", "Berlin is well known for its museums."),
])
# => array([ 8.607138 , -4.3200774], dtype=float32)

# 2b. Or: rank a list of passages for a query
query = "How many people live in Berlin?"
passages = [
    "Berlin had a population of 3,520,031 registered inhabitants in an area of 891.82 square kilometers.",
    "Berlin is well known for its museums.",
    "In 2014, the city state Berlin had 37,368 live births (+6.6%), a record number since 1991.",
    "The urban area of Berlin comprised about 4.1 million people in 2014, making it the seventh most populous urban area in the European Union.",
    "The city of Paris had a population of 2,165,423 people within its administrative city limits as of January 1, 2019",
    "An estimated 300,000-420,000 Muslims reside in Berlin, making up about 8-11 percent of the population.",
    "Berlin is subdivided into 12 boroughs or districts (Bezirke).",
    "In 2015, the total labour force in Berlin was 1.85 million.",
    "In 2013 around 600,000 Berliners were registered in one of the more than 2,300 sport and fitness clubs.",
    "Berlin has a yearly total of about 135 million day visitors, which puts it in third place among the most-visited city destinations in the European Union.",
]
ranks = model.rank(query, passages)

# Print the scores
print("Query:", query)
for rank in ranks:
    print(f"{rank['score']:.2f}\t{passages[rank['corpus_id']]}")
"""
Query: How many people live in Berlin?
8.92	The urban area of Berlin comprised about 4.1 million people in 2014, making it the seventh most populous urban area in the European Union.
8.61	Berlin had a population of 3,520,031 registered inhabitants in an area of 891.82 square kilometers.
8.24	An estimated 300,000-420,000 Muslims reside in Berlin, making up about 8-11 percent of the population.
7.60	In 2014, the city state Berlin had 37,368 live births (+6.6%), a record number since 1991.
6.35	In 2013 around 600,000 Berliners were registered in one of the more than 2,300 sport and fitness clubs.
5.42	Berlin has a yearly total of about 135 million day visitors, which puts it in third place among the most-visited city destinations in the European Union.
3.45	In 2015, the total labour force in Berlin was 1.85 million.
0.33	Berlin is subdivided into 12 boroughs or districts (Bezirke).
-4.24	The city of Paris had a population of 2,165,423 people within its administrative city limits as of January 1, 2019
-4.32	Berlin is well known for its museums.
"""
```

I haven't created a spec for the API here, as I think that's better left to those who've created other specs. I think we might already have a Sentence Ranking API that we might not want to break.

This is slightly blocking the next [Sentence Transformers release](https://github.com/UKPLab/sentence-transformers), as I'd like to know whether I can tag CrossEncoder (a.k.a. reranker) models as `text-ranking`.

Related to this PR: huggingface-internal/moon-landing#12877 (private repo).

- Tom Aarsen
1 parent 0a0960c commit 1540c48

File tree

6 files changed: +176 −4 lines changed


packages/tasks/src/pipelines.ts

Lines changed: 5 additions & 0 deletions
```diff
@@ -550,6 +550,11 @@ export const PIPELINE_DATA = {
 		color: "blue",
 		hideInModels: true,
 	},
+	"text-ranking": {
+		name: "Text Ranking",
+		modality: "nlp",
+		color: "red",
+	},
 	"text-retrieval": {
 		name: "Text Retrieval",
 		subtasks: [
```

packages/tasks/src/tasks/index.ts

Lines changed: 2 additions & 0 deletions
```diff
@@ -147,6 +147,7 @@ export const TASKS_MODEL_LIBRARIES: Record<PipelineType, ModelLibraryKey[]> = {
 	"tabular-to-text": ["transformers"],
 	"text-classification": ["adapter-transformers", "setfit", "spacy", "transformers", "transformers.js"],
 	"text-generation": ["transformers", "transformers.js"],
+	"text-ranking": ["sentence-transformers", "transformers"],
 	"text-retrieval": [],
 	"text-to-image": ["diffusers"],
 	"text-to-speech": ["espnet", "tensorflowtts", "transformers", "transformers.js"],
@@ -232,6 +233,7 @@ export const TASKS_DATA: Record<PipelineType, TaskData | undefined> = {
 	"tabular-to-text": undefined,
 	"text-classification": getData("text-classification", textClassification),
 	"text-generation": getData("text-generation", textGeneration),
+	"text-ranking": getData("text-ranking", placeholder),
 	"text-retrieval": undefined,
 	"text-to-image": getData("text-to-image", textToImage),
 	"text-to-speech": getData("text-to-speech", textToSpeech),
```

packages/tasks/src/tasks/sentence-similarity/about.md

Lines changed: 3 additions & 3 deletions
```diff
@@ -8,7 +8,7 @@ You can extract information from documents using Sentence Similarity models. The
 
 The [Sentence Transformers](https://www.sbert.net/) library is very powerful for calculating embeddings of sentences, paragraphs, and entire documents. An embedding is just a vector representation of a text and is useful for finding how similar two texts are.
 
-You can find and use [hundreds of Sentence Transformers](https://huggingface.co/models?library=sentence-transformers&sort=downloads) models from the Hub by directly using the library, playing with the widgets in the browser or using Inference Endpoints.
+You can find and use [thousands of Sentence Transformers](https://huggingface.co/models?library=sentence-transformers&sort=downloads) models from the Hub by directly using the library, playing with the widgets in the browser or using Inference Endpoints.
 
 ## Task Variants
 
@@ -79,8 +79,8 @@ sentences = ["I'm happy", "I'm full of happiness"]
 
 model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
 
-#Compute embedding for both lists
-embedding_1= model.encode(sentences[0], convert_to_tensor=True)
+# Compute embedding for both lists
+embedding_1 = model.encode(sentences[0], convert_to_tensor=True)
 embedding_2 = model.encode(sentences[1], convert_to_tensor=True)
 
 util.pytorch_cos_sim(embedding_1, embedding_2)
```
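The corrected snippet in this diff ends by calling `util.pytorch_cos_sim` on the two embeddings. As a reminder of what that call computes, here is a dependency-free sketch of cosine similarity; it is illustrative only (the real function operates on PyTorch tensors and returns a tensor), and the toy vectors below are hypothetical stand-ins, not real model outputs.

```python
import math

def cosine_similarity(a: list[float], b: list[float]) -> float:
    """Cosine of the angle between two vectors: dot(a, b) / (|a| * |b|)."""
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x * x for x in a))
    norm_b = math.sqrt(sum(x * x for x in b))
    return dot / (norm_a * norm_b)

# Toy vectors standing in for sentence embeddings (not produced by any model)
emb_happy = [0.8, 0.6, 0.1]
emb_happiness = [0.7, 0.7, 0.2]
print(cosine_similarity(emb_happy, emb_happiness))  # close to 1.0 for similar texts
```

Similar sentences map to vectors pointing in nearly the same direction, so their cosine similarity approaches 1.0; unrelated sentences score near 0.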

packages/tasks/src/tasks/sentence-similarity/data.ts

Lines changed: 1 addition & 1 deletion
```diff
@@ -4,7 +4,7 @@ const taskData: TaskDataCustom = {
 	datasets: [
 		{
 			description: "Bing queries with relevant passages from various web sources.",
-			id: "ms_marco",
+			id: "microsoft/ms_marco",
 		},
 	],
 	demo: {
```
New file — Lines changed: 74 additions & 0 deletions
```diff
@@ -0,0 +1,74 @@
+## Use Cases 🔍
+
+### Information Retrieval
+
+You can improve Information Retrieval search stacks by applying a Text Ranking model as a Reranker in the common "[Retrieve and Rerank pipeline](https://sbert.net/examples/applications/retrieve_rerank/README.html)". First, you can use a [Sentence Similarity](https://huggingface.co/tasks/sentence-similarity) or [Feature Extraction](https://huggingface.co/tasks/feature-extraction) model as a Retriever to find the (for example) 100 most relevant documents for a query. Afterwards, you can rerank each of these 100 documents with a Text Ranking model to select an updated top 10. This often results in better retrieval performance than using a Retriever model alone.
+
+## The Sentence Transformers library
+
+The [Sentence Transformers](https://www.sbert.net/) library is very powerful for using and training both Sentence Transformer (a.k.a. embedding or retriever) models and Cross Encoder (a.k.a. reranker) models.
+
+You can find and use [Sentence Transformers](https://huggingface.co/models?library=sentence-transformers&sort=downloads) models from the Hub by directly using the library, playing with the widgets in the browser or using Inference Endpoints.
+
+## Task Variants
+
+### Passage Ranking
+
+Passage Ranking is the task of ranking documents based on their relevance to a given query. The task is evaluated on Normalized Discounted Cumulative Gain, Mean Reciprocal Rank, or Mean Average Precision. These models take one query and multiple documents and return the documents ranked by their relevance to the query. 📄
+
+You can use it via the [Sentence Transformers library](https://sbert.net/docs/cross_encoder/usage/usage.html) like so:
+
+```python
+from sentence_transformers import CrossEncoder
+
+# 1. Load a pre-trained CrossEncoder model
+model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L6-v2")
+
+query = "How many people live in Berlin?"
+passages = [
+    "Berlin had a population of 3,520,031 registered inhabitants in an area of 891.82 square kilometers.",
+    "Berlin is well known for its museums.",
+    "In 2014, the city state Berlin had 37,368 live births (+6.6%), a record number since 1991.",
+    "The urban area of Berlin comprised about 4.1 million people in 2014, making it the seventh most populous urban area in the European Union.",
+    "The city of Paris had a population of 2,165,423 people within its administrative city limits as of January 1, 2019",
+    "An estimated 300,000-420,000 Muslims reside in Berlin, making up about 8-11 percent of the population.",
+    "Berlin is subdivided into 12 boroughs or districts (Bezirke).",
+    "In 2015, the total labour force in Berlin was 1.85 million.",
+    "In 2013 around 600,000 Berliners were registered in one of the more than 2,300 sport and fitness clubs.",
+    "Berlin has a yearly total of about 135 million day visitors, which puts it in third place among the most-visited city destinations in the European Union.",
+]
+
+# 2a. Either: predict scores for all pairs of sentences involved in the query
+scores = model.predict([(query, passage) for passage in passages])
+# => [ 8.607138 -4.320077 7.5978117 8.915804 -4.237982 8.2359 0.33119553 3.4510403 6.352979 5.416662 ]
+
+# 2b. Or: rank a list of passages for a query
+ranks = model.rank(query, passages, return_documents=True)
+
+# Print the reranked passages
+print("Query:", query)
+for rank in ranks:
+    print(f"- #{rank['corpus_id']} ({rank['score']:.2f}): {rank['text']}")
+"""
+Query: How many people live in Berlin?
+- #3 (8.92): The urban area of Berlin comprised about 4.1 million people in 2014, making it the seventh most populous urban area in the European Union.
+- #0 (8.61): Berlin had a population of 3,520,031 registered inhabitants in an area of 891.82 square kilometers.
+- #5 (8.24): An estimated 300,000-420,000 Muslims reside in Berlin, making up about 8-11 percent of the population.
+- #2 (7.60): In 2014, the city state Berlin had 37,368 live births (+6.6%), a record number since 1991.
+- #8 (6.35): In 2013 around 600,000 Berliners were registered in one of the more than 2,300 sport and fitness clubs.
+- #9 (5.42): Berlin has a yearly total of about 135 million day visitors, which puts it in third place among the most-visited city destinations in the European Union.
+- #7 (3.45): In 2015, the total labour force in Berlin was 1.85 million.
+- #6 (0.33): Berlin is subdivided into 12 boroughs or districts (Bezirke).
+- #4 (-4.24): The city of Paris had a population of 2,165,423 people within its administrative city limits as of January 1, 2019
+- #1 (-4.32): Berlin is well known for its museums.
+"""
+```
+
+Rerankers often outperform [Sentence Similarity](https://huggingface.co/tasks/sentence-similarity) or [Feature Extraction](https://huggingface.co/tasks/feature-extraction) models, but they're too slow to rank a query against all documents. This is why they're commonly used to perform a final reranking of the top documents from a retriever: you can get the efficiency of a retriever model with the performance of a reranker.
+
+## Useful Resources
+
+Would you like to learn more about Text Ranking? Here are some curated resources that you may find helpful!
+
+- [Sentence Transformers > Cross Encoder Documentation](https://www.sbert.net/docs/cross_encoder/usage/usage.html)
+- [Sentence Transformers > Usage > Retrieve & Re-Rank](https://www.sbert.net/examples/applications/retrieve_rerank/README.html)
```
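Setting the model call aside, the reranking step documented in the file above is just a sort by score. A minimal sketch of that data flow, with hard-coded scores standing in for real `CrossEncoder` outputs (no model is run here; the dict shape mirrors what `rank(..., return_documents=True)` produces):

```python
query = "How many people live in Berlin?"
passages = [
    "Berlin had a population of 3,520,031 registered inhabitants in an area of 891.82 square kilometers.",
    "Berlin is well known for its museums.",
    "The urban area of Berlin comprised about 4.1 million people in 2014, making it the seventh most populous urban area in the European Union.",
]
scores = [8.61, -4.32, 8.92]  # stand-in relevance scores, not model outputs

# Pair each passage with its index and score, then sort by score descending,
# mirroring the list-of-dicts shape that CrossEncoder.rank returns
ranks = sorted(
    ({"corpus_id": i, "score": s, "text": t} for i, (t, s) in enumerate(zip(passages, scores))),
    key=lambda r: r["score"],
    reverse=True,
)

print("Query:", query)
for rank in ranks:
    print(f"- #{rank['corpus_id']} ({rank['score']:.2f}): {rank['text']}")
```

The highest-scoring passage (index 2 here) is printed first, the off-topic museum passage last, matching the ordering shown in the file's example output.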
New file — Lines changed: 91 additions & 0 deletions
```diff
@@ -0,0 +1,91 @@
+import type { TaskDataCustom } from "../index.js";
+
+const taskData: TaskDataCustom = {
+	datasets: [
+		{
+			description: "Bing queries with relevant passages from various web sources.",
+			id: "microsoft/ms_marco",
+		},
+	],
+	demo: {
+		inputs: [
+			{
+				label: "Source sentence",
+				content: "Machine learning is so easy.",
+				type: "text",
+			},
+			{
+				label: "Sentences to compare to",
+				content: "Deep learning is so straightforward.",
+				type: "text",
+			},
+			{
+				label: "",
+				content: "This is so difficult, like rocket science.",
+				type: "text",
+			},
+			{
+				label: "",
+				content: "I can't believe how much I struggled with this.",
+				type: "text",
+			},
+		],
+		outputs: [
+			{
+				type: "chart",
+				data: [
+					{
+						label: "Deep learning is so straightforward.",
+						score: 2.2006407,
+					},
+					{
+						label: "This is so difficult, like rocket science.",
+						score: -6.2634873,
+					},
+					{
+						label: "I can't believe how much I struggled with this.",
+						score: -10.251488,
+					},
+				],
+			},
+		],
+	},
+	metrics: [
+		{
+			description:
+				"Discounted Cumulative Gain (DCG) measures the gain, or usefulness, of search results discounted by their position. The normalization is done by dividing the DCG by the ideal DCG, which is the DCG of the perfect ranking.",
+			id: "Normalized Discounted Cumulative Gain",
+		},
+		{
+			description:
+				"Reciprocal Rank is the reciprocal of the rank at which the first relevant document is retrieved: if the rank is 3, the Reciprocal Rank is 0.33; if the rank is 1, the Reciprocal Rank is 1.",
+			id: "Mean Reciprocal Rank",
+		},
+		{
+			description:
+				"Mean Average Precision (mAP) is the overall average of the Average Precision (AP) values, where AP is the Area Under the PR Curve (AUC-PR).",
+			id: "Mean Average Precision",
+		},
+	],
+	models: [
+		{
+			description: "An extremely efficient text ranking model trained on a web search dataset.",
+			id: "cross-encoder/ms-marco-MiniLM-L6-v2",
+		},
+		{
+			description: "A strong multilingual text reranker model.",
+			id: "Alibaba-NLP/gte-multilingual-reranker-base",
+		},
+		{
+			description: "An efficient text ranking model that punches above its weight.",
+			id: "Alibaba-NLP/gte-reranker-modernbert-base",
+		},
+	],
+	spaces: [],
+	summary:
+		"Text Ranking is the task of ranking a set of texts based on their relevance to a query. Text ranking models are trained on large datasets of queries and relevant documents to learn how to rank documents based on their relevance to the query. This task is particularly useful for search engines and information retrieval systems.",
+	widgetModels: ["cross-encoder/ms-marco-MiniLM-L6-v2"],
+	youtubeId: "",
+};
+
+export default taskData;
```
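The three metrics described in the task data above (NDCG, Mean Reciprocal Rank, Mean Average Precision) can each be computed in a few lines. A sketch under the usual conventions (graded relevance labels for NDCG, a binary relevant set for RR and AP); the helper names are illustrative, not part of any library, and MRR/mAP are the means of these per-query values over a query set:

```python
import math

def reciprocal_rank(ranked_ids, relevant):
    """1 / rank of the first relevant document; 0 if none is retrieved."""
    for position, doc_id in enumerate(ranked_ids, start=1):
        if doc_id in relevant:
            return 1.0 / position
    return 0.0

def ndcg(relevances):
    """Normalized Discounted Cumulative Gain over graded relevance labels, in ranked order."""
    def dcg(rels):
        return sum(rel / math.log2(pos + 1) for pos, rel in enumerate(rels, start=1))
    ideal = dcg(sorted(relevances, reverse=True))
    return dcg(relevances) / ideal if ideal > 0 else 0.0

def average_precision(ranked_ids, relevant):
    """Mean of precision@k at each position where a relevant document appears."""
    hits, total = 0, 0.0
    for position, doc_id in enumerate(ranked_ids, start=1):
        if doc_id in relevant:
            hits += 1
            total += hits / position
    return total / len(relevant) if relevant else 0.0

ranked = ["d3", "d0", "d5"]   # a system's ranking (toy document ids)
relevant = {"d0", "d5"}       # gold relevant set for the query
print(reciprocal_rank(ranked, relevant))    # 0.5: first relevant doc at rank 2
print(average_precision(ranked, relevant))  # (1/2 + 2/3) / 2 ≈ 0.583
print(ndcg([3, 2, 0, 1]))                   # < 1.0: the grade-1 doc is ranked below the grade-0 doc
```

A perfect ranking yields 1.0 on all three; each metric penalizes relevant documents that appear further down the list, which is exactly what a text-ranking model is trained to avoid.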

0 commit comments