Skip to content

Commit a087416

Browse files
merveenoyanMerve Noyanpcuenca
authored
Tasks: update widget models, model recommendations (#876)
Updated widget models and model recommendations and fixed former canonical model and dataset ids. --------- Co-authored-by: Merve Noyan <[email protected]> Co-authored-by: Pedro Cuenca <[email protected]>
1 parent 148e413 commit a087416

File tree

21 files changed

+106
-57
lines changed

21 files changed

+106
-57
lines changed

packages/tasks/src/tasks/audio-classification/data.ts

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,11 @@ const taskData: TaskDataCustom = {
44
datasets: [
55
{
66
description: "A benchmark of 10 different audio tasks.",
7-
id: "superb",
7+
id: "s3prl/superb",
8+
},
9+
{
10+
description: "A dataset of YouTube clips and their sound categories.",
11+
id: "agkphysics/AudioSet",
812
},
913
],
1014
demo: {
@@ -50,11 +54,11 @@ const taskData: TaskDataCustom = {
5054
],
5155
models: [
5256
{
53-
description: "An easy-to-use model for Command Recognition.",
57+
description: "An easy-to-use model for command recognition.",
5458
id: "speechbrain/google_speech_command_xvector",
5559
},
5660
{
57-
description: "An Emotion Recognition model.",
61+
description: "An emotion recognition model.",
5862
id: "ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
5963
},
6064
{
@@ -70,7 +74,7 @@ const taskData: TaskDataCustom = {
7074
],
7175
summary:
7276
"Audio classification is the task of assigning a label or class to a given audio. It can be used for recognizing which command a user is giving or the emotion of a statement, as well as identifying a speaker.",
73-
widgetModels: ["facebook/mms-lid-126"],
77+
widgetModels: ["MIT/ast-finetuned-audioset-10-10-0.4593"],
7478
youtubeId: "KWwzcmG98Ds",
7579
};
7680

packages/tasks/src/tasks/audio-to-audio/data.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,11 @@ const taskData: TaskDataCustom = {
4444
},
4545
{
4646
description: "A speech enhancement model.",
47-
id: "speechbrain/metricgan-plus-voicebank",
47+
id: "ResembleAI/resemble-enhance",
48+
},
49+
{
50+
description: "A model that can change the voice in a speech recording.",
51+
id: "microsoft/speecht5_vc",
4852
},
4953
],
5054
spaces: [

packages/tasks/src/tasks/automatic-speech-recognition/data.ts

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ const taskData: TaskDataCustom = {
77
id: "mozilla-foundation/common_voice_17_0",
88
},
99
{
10-
description: "An English dataset with 1,000 hours of data.",
11-
id: "librispeech_asr",
10+
description: "A dataset with 44.6k hours of English speaker data and 6k hours of other language speakers.",
11+
id: "parler-tts/mls_eng",
1212
},
1313
{
1414
description: "A multi-lingual audio dataset with 370K hours of audio.",
@@ -54,6 +54,10 @@ const taskData: TaskDataCustom = {
5454
description: "An end-to-end model that performs ASR and Speech Translation by MetaAI.",
5555
id: "facebook/seamless-m4t-v2-large",
5656
},
57+
{
58+
description: "Powerful speaker diarization model.",
59+
id: "pyannote/speaker-diarization-3.1",
60+
},
5761
],
5862
spaces: [
5963
{

packages/tasks/src/tasks/document-question-answering/data.ts

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,15 @@ const taskData: TaskDataCustom = {
4646
],
4747
models: [
4848
{
49-
description: "A LayoutLM model for the document QA task, fine-tuned on DocVQA and SQuAD2.0.",
49+
description: "A robust document question answering model.",
5050
id: "impira/layoutlm-document-qa",
5151
},
5252
{
53-
description: "A special model for OCR-free Document QA task.",
53+
description: "A document question answering model specialized in invoices.",
54+
id: "impira/layoutlm-invoices",
55+
},
56+
{
57+
description: "A special model for OCR-free document question answering.",
5458
id: "microsoft/udop-large",
5559
},
5660
{
@@ -74,7 +78,7 @@ const taskData: TaskDataCustom = {
7478
],
7579
summary:
7680
"Document Question Answering (also known as Document Visual Question Answering) is the task of answering questions on document images. Document question answering models take a (document, question) pair as input and return an answer in natural language. Models usually rely on multi-modal features, combining text, position of words (bounding-boxes) and image.",
77-
widgetModels: ["impira/layoutlm-document-qa"],
81+
widgetModels: ["impira/layoutlm-invoices"],
7882
youtubeId: "",
7983
};
8084

packages/tasks/src/tasks/fill-mask/data.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,12 +61,12 @@ const taskData: TaskDataCustom = {
6161
],
6262
models: [
6363
{
64-
description: "A faster and smaller model than the famous BERT model.",
65-
id: "distilbert-base-uncased",
64+
description: "The famous BERT model.",
65+
id: "google-bert/bert-base-uncased",
6666
},
6767
{
6868
description: "A multilingual model trained on 100 languages.",
69-
id: "xlm-roberta-base",
69+
id: "FacebookAI/xlm-roberta-base",
7070
},
7171
],
7272
spaces: [],

packages/tasks/src/tasks/image-segmentation/data.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ const taskData: TaskDataCustom = {
9292
],
9393
summary:
9494
"Image Segmentation divides an image into segments where each pixel in the image is mapped to an object. This task has multiple variants such as instance segmentation, panoptic segmentation and semantic segmentation.",
95-
widgetModels: ["facebook/detr-resnet-50-panoptic"],
95+
widgetModels: ["nvidia/segformer-b0-finetuned-ade-512-512"],
9696
youtubeId: "dKE8SIt9C-w",
9797
};
9898

packages/tasks/src/tasks/image-to-image/data.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ const taskData: TaskDataCustom = {
9494
],
9595
summary:
9696
"Image-to-image is the task of transforming an input image through a variety of possible manipulations and enhancements, such as super-resolution, image inpainting, colorization, and more.",
97-
widgetModels: ["lllyasviel/sd-controlnet-canny"],
97+
widgetModels: ["stabilityai/stable-diffusion-2-inpainting"],
9898
youtubeId: "",
9999
};
100100

packages/tasks/src/tasks/image-to-text/data.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ const taskData: TaskDataCustom = {
7575
],
7676
summary:
7777
"Image to text models output a text from a given image. Image captioning or optical character recognition can be considered as the most common applications of image to text.",
78-
widgetModels: ["Salesforce/blip-image-captioning-base"],
78+
widgetModels: ["Salesforce/blip-image-captioning-large"],
7979
youtubeId: "",
8080
};
8181

packages/tasks/src/tasks/question-answering/data.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,11 @@ const taskData: TaskDataCustom = {
5252
id: "deepset/roberta-base-squad2",
5353
},
5454
{
55-
description: "A special model that can answer questions from tables!",
55+
description: "Small yet robust model that can answer questions.",
56+
id: "distilbert/distilbert-base-cased-distilled-squad",
57+
},
58+
{
59+
description: "A special model that can answer questions from tables.",
5660
id: "google/tapas-base-finetuned-wtq",
5761
},
5862
],

packages/tasks/src/tasks/sentence-similarity/data.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,8 @@ const taskData: TaskDataCustom = {
6969
id: "sentence-transformers/all-mpnet-base-v2",
7070
},
7171
{
72-
description: "A multilingual model trained for FAQ retrieval.",
73-
id: "clips/mfaq",
72+
description: "A robust multilingual sentence similarity model.",
73+
id: "BAAI/bge-m3",
7474
},
7575
],
7676
spaces: [
@@ -94,7 +94,7 @@ const taskData: TaskDataCustom = {
9494
],
9595
summary:
9696
"Sentence Similarity is the task of determining how similar two texts are. Sentence similarity models convert input texts into vectors (embeddings) that capture semantic information and calculate how close (similar) they are between them. This task is particularly useful for information retrieval and clustering/grouping.",
97-
widgetModels: ["sentence-transformers/all-MiniLM-L6-v2"],
97+
widgetModels: ["BAAI/bge-small-en-v1.5"],
9898
youtubeId: "VCZq5AkbNEU",
9999
};
100100

packages/tasks/src/tasks/summarization/data.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ const taskData: TaskDataCustom = {
4646
},
4747
{
4848
description: "A summarization model trained on medical articles.",
49-
id: "google/bigbird-pegasus-large-pubmed",
49+
id: "Falconsai/medical_summarization",
5050
},
5151
],
5252
spaces: [
@@ -69,7 +69,7 @@ const taskData: TaskDataCustom = {
6969
],
7070
summary:
7171
"Summarization is the task of producing a shorter version of a document while preserving its important information. Some models can extract text from the original input, while other models can generate entirely new text.",
72-
widgetModels: ["sshleifer/distilbart-cnn-12-6"],
72+
widgetModels: ["facebook/bart-large-cnn"],
7373
youtubeId: "yHnr5Dk2zCI",
7474
};
7575

packages/tasks/src/tasks/text-classification/data.ts

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@ const taskData: TaskDataCustom = {
44
datasets: [
55
{
66
description: "A widely used dataset used to benchmark multiple variants of text classification.",
7-
id: "glue",
7+
id: "nyu-mll/glue",
88
},
99
{
1010
description: "A text classification dataset used to benchmark natural language inference models.",
11-
id: "snli",
11+
id: "stanfordnlp/snli",
1212
},
1313
],
1414
demo: {
@@ -61,11 +61,23 @@ const taskData: TaskDataCustom = {
6161
models: [
6262
{
6363
description: "A robust model trained for sentiment analysis.",
64-
id: "distilbert-base-uncased-finetuned-sst-2-english",
64+
id: "distilbert/distilbert-base-uncased-finetuned-sst-2-english",
6565
},
6666
{
67-
description: "Multi-genre natural language inference model.",
68-
id: "roberta-large-mnli",
67+
description: "A sentiment analysis model specialized in financial sentiment.",
68+
id: "ProsusAI/finbert",
69+
},
70+
{
71+
description: "A sentiment analysis model specialized in analyzing tweets.",
72+
id: "cardiffnlp/twitter-roberta-base-sentiment-latest",
73+
},
74+
{
75+
description: "A model that can classify languages.",
76+
id: "papluca/xlm-roberta-base-language-detection",
77+
},
78+
{
79+
description: "A model that can classify text generation attacks.",
80+
id: "meta-llama/Prompt-Guard-86M",
6981
},
7082
],
7183
spaces: [
@@ -84,7 +96,7 @@ const taskData: TaskDataCustom = {
8496
],
8597
summary:
8698
"Text Classification is the task of assigning a label or class to a given text. Some use cases are sentiment analysis, natural language inference, and assessing grammatical correctness.",
87-
widgetModels: ["distilbert-base-uncased-finetuned-sst-2-english"],
99+
widgetModels: ["distilbert/distilbert-base-uncased-finetuned-sst-2-english"],
88100
youtubeId: "leNG9fN9FQU",
89101
};
90102

packages/tasks/src/tasks/text-generation/data.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -97,8 +97,8 @@ const taskData: TaskDataCustom = {
9797
id: "HuggingFaceH4/zephyr-chat",
9898
},
9999
{
100-
description: "An text generation application that combines OpenAI and Hugging Face models.",
101-
id: "microsoft/HuggingGPT",
100+
description: "A leaderboard that ranks text generation models based on blind votes from people.",
101+
id: "lmsys/chatbot-arena-leaderboard",
102102
},
103103
{
104104
description: "A chatbot to converse with a very powerful text generation model.",
@@ -107,7 +107,7 @@ const taskData: TaskDataCustom = {
107107
],
108108
summary:
109109
"Generating text is the task of generating new text given another text. These models can, for example, fill in incomplete text or paraphrase.",
110-
widgetModels: ["HuggingFaceH4/zephyr-7b-beta"],
110+
widgetModels: ["mistralai/Mistral-Nemo-Instruct-2407"],
111111
youtubeId: "e9gNEAlsOvU",
112112
};
113113

packages/tasks/src/tasks/text-to-image/data.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ const taskData: TaskDataCustom = {
9393
],
9494
summary:
9595
"Generates images from input text. These models can be used to generate and modify images based on text prompts.",
96-
widgetModels: ["CompVis/stable-diffusion-v1-4"],
96+
widgetModels: ["black-forest-labs/FLUX.1-dev"],
9797
youtubeId: "",
9898
};
9999

packages/tasks/src/tasks/text-to-speech/data.ts

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ const taskData: TaskDataCustom = {
99
},
1010
{
1111
description: "Multi-speaker English dataset.",
12-
id: "LibriTTS",
12+
id: "mythicinfinity/libritts_r",
1313
},
1414
],
1515
demo: {
@@ -36,11 +36,15 @@ const taskData: TaskDataCustom = {
3636
models: [
3737
{
3838
description: "A powerful TTS model.",
39-
id: "suno/bark",
39+
id: "parler-tts/parler-tts-large-v1",
4040
},
4141
{
4242
description: "A massively multi-lingual TTS model.",
43-
id: "facebook/mms-tts",
43+
id: "coqui/XTTS-v2",
44+
},
45+
{
46+
description: "Robust TTS model.",
47+
id: "metavoiceio/metavoice-1B-v0.1",
4448
},
4549
{
4650
description: "A prompt based, powerful TTS model.",

packages/tasks/src/tasks/token-classification/data.ts

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,12 @@ const taskData: TaskDataCustom = {
44
datasets: [
55
{
66
description: "A widely used dataset useful to benchmark named entity recognition models.",
7-
id: "conll2003",
7+
id: "eriktks/conll2003",
88
},
99
{
1010
description:
1111
"A multilingual dataset of Wikipedia articles annotated for named entity recognition in over 150 different languages.",
12-
id: "wikiann",
12+
id: "unimelb-nlp/wikiann",
1313
},
1414
],
1515
demo: {
@@ -63,6 +63,14 @@ const taskData: TaskDataCustom = {
6363
"A robust performance model to identify people, locations, organizations and names of miscellaneous entities.",
6464
id: "dslim/bert-base-NER",
6565
},
66+
{
67+
description: "A strong model to identify people, locations, organizations and names in multiple languages.",
68+
id: "FacebookAI/xlm-roberta-large-finetuned-conll03-english",
69+
},
70+
{
71+
description: "A token classification model specialized on medical entity recognition.",
72+
id: "blaze999/Medical-NER",
73+
},
6674
{
6775
description: "Flair models are typically the state of the art in named entity recognition tasks.",
6876
id: "flair/ner-english",
@@ -77,7 +85,7 @@ const taskData: TaskDataCustom = {
7785
],
7886
summary:
7987
"Token classification is a natural language understanding task in which a label is assigned to some tokens in a text. Some popular token classification subtasks are Named Entity Recognition (NER) and Part-of-Speech (PoS) tagging. NER models could be trained to identify specific entities in a text, such as dates, individuals and places; and PoS tagging would identify, for example, which words in a text are verbs, nouns, and punctuation marks.",
80-
widgetModels: ["dslim/bert-base-NER"],
88+
widgetModels: ["FacebookAI/xlm-roberta-large-finetuned-conll03-english"],
8189
youtubeId: "wVHdVlPScxA",
8290
};
8391

packages/tasks/src/tasks/translation/data.ts

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,12 @@ const taskData: TaskDataCustom = {
55
datasets: [
66
{
77
description: "A dataset of copyright-free books translated into 16 different languages.",
8-
id: "opus_books",
8+
id: "Helsinki-NLP/opus_books",
99
},
1010
{
1111
description:
1212
"An example of translation between programming languages. This dataset consists of functions in Java and C#.",
13-
id: "code_x_glue_cc_code_to_code_trans",
13+
id: "google/code_x_glue_cc_code_to_code_trans",
1414
},
1515
],
1616
demo: {
@@ -42,13 +42,14 @@ const taskData: TaskDataCustom = {
4242
],
4343
models: [
4444
{
45-
description: "A model that translates from English to French.",
46-
id: "Helsinki-NLP/opus-mt-en-fr",
45+
description:
46+
"Very powerful model that can translate many languages between each other, especially low-resource languages.",
47+
id: "facebook/nllb-200-1.3B",
4748
},
4849
{
4950
description:
5051
"A general-purpose Transformer that can be used to translate from English to German, French, or Romanian.",
51-
id: "t5-base",
52+
id: "google-t5/t5-base",
5253
},
5354
],
5455
spaces: [
@@ -57,12 +58,12 @@ const taskData: TaskDataCustom = {
5758
id: "Iker/Translate-100-languages",
5859
},
5960
{
60-
description: "An application that can translate between English, Spanish and Hindi.",
61-
id: "EuroPython2022/Translate-with-Bloom",
61+
description: "An application that can translate between many languages.",
62+
id: "Geonmo/nllb-translation-demo",
6263
},
6364
],
6465
summary: "Translation is the task of converting text from one language to another.",
65-
widgetModels: ["t5-small"],
66+
widgetModels: ["facebook/mbart-large-50-many-to-many-mmt"],
6667
youtubeId: "1JvfrvZgi6c",
6768
};
6869

0 commit comments

Comments
 (0)