Skip to content

Commit cd0e7c6

Browse files
Deep-unlearning, Steven Zheng, and Vaibhavs10
authored
Update ASR task page (#1015)
Co-authored-by: Steven Zheng <[email protected]>
Co-authored-by: vb <[email protected]>
1 parent 58bbe80 commit cd0e7c6

File tree

1 file changed

+15
-3
lines changed
  • packages/tasks/src/tasks/automatic-speech-recognition

1 file changed

+15
-3
lines changed

packages/tasks/src/tasks/automatic-speech-recognition/data.ts

Lines changed: 15 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6,12 +6,16 @@ const taskData: TaskDataCustom = {
66
description: "31,175 hours of multilingual audio-text dataset in 108 languages.",
77
id: "mozilla-foundation/common_voice_17_0",
88
},
9+
{
10+
description: "Multilingual and diverse audio dataset with 101k hours of audio.",
11+
id: "amphion/Emilia-Dataset",
12+
},
913
{
1014
description: "A dataset with 44.6k hours of English speaker data and 6k hours of other language speakers.",
1115
id: "parler-tts/mls_eng",
1216
},
1317
{
14-
description: "A multi-lingual audio dataset with 370K hours of audio.",
18+
description: "A multilingual audio dataset with 370K hours of audio.",
1519
id: "espnet/yodas",
1620
},
1721
],
@@ -54,6 +58,10 @@ const taskData: TaskDataCustom = {
5458
description: "An end-to-end model that performs ASR and Speech Translation by MetaAI.",
5559
id: "facebook/seamless-m4t-v2-large",
5660
},
61+
{
62+
description: "A powerful multilingual ASR and Speech Translation model by Nvidia.",
63+
id: "nvidia/canary-1b",
64+
},
5765
{
5866
description: "Powerful speaker diarization model.",
5967
id: "pyannote/speaker-diarization-3.1",
@@ -65,13 +73,17 @@ const taskData: TaskDataCustom = {
6573
id: "hf-audio/whisper-large-v3",
6674
},
6775
{
68-
description: "Fastest speech recognition application.",
69-
id: "sanchit-gandhi/whisper-jax",
76+
description: "Latest ASR model from Useful Sensors.",
77+
id: "mrfakename/Moonshinex",
7078
},
7179
{
7280
description: "A high quality speech and text translation model by Meta.",
7381
id: "facebook/seamless_m4t",
7482
},
83+
{
84+
description: "A powerful multilingual ASR and Speech Translation model by Nvidia",
85+
id: "nvidia/canary-1b",
86+
},
7587
],
7688
summary:
7789
"Automatic Speech Recognition (ASR), also known as Speech to Text (STT), is the task of transcribing a given audio to text. It has many applications, such as voice user interfaces.",

0 commit comments

Comments
 (0)