Update ASR task page (#1015)

Deep-unlearning · Steven Zheng · Vaibhavs10 · web-flow · commit cd0e7c619206 · 2024-11-15T13:37:06.000+01:00
Co-authored-by: Steven Zheng <steven@MacBook-Pro-de-Steven.local>
Co-authored-by: vb <vaibhavs10@gmail.com>
diff --git a/packages/tasks/src/tasks/automatic-speech-recognition/data.ts b/packages/tasks/src/tasks/automatic-speech-recognition/data.ts
@@ -6,12 +6,16 @@ const taskData: TaskDataCustom = {
 			description: "31,175 hours of multilingual audio-text dataset in 108 languages.",
 			id: "mozilla-foundation/common_voice_17_0",
 		},
+		{
+			description: "Multilingual and diverse audio dataset with 101k hours of audio.",
+			id: "amphion/Emilia-Dataset",
+		},
 		{
 			description: "A dataset with 44.6k hours of English speaker data and 6k hours of other language speakers.",
 			id: "parler-tts/mls_eng",
 		},
 		{
-			description: "A multi-lingual audio dataset with 370K hours of audio.",
+			description: "A multilingual audio dataset with 370K hours of audio.",
 			id: "espnet/yodas",
 		},
 	],
@@ -54,6 +58,10 @@ const taskData: TaskDataCustom = {
 			description: "An end-to-end model that performs ASR and Speech Translation by MetaAI.",
 			id: "facebook/seamless-m4t-v2-large",
 		},
+		{
+			description: "A powerful multilingual ASR and Speech Translation model by Nvidia.",
+			id: "nvidia/canary-1b",
+		},
 		{
 			description: "Powerful speaker diarization model.",
 			id: "pyannote/speaker-diarization-3.1",
@@ -65,13 +73,17 @@ const taskData: TaskDataCustom = {
 			id: "hf-audio/whisper-large-v3",
 		},
 		{
-			description: "Fastest speech recognition application.",
-			id: "sanchit-gandhi/whisper-jax",
+			description: "Latest ASR model from Useful Sensors.",
+			id: "mrfakename/Moonshinex",
 		},
 		{
 			description: "A high quality speech and text translation model by Meta.",
 			id: "facebook/seamless_m4t",
 		},
+		{
+			description: "A powerful multilingual ASR and Speech Translation model by Nvidia",
+			id: "nvidia/canary-1b",
+		},
 	],
 	summary:
 		"Automatic Speech Recognition (ASR), also known as Speech to Text (STT), is the task of transcribing a given audio to text. It has many applications, such as voice user interfaces.",