Skip to content

Commit afc250e

Browse files
authored
feat: Add audio-text-to-text task. (#1033)
There's an increasing number of Audio LMs: 1. https://huggingface.co/collections/reach-vb/ultravox-audio-language-model-release-67373b602af0a52b2a88ae71 2. https://huggingface.co/homebrewltd And many more. Will open PRs after this is merged.
1 parent 72b0d9b commit afc250e

File tree

3 files changed

+9
-0
lines changed

3 files changed

+9
-0
lines changed

packages/tasks/src/pipelines.ts

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -355,6 +355,12 @@ export const PIPELINE_DATA = {
355355
modality: "audio",
356356
color: "green",
357357
},
358+
"audio-text-to-text": {
359+
name: "Audio-Text-to-Text",
360+
modality: "multimodal",
361+
color: "red",
362+
hideInDatasets: true,
363+
},
358364
"voice-activity-detection": {
359365
name: "Voice Activity Detection",
360366
modality: "audio",

packages/tasks/src/tasks/index.ts

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -116,6 +116,7 @@ export const TASKS_MODEL_LIBRARIES: Record<PipelineType, ModelLibraryKey[]> = {
116116
"audio-classification": ["speechbrain", "transformers", "transformers.js"],
117117
"audio-to-audio": ["asteroid", "fairseq", "speechbrain"],
118118
"automatic-speech-recognition": ["espnet", "nemo", "speechbrain", "transformers", "transformers.js"],
119+
"audio-text-to-text": [],
119120
"depth-estimation": ["transformers", "transformers.js"],
120121
"document-question-answering": ["transformers", "transformers.js"],
121122
"feature-extraction": ["sentence-transformers", "transformers", "transformers.js"],
@@ -197,6 +198,7 @@ export const TASKS_DATA: Record<PipelineType, TaskData | undefined> = {
197198
"any-to-any": getData("any-to-any", placeholder),
198199
"audio-classification": getData("audio-classification", audioClassification),
199200
"audio-to-audio": getData("audio-to-audio", audioToAudio),
201+
"audio-text-to-text": getData("audio-text-to-text", placeholder),
200202
"automatic-speech-recognition": getData("automatic-speech-recognition", automaticSpeechRecognition),
201203
"depth-estimation": getData("depth-estimation", depthEstimation),
202204
"document-question-answering": getData("document-question-answering", documentQuestionAnswering),

packages/widgets/src/lib/components/PipelineIcon/PipelineIcon.svelte

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -73,6 +73,7 @@
7373
"automatic-speech-recognition": IconAutomaticSpeechRecognition,
7474
"audio-to-audio": IconAudioToAudio,
7575
"audio-classification": IconAudioClassification,
76+
"audio-text-to-text": IconAudioToAudio,
7677
"voice-activity-detection": IconVoiceActivityDetection,
7778
"depth-estimation": IconDepthEstimation,
7879
"image-classification": IconImageClassification,

0 commit comments

Comments
 (0)