Skip to content

Tasks: update new models and Spaces #980

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Oct 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 15 additions & 7 deletions packages/tasks/src/tasks/depth-estimation/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,15 @@ const taskData: TaskDataCustom = {
},
{
description: "A strong monocular depth estimation model.",
id: "Bingxin/Marigold",
id: "jingheya/lotus-depth-g-v1-0",
},
{
description: "A metric depth estimation model trained on NYU dataset.",
id: "Intel/zoedepth-nyu",
description: "A depth estimation model that predicts depth in videos.",
id: "tencent/DepthCrafter",
},
{
description: "A robust depth estimation model.",
id: "apple/DepthPro",
},
],
spaces: [
Expand All @@ -46,12 +50,16 @@ const taskData: TaskDataCustom = {
id: "radames/dpt-depth-estimation-3d-voxels",
},
{
description: "An application on cutting-edge depth estimation.",
id: "depth-anything/Depth-Anything-V2",
description: "An application for bleeding-edge depth estimation.",
id: "akhaliq/depth-pro",
},
{
description: "An application on cutting-edge depth estimation in videos.",
id: "tencent/DepthCrafter",
},
{
description: "An application to try state-of-the-art depth estimation.",
id: "merve/compare_depth_models",
description: "A human-centric depth estimation application.",
id: "facebook/sapiens-depth",
},
],
summary: "Depth estimation is the task of predicting depth of the objects present in an image.",
Expand Down
10 changes: 5 additions & 5 deletions packages/tasks/src/tasks/image-segmentation/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,11 @@ const taskData: TaskDataCustom = {
id: "ZhengPeng7/BiRefNet",
},
{
description: "Semantic segmentation model trained on ADE20k dataset.",
id: "nvidia/segformer-b0-finetuned-ade-512-512",
description: "Powerful human-centric image segmentation model.",
id: "facebook/sapiens-seg-1b",
},
{
description: "Panoptic segmentation model trained COCO (common objects) dataset.",
description: "Panoptic segmentation model trained on the COCO (common objects) dataset.",
id: "facebook/mask2former-swin-large-coco-panoptic",
},
],
Expand All @@ -75,8 +75,8 @@ const taskData: TaskDataCustom = {
id: "jbrinkma/segment-anything",
},
{
description: "A semantic segmentation application that predicts human silhouettes.",
id: "keras-io/Human-Part-Segmentation",
description: "A human-centric segmentation application.",
id: "facebook/sapiens-pose",
},
{
description: "An instance segmentation application to predict neuronal cell types from microscopy images.",
Expand Down
24 changes: 16 additions & 8 deletions packages/tasks/src/tasks/image-text-to-text/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,20 +47,24 @@ const taskData: TaskDataCustom = {
id: "meta-llama/Llama-3.2-11B-Vision-Instruct",
},
{
description: "Cutting-edge conversational vision language model that can take multiple image inputs.",
id: "HuggingFaceM4/idefics2-8b-chatty",
description: "Cutting-edge vision language model.",
id: "allenai/Molmo-7B-D-0924",
},
{
description: "Small yet powerful model.",
id: "vikhyatk/moondream2",
},
{
description: "Strong image-text-to-text model made to understand documents.",
id: "mPLUG/DocOwl1.5",
description: "Strong image-text-to-text model.",
id: "Qwen/Qwen2-VL-7B-Instruct",
},
{
description: "Strong image-text-to-text model.",
id: "microsoft/Phi-3.5-vision-instruct",
id: "mistralai/Pixtral-12B-2409",
},
{
description: "Strong image-text-to-text model focused on documents.",
id: "stepfun-ai/GOT-OCR2_0",
},
],
spaces: [
Expand All @@ -74,15 +78,19 @@ const taskData: TaskDataCustom = {
},
{
description: "Powerful vision-language model assistant.",
id: "liuhaotian/LLaVA-1.6",
id: "akhaliq/Molmo-7B-D-0924",
},
{
description: "An image-text-to-text application focused on documents.",
id: "stepfun-ai/GOT_official_online_demo",
},
{
description: "An application to compare outputs of different vision language models.",
id: "merve/compare_VLMs",
},
{
description: "An application for document vision language tasks.",
id: "mPLUG/DocOwl",
description: "An application for chatting with an image-text-to-text model.",
id: "GanymedeNil/Qwen2-VL-7B",
},
],
summary:
Expand Down
2 changes: 1 addition & 1 deletion packages/tasks/src/tasks/keypoint-detection/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ const taskData: TaskDataCustom = {
},
{
description: "Strong keypoint detection model used to detect human pose.",
id: "qualcomm/MediaPipe-Pose-Estimation",
id: "facebook/sapiens-pose-1b",
},
],
spaces: [
Expand Down
14 changes: 7 additions & 7 deletions packages/tasks/src/tasks/text-generation/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,6 @@ const taskData: TaskDataCustom = {
description: "A text-generation model trained to follow instructions.",
id: "google/gemma-2-2b-it",
},
{
description: "A code generation model that can generate code in 80+ languages.",
id: "bigcode/starcoder",
},
{
description: "Very powerful text generation model trained to follow instructions.",
id: "meta-llama/Meta-Llama-3.1-8B-Instruct",
Expand All @@ -75,19 +71,23 @@ const taskData: TaskDataCustom = {
id: "AI-MO/NuminaMath-7B-TIR",
},
{
description: "Strong coding assistant model.",
id: "HuggingFaceH4/starchat2-15b-v0.1",
description: "Strong text generation model trained to follow instructions.",
id: "Qwen/Qwen2.5-7B-Instruct",
},
{
description: "Very strong open-source large language model.",
id: "mistralai/Mistral-Nemo-Instruct-2407",
id: "nvidia/Llama-3.1-Nemotron-70B-Instruct",
},
],
spaces: [
{
description: "A leaderboard to compare different open-source text generation models based on various benchmarks.",
id: "open-llm-leaderboard/open_llm_leaderboard",
},
{
description: "A leaderboard for comparing chain-of-thought performance of models.",
id: "logikon/open_cot_leaderboard",
},
{
description: "A text-generation application based on a very powerful LLaMA2 model.",
id: "ysharma/Explore_llamav2_with_TGI",
Expand Down
4 changes: 2 additions & 2 deletions packages/tasks/src/tasks/text-to-image/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,8 @@ const taskData: TaskDataCustom = {
id: "jbilcke-hf/ai-comic-factory",
},
{
description: "A text-to-image application that can generate coherent text inside the image.",
id: "DeepFloyd/IF",
description: "An application to match multiple custom image generation models.",
id: "multimodalart/flux-lora-lab",
},
{
description: "A powerful yet very fast image generation application.",
Expand Down
6 changes: 5 additions & 1 deletion packages/tasks/src/tasks/text-to-speech/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,13 @@ const taskData: TaskDataCustom = {
id: "suno/bark",
},
{
description: "XTTS is a Voice generation model that lets you clone voices into different languages.",
description: "An application on XTTS, a voice generation model that lets you clone voices into different languages.",
id: "coqui/xtts",
},
{
description: "An application that generates speech in different styles in English and Chinese.",
id: "mrfakename/E2-F5-TTS",
},
{
description: "An application that synthesizes speech for diverse speaker prompts.",
id: "parler-tts/parler_tts_mini",
Expand Down
20 changes: 10 additions & 10 deletions packages/tasks/src/tasks/text-to-video/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,30 +67,30 @@ const taskData: TaskDataCustom = {
],
models: [
{
description: "A strong model for video generation.",
id: "Vchitect/LaVie",
description: "A strong model for consistent video generation.",
id: "rain1011/pyramid-flow-sd3",
},
{
description: "A robust model for text-to-video generation.",
id: "damo-vilab/text-to-video-ms-1.7b",
id: "VideoCrafter/VideoCrafter2",
},
{
description: "A text-to-video generation model with high quality and smooth outputs.",
id: "hotshotco/Hotshot-XL",
description: "A cutting-edge text-to-video generation model.",
id: "TIGER-Lab/T2V-Turbo-V2",
},
],
spaces: [
{
description: "An application that generates video from text.",
id: "fffiloni/zeroscope",
id: "VideoCrafter/VideoCrafter",
},
{
description: "An application that generates video from image and text.",
id: "Vchitect/LaVie",
description: "Consistent video generation application.",
id: "TIGER-Lab/T2V-Turbo-V2",
},
{
description: "An application that generates videos from text and provides multi-model support.",
id: "ArtGAN/Video-Diffusion-WebUI",
description: "A cutting-edge video generation application.",
id: "Pyramid-Flow/pyramid-flow",
},
],
summary:
Expand Down
8 changes: 8 additions & 0 deletions packages/tasks/src/tasks/video-text-to-text/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ const taskData: TaskDataCustom = {
description: "A dataset of instructions and question-answer pairs about videos.",
id: "lmms-lab/VideoChatGPT",
},
{
description: "Large video understanding dataset.",
id: "HuggingFaceFV/finevideo",
},
],
demo: {
inputs: [
Expand Down Expand Up @@ -48,6 +52,10 @@ const taskData: TaskDataCustom = {
description: "An application to chat with a video-text-to-text model.",
id: "llava-hf/video-llava",
},
{
description: "A leaderboard for various video-text-to-text models.",
id: "opencompass/openvlm_video_leaderboard",
},
],
summary:
"Video-text-to-text models take in a video and a text prompt and output text. These models are also called video-language models.",
Expand Down
Loading