Skip to content

feat. Add text-to-video - Wan2.1-T2V-14B from Novita #1263

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Mar 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions packages/inference/src/providers/novita.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
*/
import type { ProviderConfig, UrlParams, HeaderParams, BodyParams } from "../types";

// API host without a version prefix; the `/v3/...` path segments appear in the
// routes returned by `makeUrl`.
const NOVITA_API_BASE_URL = "https://api.novita.ai";

const makeBody = (params: BodyParams): Record<string, unknown> => {
return {
Expand All @@ -32,9 +32,11 @@ const makeHeaders = (params: HeaderParams): Record<string, string> => {
/**
 * Builds the request URL for a call from `params.baseUrl` and `params.task`.
 *
 * - `"text-generation"`: `/v3/openai/chat/completions` when
 *   `params.chatCompletion` is truthy, `/v3/openai/completions` otherwise.
 * - `"text-to-video"`: `/v3/hf/<params.model>`.
 * - any other task: `params.baseUrl` unchanged.
 *
 * @param params - URL parameters; only `baseUrl`, `task`, `chatCompletion` and `model` are read.
 * @returns The full URL as a string.
 */
const makeUrl = (params: UrlParams): string => {
	if (params.task === "text-generation") {
		// Exactly one return per branch: an earlier return without the
		// `/v3/openai` prefix would shadow the prefixed route.
		if (params.chatCompletion) {
			return `${params.baseUrl}/v3/openai/chat/completions`;
		}
		return `${params.baseUrl}/v3/openai/completions`;
	} else if (params.task === "text-to-video") {
		return `${params.baseUrl}/v3/hf/${params.model}`;
	}
	return params.baseUrl;
};
Expand Down
30 changes: 25 additions & 5 deletions packages/inference/src/tasks/cv/textToVideo.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,13 @@ interface ReplicateOutput {
output: string;
}

/**
 * Response shape checked for the "novita" provider: the generated video is
 * referenced by a URL at `video.video_url`.
 */
interface NovitaOutput {
	video: {
		video_url: string;
	};
}

// Providers checked by `textToVideo` before the request is issued. Declared once:
// a second `const` with the same name is a redeclaration error.
const SUPPORTED_PROVIDERS = ["fal-ai", "novita", "replicate"] as const satisfies readonly InferenceProvider[];

export async function textToVideo(args: TextToVideoArgs, options?: Options): Promise<TextToVideoOutput> {
if (!args.provider || !typedInclude(SUPPORTED_PROVIDERS, args.provider)) {
Expand All @@ -30,14 +36,13 @@ export async function textToVideo(args: TextToVideoArgs, options?: Options): Pro
}

const payload =
args.provider === "fal-ai" || args.provider === "replicate"
args.provider === "fal-ai" || args.provider === "replicate" || args.provider === "novita"
? { ...omit(args, ["inputs", "parameters"]), ...args.parameters, prompt: args.inputs }
: args;
const res = await request<FalAiOutput | ReplicateOutput>(payload, {
const res = await request<FalAiOutput | ReplicateOutput | NovitaOutput>(payload, {
...options,
task: "text-to-video",
});

if (args.provider === "fal-ai") {
const isValidOutput =
typeof res === "object" &&
Expand All @@ -51,7 +56,22 @@ export async function textToVideo(args: TextToVideoArgs, options?: Options): Pro
if (!isValidOutput) {
throw new InferenceOutputError("Expected { video: { url: string } }");
}
const urlResponse = await fetch(res.video.url);
const urlResponse = await fetch((res as FalAiOutput).video.url);
return await urlResponse.blob();
} else if (args.provider === "novita") {
const isValidOutput =
typeof res === "object" &&
!!res &&
"video" in res &&
typeof res.video === "object" &&
!!res.video &&
"video_url" in res.video &&
typeof res.video.video_url === "string" &&
isUrl(res.video.video_url);
if (!isValidOutput) {
throw new InferenceOutputError("Expected { video: { video_url: string } }");
}
const urlResponse = await fetch((res as NovitaOutput).video.video_url);
return await urlResponse.blob();
} else {
/// TODO: Replicate: handle the case where the generation request "times out" / is async (ie output is null)
Expand Down