Skip to content

Commit 57154a5

Browse files
authored
Feat/novita (#1194)
Add new model provider Novita AI
1 parent 56ffe40 commit 57154a5

File tree

8 files changed

+148
-0
lines changed

8 files changed

+148
-0
lines changed

.github/workflows/test.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ jobs:
4646
HF_REPLICATE_KEY: dummy
4747
HF_SAMBANOVA_KEY: dummy
4848
HF_TOGETHER_KEY: dummy
49+
HF_NOVITA_KEY: dummy
4950
HF_FIREWORKS_KEY: dummy
5051

5152
browser:
@@ -88,6 +89,7 @@ jobs:
8889
HF_REPLICATE_KEY: dummy
8990
HF_SAMBANOVA_KEY: dummy
9091
HF_TOGETHER_KEY: dummy
92+
HF_NOVITA_KEY: dummy
9193
HF_FIREWORKS_KEY: dummy
9294

9395
e2e:
@@ -157,4 +159,5 @@ jobs:
157159
HF_REPLICATE_KEY: dummy
158160
HF_SAMBANOVA_KEY: dummy
159161
HF_TOGETHER_KEY: dummy
162+
HF_NOVITA_KEY: dummy
160163
HF_FIREWORKS_KEY: dummy

packages/inference/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ Currently, we support the following providers:
5050
- [Fal.ai](https://fal.ai)
5151
- [Fireworks AI](https://fireworks.ai)
5252
- [Nebius](https://studio.nebius.ai)
53+
- [Novita](https://novita.ai/?utm_source=github_huggingface&utm_medium=github_readme&utm_campaign=link)
5354
- [Replicate](https://replicate.com)
5455
- [Sambanova](https://sambanova.ai)
5556
- [Together](https://together.xyz)

packages/inference/src/lib/makeRequestOptions.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import { NEBIUS_API_BASE_URL } from "../providers/nebius";
44
import { REPLICATE_API_BASE_URL } from "../providers/replicate";
55
import { SAMBANOVA_API_BASE_URL } from "../providers/sambanova";
66
import { TOGETHER_API_BASE_URL } from "../providers/together";
7+
import { NOVITA_API_BASE_URL } from "../providers/novita";
78
import { FIREWORKS_AI_API_BASE_URL } from "../providers/fireworks-ai";
89
import type { InferenceProvider } from "../types";
910
import type { InferenceTask, Options, RequestArgs } from "../types";
@@ -206,6 +207,7 @@ function makeUrl(params: {
206207
}
207208
return baseUrl;
208209
}
210+
209211
case "fireworks-ai": {
210212
const baseUrl = shouldProxy
211213
? HF_HUB_INFERENCE_PROXY_TEMPLATE.replace("{{PROVIDER}}", params.provider)
@@ -215,6 +217,18 @@ function makeUrl(params: {
215217
}
216218
return baseUrl;
217219
}
220+
case "novita": {
221+
const baseUrl = shouldProxy
222+
? HF_HUB_INFERENCE_PROXY_TEMPLATE.replace("{{PROVIDER}}", params.provider)
223+
: NOVITA_API_BASE_URL;
224+
if (params.taskHint === "text-generation") {
225+
if (params.chatCompletion) {
226+
return `${baseUrl}/chat/completions`;
227+
}
228+
return `${baseUrl}/completions`;
229+
}
230+
return baseUrl;
231+
}
218232
default: {
219233
const baseUrl = HF_HUB_INFERENCE_PROXY_TEMPLATE.replaceAll("{{PROVIDER}}", "hf-inference");
220234
if (params.taskHint === "text-generation" && params.chatCompletion) {

packages/inference/src/providers/consts.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,4 +23,5 @@ export const HARDCODED_MODEL_ID_MAPPING: Record<InferenceProvider, Record<ModelI
2323
replicate: {},
2424
sambanova: {},
2525
together: {},
26+
novita: {},
2627
};
packages/inference/src/providers/novita.ts

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
export const NOVITA_API_BASE_URL = "https://api.novita.ai/v3/openai";
2+
3+
/**
4+
* See the registered mapping of HF model ID => Novita model ID here:
5+
*
6+
* https://huggingface.co/api/partners/novita/models
7+
*
8+
* This is a publicly available mapping.
9+
*
10+
* If you want to try to run inference for a new model locally before it's registered on huggingface.co,
11+
* you can add it to the dictionary "HARDCODED_MODEL_ID_MAPPING" in consts.ts, for dev purposes.
12+
*
13+
* - If you work at Novita and want to update this mapping, please use the model mapping API we provide on huggingface.co
14+
* - If you're a community member and want to add a new supported HF model to Novita, please open an issue on the present repo
15+
* and we will tag Novita team members.
16+
*
17+
* Thanks!
18+
*/

packages/inference/src/types.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,9 @@ export const INFERENCE_PROVIDERS = [
3636
"replicate",
3737
"sambanova",
3838
"together",
39+
"novita",
3940
] as const;
41+
4042
export type InferenceProvider = (typeof INFERENCE_PROVIDERS)[number];
4143

4244
export interface BaseArgs {

packages/inference/test/HfInference.spec.ts

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1166,4 +1166,52 @@ describe.concurrent("HfInference", () => {
11661166
},
11671167
TIMEOUT
11681168
);
1169+
1170+
describe.concurrent(
1171+
"Novita",
1172+
() => {
1173+
const client = new HfInference(env.HF_NOVITA_KEY);
1174+
1175+
HARDCODED_MODEL_ID_MAPPING["novita"] = {
1176+
"meta-llama/llama-3.1-8b-instruct": "meta-llama/llama-3.1-8b-instruct",
1177+
"deepseek/deepseek-r1-distill-qwen-14b": "deepseek/deepseek-r1-distill-qwen-14b",
1178+
};
1179+
1180+
it("chatCompletion", async () => {
1181+
const res = await client.chatCompletion({
1182+
model: "meta-llama/llama-3.1-8b-instruct",
1183+
provider: "novita",
1184+
messages: [{ role: "user", content: "Complete this sentence with words, one plus one is equal " }],
1185+
});
1186+
if (res.choices && res.choices.length > 0) {
1187+
const completion = res.choices[0].message?.content;
1188+
expect(completion).toContain("two");
1189+
}
1190+
});
1191+
1192+
it("chatCompletion stream", async () => {
1193+
const stream = client.chatCompletionStream({
1194+
model: "deepseek/deepseek-r1-distill-qwen-14b",
1195+
provider: "novita",
1196+
messages: [{ role: "user", content: "Say this is a test" }],
1197+
stream: true,
1198+
}) as AsyncGenerator<ChatCompletionStreamOutput>;
1199+
1200+
let fullResponse = "";
1201+
for await (const chunk of stream) {
1202+
if (chunk.choices && chunk.choices.length > 0) {
1203+
const content = chunk.choices[0].delta?.content;
1204+
if (content) {
1205+
fullResponse += content;
1206+
}
1207+
}
1208+
}
1209+
1210+
// Verify we got a meaningful response
1211+
expect(fullResponse).toBeTruthy();
1212+
expect(fullResponse.length).toBeGreaterThan(0);
1213+
});
1214+
},
1215+
TIMEOUT
1216+
);
11691217
});

packages/inference/test/tapes.json

Lines changed: 61 additions & 0 deletions
Large diffs are not rendered by default.

0 commit comments

Comments (0)