Skip to content

Commit 33771e6

Browse files
Kaihuang724 and julien-c
authored and committed
draft: add hyperbolic support
1 parent 18bd1f5 commit 33771e6

File tree

7 files changed

+130
-6
lines changed

7 files changed

+130
-6
lines changed

.github/workflows/test.yml

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,12 @@ jobs:
4242
env:
4343
HF_TOKEN: ${{ secrets.HF_TOKEN }}
4444
HF_FAL_KEY: dummy
45+
HF_FIREWORKS_KEY: dummy
46+
HF_HYPERBOLIC_KEY: dummy
4547
HF_NEBIUS_KEY: dummy
4648
HF_REPLICATE_KEY: dummy
4749
HF_SAMBANOVA_KEY: dummy
4850
HF_TOGETHER_KEY: dummy
49-
HF_FIREWORKS_KEY: dummy
50-
5151
browser:
5252
runs-on: ubuntu-latest
5353
timeout-minutes: 10
@@ -84,12 +84,12 @@ jobs:
8484
env:
8585
HF_TOKEN: ${{ secrets.HF_TOKEN }}
8686
HF_FAL_KEY: dummy
87+
HF_FIREWORKS_KEY: dummy
88+
HF_HYPERBOLIC_KEY: dummy
8789
HF_NEBIUS_KEY: dummy
8890
HF_REPLICATE_KEY: dummy
8991
HF_SAMBANOVA_KEY: dummy
9092
HF_TOGETHER_KEY: dummy
91-
HF_FIREWORKS_KEY: dummy
92-
9393
e2e:
9494
runs-on: ubuntu-latest
9595
timeout-minutes: 10
@@ -153,8 +153,9 @@ jobs:
153153
NPM_CONFIG_REGISTRY: http://localhost:4874/
154154
HF_TOKEN: ${{ secrets.HF_TOKEN }}
155155
HF_FAL_KEY: dummy
156+
HF_FIREWORKS_KEY: dummy
157+
HF_HYPERBOLIC_KEY: dummy
156158
HF_NEBIUS_KEY: dummy
157159
HF_REPLICATE_KEY: dummy
158160
HF_SAMBANOVA_KEY: dummy
159161
HF_TOGETHER_KEY: dummy
160-
HF_FIREWORKS_KEY: dummy

packages/inference/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ You can send inference requests to third-party providers with the inference clie
4949
Currently, we support the following providers:
5050
- [Fal.ai](https://fal.ai)
5151
- [Fireworks AI](https://fireworks.ai)
52+
- [Hyperbolic](https://hyperbolic.xyz)
5253
- [Nebius](https://studio.nebius.ai)
5354
- [Replicate](https://replicate.com)
5455
- [Sambanova](https://sambanova.ai)
@@ -72,6 +73,7 @@ When authenticated with a third-party provider key, the request is made directly
7273
Only a subset of models are supported when requesting third-party providers. You can check the list of supported models per pipeline tasks here:
7374
- [Fal.ai supported models](https://huggingface.co/api/partners/fal-ai/models)
7475
- [Fireworks AI supported models](https://huggingface.co/api/partners/fireworks-ai/models)
76+
- [Hyperbolic supported models](https://huggingface.co/api/partners/hyperbolic/models)
7577
- [Nebius supported models](https://huggingface.co/api/partners/nebius/models)
7678
- [Replicate supported models](https://huggingface.co/api/partners/replicate/models)
7779
- [Sambanova supported models](https://huggingface.co/api/partners/sambanova/models)

packages/inference/src/lib/makeRequestOptions.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ import { REPLICATE_API_BASE_URL } from "../providers/replicate";
55
import { SAMBANOVA_API_BASE_URL } from "../providers/sambanova";
66
import { TOGETHER_API_BASE_URL } from "../providers/together";
77
import { FIREWORKS_AI_API_BASE_URL } from "../providers/fireworks-ai";
8+
import { HYPERBOLIC_API_BASE_URL } from "../providers/hyperbolic";
9+
810
import type { InferenceProvider } from "../types";
911
import type { InferenceTask, Options, RequestArgs } from "../types";
1012
import { isUrl } from "./isUrl";
@@ -234,6 +236,11 @@ function makeUrl(params: {
234236
return `${baseUrl}/v1/chat/completions`;
235237
}
236238
return baseUrl;
239+
case "hyperbolic": {
240+
const baseUrl = shouldProxy
241+
? HF_HUB_INFERENCE_PROXY_TEMPLATE.replace("{{PROVIDER}}", params.provider)
242+
: HYPERBOLIC_API_BASE_URL;
243+
return `${baseUrl}/v1/chat/completions`;
237244
}
238245
default: {
239246
const baseUrl = HF_HUB_INFERENCE_PROXY_TEMPLATE.replaceAll("{{PROVIDER}}", "hf-inference");
packages/inference/src/providers/hyperbolic.ts

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
import type { ProviderMapping } from "./types";
2+
3+
export const HYPERBOLIC_API_BASE_URL = "https://api.hyperbolic.xyz";
4+
5+
type HyperbolicId = string;
6+
7+
/**
8+
* https://docs.together.ai/reference/models-1
9+
*/
10+
export const HYPERBOLIC_SUPPORTED_MODEL_IDS: ProviderMapping<HyperbolicId> = {
11+
"text-to-image": {
12+
"black-forest-labs/FLUX.1-dev": "black-forest-labs/FLUX.1-dev",
13+
"stabilityai/stable-diffusion-xl-base-1.0": "SDXL1.0-base",
14+
"stable-diffusion-v1-5/stable-diffusion-v1-5": "stable-diffusion-v1-5/stable-diffusion-v1-5",
15+
"segmind/SSD-1B": "segmind/SSD-1B",
16+
"stabilityai/stable-diffusion-2": "stabilityai/stable-diffusion-2",
17+
"stabilityai/sdxl-turbo": "stabilityai/sdxl-turbo",
18+
},
19+
"image-text-to-text": {
20+
"Qwen/Qwen2-VL-72B-Instruct": "Qwen/Qwen2-VL-72B-Instruct",
21+
"mistralai/Pixtral-12B-2409": "mistralai/Pixtral-12B-2409",
22+
"Qwen/Qwen2-VL-7B-Instruct": "Qwen/Qwen2-VL-7B-Instruct",
23+
},
24+
"text-generation": {
25+
"meta-llama/Llama-3.1-405B-BASE-BF16": "meta-llama/Llama-3.1-405B-BASE-BF16",
26+
"meta-llama/Llama-3.1-405B-BASE-FP8": "meta-llama/Llama-3.1-405B-BASE-FP8",
27+
"Qwen/Qwen2.5-72B-Instruct": "Qwen/Qwen2.5-72B-Instruct-BF16",
28+
},
29+
"text-to-audio": {
30+
"myshell-ai/MeloTTS-English-v3": "myshell-ai/MeloTTS-English-v3",
31+
},
32+
conversational: {
33+
"deepseek-ai/DeepSeek-R1": "deepseek-ai/DeepSeek-R1",
34+
"deepseek-ai/DeepSeek-R1-Zero": "deepseek-ai/DeepSeek-R1-Zero",
35+
"deepseek-ai/DeepSeek-V3": "deepseek-ai/DeepSeek-V3",
36+
"meta-llama/Llama-3.2-3B-Instruct": "meta-llama/Llama-3.2-3B-Instruct",
37+
"meta-llama/Llama-3.3-70B-Instruct": "meta-llama/Llama-3.3-70B-Instruct",
38+
"meta-llama/Llama-3.1-70B-Instruct": "meta-llama/Llama-3.1-70B-Instruct-BF16",
39+
"meta-llama/Meta-Llama-3-70B-Instruct": "meta-llama/Llama-3-70b-BF16",
40+
"meta-llama/Llama-3.1-8B-Instruct": "meta-llama/Llama-3.1-8B-Instruct-BF16",
41+
"NousResearch/Hermes-3-Llama-3.1-70B": "NousResearch/Hermes-3-Llama-3.1-70B-BF16",
42+
"Qwen/Qwen2.5-72B-Instruct": "Qwen/Qwen2.5-72B-Instruct-BF16",
43+
"Qwen/Qwen2.5-Coder-32B-Instruct": "Qwen/Qwen2.5-Coder-32B-Instruct-BF16",
44+
"Qwen/QwQ-32B-Preview": "Qwen/QwQ-32B-Preview-BF16",
45+
},
46+
};

packages/inference/src/tasks/custom/request.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,10 @@ export async function request<T>(
3636
}
3737
if (output.error || output.detail) {
3838
throw new Error(JSON.stringify(output.error ?? output.detail));
39+
} else if (typeof output === "object") {
40+
throw new Error(JSON.stringify(output));
3941
} else {
40-
throw new Error(output);
42+
throw new Error(String(output));
4143
}
4244
}
4345
const message = contentType?.startsWith("text/plain;") ? await response.text() : undefined;

packages/inference/src/types.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ export type InferenceTask = Exclude<PipelineType, "other">;
4747
export const INFERENCE_PROVIDERS = [
4848
"fal-ai",
4949
"fireworks-ai",
50+
"hyperbolic",
5051
"nebius",
5152
"hf-inference",
5253
"replicate",

packages/inference/test/HfInference.spec.ts

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1175,4 +1175,69 @@ describe.concurrent("HfInference", () => {
11751175
},
11761176
TIMEOUT
11771177
);
1178+
1179+
describe.concurrent(
1180+
"Hyperbolic",
1181+
() => {
1182+
const client = new HfInference(env.HF_HYPERBOLIC_KEY);
1183+
1184+
it("chatCompletion - hyperbolic", async () => {
1185+
const res = await client.chatCompletion({
1186+
model: "meta-llama/Llama-3.2-3B-Instruct",
1187+
provider: "hyperbolic",
1188+
messages: [{ role: "user", content: "Complete this sentence with words, one plus one is equal " }],
1189+
temperature: 0.1,
1190+
});
1191+
1192+
expect(res).toBeDefined();
1193+
expect(res.choices).toBeDefined();
1194+
expect(res.choices?.length).toBeGreaterThan(0);
1195+
1196+
if (res.choices && res.choices.length > 0) {
1197+
const completion = res.choices[0].message?.content;
1198+
expect(completion).toBeDefined();
1199+
expect(typeof completion).toBe("string");
1200+
expect(completion).toContain("two");
1201+
}
1202+
});
1203+
1204+
it("chatCompletion stream", async () => {
1205+
const stream = client.chatCompletionStream({
1206+
model: "meta-llama/Llama-3.3-70B-Instruct",
1207+
provider: "hyperbolic",
1208+
messages: [{ role: "user", content: "Complete the equation 1 + 1 = , just the answer" }],
1209+
}) as AsyncGenerator<ChatCompletionStreamOutput>;
1210+
let out = "";
1211+
for await (const chunk of stream) {
1212+
if (chunk.choices && chunk.choices.length > 0) {
1213+
out += chunk.choices[0].delta.content;
1214+
}
1215+
}
1216+
expect(out).toContain("2");
1217+
});
1218+
1219+
it("textToImage", async () => {
1220+
const res = await client.textToImage({
1221+
model: "stabilityai/stable-diffusion-2",
1222+
provider: "hyperbolic",
1223+
inputs: "award winning high resolution photo of a giant tortoise",
1224+
});
1225+
expect(res).toBeInstanceOf(Blob);
1226+
});
1227+
1228+
it("textGeneration", async () => {
1229+
const res = await client.textGeneration({
1230+
model: "meta-llama/Llama-3.1-405B-BASE-FP8",
1231+
provider: "hyperbolic",
1232+
inputs: "Paris is",
1233+
parameters: {
1234+
temperature: 0,
1235+
max_tokens: 10,
1236+
},
1237+
});
1238+
expect(res).toMatchObject({ generated_text: " city of love" });
1239+
});
1240+
},
1241+
TIMEOUT
1242+
);
11781243
});

0 commit comments

Comments
 (0)