Skip to content

Commit 33771e6

Browse files
Kaihuang724 and julien-c
authored and committed
draft: add hyperbolic support
1 parent 18bd1f5 commit 33771e6

File tree

7 files changed

+130
-6
lines changed

7 files changed

+130
-6
lines changed

.github/workflows/test.yml

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,12 @@ jobs:
4242
env:
4343
HF_TOKEN: ${{ secrets.HF_TOKEN }}
4444
HF_FAL_KEY: dummy
45+
HF_FIREWORKS_KEY: dummy
46+
HF_HYPERBOLIC_KEY: dummy
4547
HF_NEBIUS_KEY: dummy
4648
HF_REPLICATE_KEY: dummy
4749
HF_SAMBANOVA_KEY: dummy
4850
HF_TOGETHER_KEY: dummy
49-
HF_FIREWORKS_KEY: dummy
50-
5151
browser:
5252
runs-on: ubuntu-latest
5353
timeout-minutes: 10
@@ -84,12 +84,12 @@ jobs:
8484
env:
8585
HF_TOKEN: ${{ secrets.HF_TOKEN }}
8686
HF_FAL_KEY: dummy
87+
HF_FIREWORKS_KEY: dummy
88+
HF_HYPERBOLIC_KEY: dummy
8789
HF_NEBIUS_KEY: dummy
8890
HF_REPLICATE_KEY: dummy
8991
HF_SAMBANOVA_KEY: dummy
9092
HF_TOGETHER_KEY: dummy
91-
HF_FIREWORKS_KEY: dummy
92-
9393
e2e:
9494
runs-on: ubuntu-latest
9595
timeout-minutes: 10
@@ -153,8 +153,9 @@ jobs:
153153
NPM_CONFIG_REGISTRY: http://localhost:4874/
154154
HF_TOKEN: ${{ secrets.HF_TOKEN }}
155155
HF_FAL_KEY: dummy
156+
HF_FIREWORKS_KEY: dummy
157+
HF_HYPERBOLIC_KEY: dummy
156158
HF_NEBIUS_KEY: dummy
157159
HF_REPLICATE_KEY: dummy
158160
HF_SAMBANOVA_KEY: dummy
159161
HF_TOGETHER_KEY: dummy
160-
HF_FIREWORKS_KEY: dummy

packages/inference/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ You can send inference requests to third-party providers with the inference clie
4949
Currently, we support the following providers:
5050
- [Fal.ai](https://fal.ai)
5151
- [Fireworks AI](https://fireworks.ai)
52+
- [Hyperbolic](https://hyperbolic.xyz)
5253
- [Nebius](https://studio.nebius.ai)
5354
- [Replicate](https://replicate.com)
5455
- [Sambanova](https://sambanova.ai)
@@ -72,6 +73,7 @@ When authenticated with a third-party provider key, the request is made directly
7273
Only a subset of models are supported when requesting third-party providers. You can check the list of supported models per pipeline tasks here:
7374
- [Fal.ai supported models](https://huggingface.co/api/partners/fal-ai/models)
7475
- [Fireworks AI supported models](https://huggingface.co/api/partners/fireworks-ai/models)
76+
- [Hyperbolic supported models](https://huggingface.co/api/partners/hyperbolic/models)
7577
- [Nebius supported models](https://huggingface.co/api/partners/nebius/models)
7678
- [Replicate supported models](https://huggingface.co/api/partners/replicate/models)
7779
- [Sambanova supported models](https://huggingface.co/api/partners/sambanova/models)

packages/inference/src/lib/makeRequestOptions.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ import { REPLICATE_API_BASE_URL } from "../providers/replicate";
55
import { SAMBANOVA_API_BASE_URL } from "../providers/sambanova";
66
import { TOGETHER_API_BASE_URL } from "../providers/together";
77
import { FIREWORKS_AI_API_BASE_URL } from "../providers/fireworks-ai";
8+
import { HYPERBOLIC_API_BASE_URL } from "../providers/hyperbolic";
9+
810
import type { InferenceProvider } from "../types";
911
import type { InferenceTask, Options, RequestArgs } from "../types";
1012
import { isUrl } from "./isUrl";
@@ -234,6 +236,11 @@ function makeUrl(params: {
234236
return `${baseUrl}/v1/chat/completions`;
235237
}
236238
return baseUrl;
239+
case "hyperbolic": {
240+
const baseUrl = shouldProxy
241+
? HF_HUB_INFERENCE_PROXY_TEMPLATE.replace("{{PROVIDER}}", params.provider)
242+
: HYPERBOLIC_API_BASE_URL;
243+
return `${baseUrl}/v1/chat/completions`;
237244
}
238245
default: {
239246
const baseUrl = HF_HUB_INFERENCE_PROXY_TEMPLATE.replaceAll("{{PROVIDER}}", "hf-inference");
packages/inference/src/providers/hyperbolic.ts

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
import type { ProviderMapping } from "./types";
2+
3+
export const HYPERBOLIC_API_BASE_URL = "https://api.hyperbolic.xyz";
4+
5+
type HyperbolicId = string;
6+
7+
/**
8+
* https://docs.together.ai/reference/models-1
9+
*/
10+
export const HYPERBOLIC_SUPPORTED_MODEL_IDS: ProviderMapping<HyperbolicId> = {
11+
"text-to-image": {
12+
"black-forest-labs/FLUX.1-dev": "black-forest-labs/FLUX.1-dev",
13+
"stabilityai/stable-diffusion-xl-base-1.0": "SDXL1.0-base",
14+
"stable-diffusion-v1-5/stable-diffusion-v1-5": "stable-diffusion-v1-5/stable-diffusion-v1-5",
15+
"segmind/SSD-1B": "segmind/SSD-1B",
16+
"stabilityai/stable-diffusion-2": "stabilityai/stable-diffusion-2",
17+
"stabilityai/sdxl-turbo": "stabilityai/sdxl-turbo",
18+
},
19+
"image-text-to-text": {
20+
"Qwen/Qwen2-VL-72B-Instruct": "Qwen/Qwen2-VL-72B-Instruct",
21+
"mistralai/Pixtral-12B-2409": "mistralai/Pixtral-12B-2409",
22+
"Qwen/Qwen2-VL-7B-Instruct": "Qwen/Qwen2-VL-7B-Instruct",
23+
},
24+
"text-generation": {
25+
"meta-llama/Llama-3.1-405B-BASE-BF16": "meta-llama/Llama-3.1-405B-BASE-BF16",
26+
"meta-llama/Llama-3.1-405B-BASE-FP8": "meta-llama/Llama-3.1-405B-BASE-FP8",
27+
"Qwen/Qwen2.5-72B-Instruct": "Qwen/Qwen2.5-72B-Instruct-BF16",
28+
},
29+
"text-to-audio": {
30+
"myshell-ai/MeloTTS-English-v3": "myshell-ai/MeloTTS-English-v3",
31+
},
32+
conversational: {
33+
"deepseek-ai/DeepSeek-R1": "deepseek-ai/DeepSeek-R1",
34+
"deepseek-ai/DeepSeek-R1-Zero": "deepseek-ai/DeepSeek-R1-Zero",
35+
"deepseek-ai/DeepSeek-V3": "deepseek-ai/DeepSeek-V3",
36+
"meta-llama/Llama-3.2-3B-Instruct": "meta-llama/Llama-3.2-3B-Instruct",
37+
"meta-llama/Llama-3.3-70B-Instruct": "meta-llama/Llama-3.3-70B-Instruct",
38+
"meta-llama/Llama-3.1-70B-Instruct": "meta-llama/Llama-3.1-70B-Instruct-BF16",
39+
"meta-llama/Meta-Llama-3-70B-Instruct": "meta-llama/Llama-3-70b-BF16",
40+
"meta-llama/Llama-3.1-8B-Instruct": "meta-llama/Llama-3.1-8B-Instruct-BF16",
41+
"NousResearch/Hermes-3-Llama-3.1-70B": "NousResearch/Hermes-3-Llama-3.1-70B-BF16",
42+
"Qwen/Qwen2.5-72B-Instruct": "Qwen/Qwen2.5-72B-Instruct-BF16",
43+
"Qwen/Qwen2.5-Coder-32B-Instruct": "Qwen/Qwen2.5-Coder-32B-Instruct-BF16",
44+
"Qwen/QwQ-32B-Preview": "Qwen/QwQ-32B-Preview-BF16",
45+
},
46+
};

packages/inference/src/tasks/custom/request.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,10 @@ export async function request<T>(
3636
}
3737
if (output.error || output.detail) {
3838
throw new Error(JSON.stringify(output.error ?? output.detail));
39+
} else if (typeof output === "object") {
40+
throw new Error(JSON.stringify(output));
3941
} else {
40-
throw new Error(output);
42+
throw new Error(String(output));
4143
}
4244
}
4345
const message = contentType?.startsWith("text/plain;") ? await response.text() : undefined;

packages/inference/src/types.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ export type InferenceTask = Exclude<PipelineType, "other">;
4747
export const INFERENCE_PROVIDERS = [
4848
"fal-ai",
4949
"fireworks-ai",
50+
"hyperbolic",
5051
"nebius",
5152
"hf-inference",
5253
"replicate",

packages/inference/test/HfInference.spec.ts

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1175,4 +1175,69 @@ describe.concurrent("HfInference", () => {
11751175
},
11761176
TIMEOUT
11771177
);
1178+
1179+
describe.concurrent(
1180+
"Hyperbolic",
1181+
() => {
1182+
const client = new HfInference(env.HF_HYPERBOLIC_KEY);
1183+
1184+
it("chatCompletion - hyperbolic", async () => {
1185+
const res = await client.chatCompletion({
1186+
model: "meta-llama/Llama-3.2-3B-Instruct",
1187+
provider: "hyperbolic",
1188+
messages: [{ role: "user", content: "Complete this sentence with words, one plus one is equal " }],
1189+
temperature: 0.1,
1190+
});
1191+
1192+
expect(res).toBeDefined();
1193+
expect(res.choices).toBeDefined();
1194+
expect(res.choices?.length).toBeGreaterThan(0);
1195+
1196+
if (res.choices && res.choices.length > 0) {
1197+
const completion = res.choices[0].message?.content;
1198+
expect(completion).toBeDefined();
1199+
expect(typeof completion).toBe("string");
1200+
expect(completion).toContain("two");
1201+
}
1202+
});
1203+
1204+
it("chatCompletion stream", async () => {
1205+
const stream = client.chatCompletionStream({
1206+
model: "meta-llama/Llama-3.3-70B-Instruct",
1207+
provider: "hyperbolic",
1208+
messages: [{ role: "user", content: "Complete the equation 1 + 1 = , just the answer" }],
1209+
}) as AsyncGenerator<ChatCompletionStreamOutput>;
1210+
let out = "";
1211+
for await (const chunk of stream) {
1212+
if (chunk.choices && chunk.choices.length > 0) {
1213+
out += chunk.choices[0].delta.content;
1214+
}
1215+
}
1216+
expect(out).toContain("2");
1217+
});
1218+
1219+
it("textToImage", async () => {
1220+
const res = await client.textToImage({
1221+
model: "stabilityai/stable-diffusion-2",
1222+
provider: "hyperbolic",
1223+
inputs: "award winning high resolution photo of a giant tortoise",
1224+
});
1225+
expect(res).toBeInstanceOf(Blob);
1226+
});
1227+
1228+
it("textGeneration", async () => {
1229+
const res = await client.textGeneration({
1230+
model: "meta-llama/Llama-3.1-405B-BASE-FP8",
1231+
provider: "hyperbolic",
1232+
inputs: "Paris is",
1233+
parameters: {
1234+
temperature: 0,
1235+
max_tokens: 10,
1236+
},
1237+
});
1238+
expect(res).toMatchObject({ generated_text: " city of love" });
1239+
});
1240+
},
1241+
TIMEOUT
1242+
);
11781243
});

0 commit comments

Comments
 (0)