Fireworks AI Conversational Models

teofeliu · julien-c · commit 5540b2bcafb2 · 2025-02-06T14:53:43.000+01:00
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -45,6 +45,7 @@ jobs:
           HF_REPLICATE_KEY: dummy
           HF_SAMBANOVA_KEY: dummy
           HF_TOGETHER_KEY: dummy
+          HF_FIREWORKS_AI_KEY: dummy
 
   browser:
     runs-on: ubuntu-latest
@@ -85,6 +86,7 @@ jobs:
           HF_REPLICATE_KEY: dummy
           HF_SAMBANOVA_KEY: dummy
           HF_TOGETHER_KEY: dummy
+          HF_FIREWORKS_AI_KEY: dummy
 
   e2e:
     runs-on: ubuntu-latest
@@ -152,3 +154,4 @@ jobs:
           HF_REPLICATE_KEY: dummy
           HF_SAMBANOVA_KEY: dummy
           HF_TOGETHER_KEY: dummy
+          HF_FIREWORKS_AI_KEY: dummy
diff --git a/packages/inference/README.md b/packages/inference/README.md
@@ -46,7 +46,7 @@ Your access token should be kept private. If you need to protect it in front-end
 
 You can send inference requests to third-party providers with the inference client.
 
-Currently, we support the following providers: [Fal.ai](https://fal.ai), [Replicate](https://replicate.com), [Together](https://together.xyz) and [Sambanova](https://sambanova.ai).
+Currently, we support the following providers: [Fal.ai](https://fal.ai), [Replicate](https://replicate.com), [Together](https://together.xyz), [Sambanova](https://sambanova.ai), and [Fireworks AI](https://fireworks.ai).
 
 To send requests to a third-party provider, you have to pass the `provider` parameter to the inference function. Make sure your request is authenticated with an access token.
 ```ts
@@ -68,6 +68,7 @@ Only a subset of models are supported when requesting third-party providers. You
 - [Replicate supported models](./src/providers/replicate.ts)
 - [Sambanova supported models](./src/providers/sambanova.ts)
 - [Together supported models](./src/providers/together.ts)
+- [Fireworks AI supported models](./src/providers/fireworks-ai.ts)
 - [HF Inference API (serverless)](https://huggingface.co/models?inference=warm&sort=trending)
 
 ❗**Important note:** To be compatible, the third-party API must adhere to the "standard" shape API we expect on HF model pages for each pipeline task type. 
diff --git a/packages/inference/src/lib/makeRequestOptions.ts b/packages/inference/src/lib/makeRequestOptions.ts
@@ -208,6 +208,15 @@ function makeUrl(params: {
 			}
 			return baseUrl;
 		}
+		case "fireworks-ai": {
+			const baseUrl = shouldProxy
+				? HF_HUB_INFERENCE_PROXY_TEMPLATE.replace("{{PROVIDER}}", params.provider) // proxy template with the provider name substituted
+				: FIREWORKS_AI_API_BASE_URL; // otherwise target the Fireworks AI base URL directly
+			if (params.taskHint === "text-generation" && params.chatCompletion) {
+				return `${baseUrl}/v1/chat/completions`; // chat completions get the dedicated route appended
+			}
+			return baseUrl; // any other task/hint combination receives the bare base URL
+		}
 		default: {
 			const baseUrl = HF_HUB_INFERENCE_PROXY_TEMPLATE.replaceAll("{{PROVIDER}}", "hf-inference");
 			const url = params.forceTask
diff --git a/packages/inference/src/providers/fireworks-ai.ts b/packages/inference/src/providers/fireworks-ai.ts
@@ -0,0 +1,32 @@
+import type { ProviderMapping } from "./types";
+
+export const FIREWORKS_AI_API_BASE_URL = "https://api.fireworks.ai/inference"; // base URL used for direct (non-proxied) Fireworks AI requests
+
+type FireworksAiId = string;  // Fireworks AI-side model ID, e.g. "accounts/fireworks/models/deepseek-r1"
+
+/**
+ * Maps Hugging Face model IDs (keys) to the corresponding Fireworks AI model IDs (values), grouped by task.
+ */
+export const FIREWORKS_AI_SUPPORTED_MODEL_IDS: ProviderMapping<FireworksAiId> = {
+	// Chat/Conversational models
+	conversational: {
+		"meta-llama/Llama-3.3-70B-Instruct": "accounts/fireworks/models/llama-v3p3-70b-instruct",
+		"meta-llama/Llama-3.2-3B-Instruct": "accounts/fireworks/models/llama-v3p2-3b-instruct",
+		"meta-llama/Llama-3.1-8B-Instruct": "accounts/fireworks/models/llama-v3p1-8b-instruct",
+		"mistralai/Mixtral-8x7B-Instruct-v0.1": "accounts/fireworks/models/mixtral-8x7b-instruct",
+		"deepseek-ai/DeepSeek-R1": "accounts/fireworks/models/deepseek-r1",
+		"deepseek-ai/DeepSeek-V3": "accounts/fireworks/models/deepseek-v3",
+		"meta-llama/Llama-3.2-90B-Vision-Instruct": "accounts/fireworks/models/llama-v3p2-90b-vision-instruct",
+		"meta-llama/Llama-3.2-11B-Vision-Instruct": "accounts/fireworks/models/llama-v3p2-11b-vision-instruct",
+		"meta-llama/Meta-Llama-3-70B-Instruct": "accounts/fireworks/models/llama-v3-70b-instruct",
+		"meta-llama/Meta-Llama-3-8B-Instruct": "accounts/fireworks/models/llama-v3-8b-instruct",
+		"mistralai/Mistral-Small-24B-Instruct-2501": "accounts/fireworks/models/mistral-small-24b-instruct-2501",
+		"mistralai/Mixtral-8x22B-Instruct-v0.1": "accounts/fireworks/models/mixtral-8x22b-instruct",
+		"Qwen/QwQ-32B-Preview": "accounts/fireworks/models/qwen-qwq-32b-preview", // canonical Hub casing is "QwQ"; object keys are case-sensitive
+		"Qwen/Qwen2.5-72B-Instruct": "accounts/fireworks/models/qwen2p5-72b-instruct",
+		"Qwen/Qwen2.5-Coder-32B-Instruct": "accounts/fireworks/models/qwen2p5-coder-32b-instruct",
+		"Qwen/Qwen2-VL-72B-Instruct": "accounts/fireworks/models/qwen2-vl-72b-instruct",
+		"Gryphe/MythoMax-L2-13b": "accounts/fireworks/models/mythomax-l2-13b",
+		"microsoft/Phi-3.5-vision-instruct": "accounts/fireworks/models/phi-3-vision-128k-instruct",
+	},
+};
diff --git a/packages/inference/src/types.ts b/packages/inference/src/types.ts
@@ -44,7 +44,7 @@ export interface Options {
 
 export type InferenceTask = Exclude<PipelineType, "other">;
 
-export const INFERENCE_PROVIDERS = ["fal-ai", "replicate", "sambanova", "together", "hf-inference"] as const;
+export const INFERENCE_PROVIDERS = ["fal-ai", "replicate", "sambanova", "together", "fireworks-ai", "hf-inference"] as const;
 export type InferenceProvider = (typeof INFERENCE_PROVIDERS)[number];
 
 export interface BaseArgs {
diff --git a/packages/inference/test/HfInference.spec.ts b/packages/inference/test/HfInference.spec.ts
@@ -1077,4 +1077,47 @@ describe.concurrent("HfInference", () => {
 			);
 		});
 	});
-});
+
+	describe.concurrent(
+		"Fireworks",
+		() => {
+			const client = new HfInference(env.HF_FIREWORKS_AI_KEY);
+
+			it("chatCompletion", async () => {
+				const res = await client.chatCompletion({
+					model: "deepseek-ai/DeepSeek-R1",
+					provider: "fireworks-ai",
+					messages: [{ role: "user", content: "Complete this sentence with words, one plus one is equal " }],
+				});
+				// Assert unconditionally: a response without choices must fail the test rather than skip the check.
+				expect(res.choices.length).toBeGreaterThan(0);
+				const completion = res.choices[0].message?.content;
+				expect(completion).toContain("two");
+			});
+
+			it("chatCompletion stream", async () => {
+				const stream = client.chatCompletionStream({
+					model: "deepseek-ai/DeepSeek-R1",
+					provider: "fireworks-ai",
+					messages: [{ role: "user", content: "Say this is a test" }],
+					stream: true,
+				}) as AsyncGenerator<ChatCompletionStreamOutput>;
+
+				let fullResponse = "";
+				for await (const chunk of stream) {
+					if (chunk.choices && chunk.choices.length > 0) {
+						const content = chunk.choices[0].delta?.content;
+						if (content) {
+							fullResponse += content;
+						}
+					}
+				}
+
+				// The concatenated deltas must form a non-empty completion.
+				expect(fullResponse).toBeTruthy();
+				expect(fullResponse.length).toBeGreaterThan(0);
+			});
+		},
+		TIMEOUT
+	);
+});