Skip to content

Commit e6b4ba2

Browse files
authored
Revert the Revert (router.huggingface.co) (#1195)
This re-applies #1188
1 parent 9e24f0c commit e6b4ba2

File tree

6 files changed

+190
-188
lines changed

6 files changed

+190
-188
lines changed

packages/inference/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ for await (const output of hf.textGenerationStream({
125125

126126
### Text Generation (Chat Completion API Compatible)
127127

128-
Using the `chatCompletion` method, you can generate text with models compatible with the OpenAI Chat Completion API. All models served by [TGI](https://api-inference.huggingface.co/framework/text-generation-inference) on Hugging Face support Messages API.
128+
Using the `chatCompletion` method, you can generate text with models compatible with the OpenAI Chat Completion API. All models served by [TGI](https://huggingface.co/docs/text-generation-inference/) on Hugging Face support Messages API.
129129

130130
[Demo](https://huggingface.co/spaces/huggingfacejs/streaming-chat-completion)
131131

packages/inference/src/config.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
export const HF_HUB_URL = "https://huggingface.co";
2+
export const HF_ROUTER_URL = "https://router.huggingface.co";

packages/inference/src/lib/makeRequestOptions.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { HF_HUB_URL } from "../config";
1+
import { HF_HUB_URL, HF_ROUTER_URL } from "../config";
22
import { FAL_AI_API_BASE_URL } from "../providers/fal-ai";
33
import { NEBIUS_API_BASE_URL } from "../providers/nebius";
44
import { REPLICATE_API_BASE_URL } from "../providers/replicate";
@@ -11,7 +11,7 @@ import { isUrl } from "./isUrl";
1111
import { version as packageVersion, name as packageName } from "../../package.json";
1212
import { getProviderModelId } from "./getProviderModelId";
1313

14-
const HF_HUB_INFERENCE_PROXY_TEMPLATE = `${HF_HUB_URL}/api/inference-proxy/{{PROVIDER}}`;
14+
const HF_HUB_INFERENCE_PROXY_TEMPLATE = `${HF_ROUTER_URL}/{{PROVIDER}}`;
1515

1616
/**
1717
* Lazy-loaded from huggingface.co/api/tasks when needed

packages/inference/test/HfInference.spec.ts

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,14 @@ describe.concurrent("HfInference", () => {
2222
"HF Inference",
2323
() => {
2424
const hf = new HfInference(env.HF_TOKEN);
25+
2526
it("throws error if model does not exist", () => {
2627
expect(
2728
hf.fillMask({
28-
model: "this-model-does-not-exist-123",
29+
model: "this-model/does-not-exist-123",
2930
inputs: "[MASK] world!",
3031
})
31-
).rejects.toThrowError("Not Found: Model not found");
32+
).rejects.toThrowError("Model this-model/does-not-exist-123 does not exist");
3233
});
3334

3435
it("fillMask", async () => {
@@ -648,7 +649,7 @@ describe.concurrent("HfInference", () => {
648649
});
649650

650651
it("endpoint - makes request to specified endpoint", async () => {
651-
const ep = hf.endpoint("https://api-inference.huggingface.co/models/openai-community/gpt2");
652+
const ep = hf.endpoint("https://router.huggingface.co/hf-inference/models/openai-community/gpt2");
652653
const { generated_text } = await ep.textGeneration({
653654
inputs: "one plus two equals",
654655
});
@@ -686,7 +687,7 @@ describe.concurrent("HfInference", () => {
686687
expect(out).toContain("2");
687688
});
688689

689-
it("chatCompletionStream modelId Fail - OpenAI Specs", async () => {
690+
it.skip("chatCompletionStream modelId Fail - OpenAI Specs", async () => {
690691
expect(
691692
hf
692693
.chatCompletionStream({
@@ -703,7 +704,7 @@ describe.concurrent("HfInference", () => {
703704
});
704705

705706
it("chatCompletion - OpenAI Specs", async () => {
706-
const ep = hf.endpoint("https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2");
707+
const ep = hf.endpoint("https://router.huggingface.co/hf-inference/models/mistralai/Mistral-7B-Instruct-v0.2");
707708
const res = await ep.chatCompletion({
708709
model: "tgi",
709710
messages: [{ role: "user", content: "Complete the this sentence with words one plus one is equal " }],
@@ -717,7 +718,7 @@ describe.concurrent("HfInference", () => {
717718
}
718719
});
719720
it("chatCompletionStream - OpenAI Specs", async () => {
720-
const ep = hf.endpoint("https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2");
721+
const ep = hf.endpoint("https://router.huggingface.co/hf-inference/models/mistralai/Mistral-7B-Instruct-v0.2");
721722
const stream = ep.chatCompletionStream({
722723
model: "tgi",
723724
messages: [{ role: "user", content: "Complete the equation 1+1= ,just the answer" }],

0 commit comments

Comments
 (0)