
Commit 86abcb2

SBrandeis, julien-c, and Wauplin authored
[Inference] Update snippets (#1129)
Co-authored-by: Julien Chaumond <[email protected]>
Co-authored-by: Lucain <[email protected]>
1 parent 9e79985 commit 86abcb2


43 files changed: +635 −194 lines changed

packages/agents/pnpm-lock.yaml

Lines changed: 12 additions & 1 deletion

packages/inference/README.md

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-# 🤗 Hugging Face Inference Endpoints
+# 🤗 Hugging Face Inference
 
 A Typescript powered wrapper for the Hugging Face Inference API (serverless), Inference Endpoints (dedicated), and third-party Inference Providers.
 It works with [Inference API (serverless)](https://huggingface.co/docs/api-inference/index) and [Inference Endpoints (dedicated)](https://huggingface.co/docs/inference-endpoints/index), and even with supported third-party Inference Providers.
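
The provider-aware call pattern this README describes is what the updated snippet fixtures further down in this commit exercise. A minimal TypeScript sketch, using the same placeholder token and model id as those fixtures:

import { HfInference } from "@huggingface/inference";

// "api_token" and the model id are the placeholder values used in the snippet fixtures below.
const client = new HfInference("api_token");

const chatCompletion = await client.chatCompletion({
    model: "meta-llama/Llama-3.1-8B-Instruct",
    messages: [{ role: "user", content: "What is the capital of France?" }],
    provider: "together", // or "hf-inference" to stay on the serverless Inference API
    max_tokens: 500,
});

console.log(chatCompletion.choices[0].message);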

packages/tasks-gen/package.json

Lines changed: 2 additions & 1 deletion
@@ -26,6 +26,7 @@
     "type-fest": "^3.13.1"
   },
   "dependencies": {
-    "@huggingface/tasks": "workspace:^"
+    "@huggingface/tasks": "workspace:^",
+    "@huggingface/inference": "workspace:^"
   }
 }

packages/tasks-gen/pnpm-lock.yaml

Lines changed: 3 additions & 0 deletions

packages/tasks-gen/scripts/generate-snippets-fixtures.ts

Lines changed: 37 additions & 17 deletions
@@ -19,7 +19,7 @@ import { existsSync as pathExists } from "node:fs";
 import * as fs from "node:fs/promises";
 import * as path from "node:path/posix";
 
-import type { InferenceSnippet } from "@huggingface/tasks";
+import type { InferenceProvider, InferenceSnippet } from "@huggingface/tasks";
 import { snippets } from "@huggingface/tasks";
 
 type LANGUAGE = "sh" | "js" | "py";
@@ -28,6 +28,7 @@ const TEST_CASES: {
     testName: string;
     model: snippets.ModelDataMinimal;
     languages: LANGUAGE[];
+    providers: InferenceProvider[];
     opts?: Record<string, unknown>;
 }[] = [
     {
@@ -39,6 +40,7 @@
             inference: "",
         },
         languages: ["sh", "js", "py"],
+        providers: ["hf-inference", "together"],
         opts: { streaming: false },
     },
     {
@@ -50,6 +52,7 @@
             inference: "",
         },
         languages: ["sh", "js", "py"],
+        providers: ["hf-inference"],
         opts: { streaming: true },
     },
     {
@@ -61,6 +64,7 @@
             inference: "",
         },
         languages: ["sh", "js", "py"],
+        providers: ["hf-inference"],
         opts: { streaming: false },
     },
     {
@@ -72,6 +76,7 @@
             inference: "",
         },
         languages: ["sh", "js", "py"],
+        providers: ["hf-inference"],
         opts: { streaming: true },
     },
     {
@@ -82,6 +87,7 @@
             tags: [],
             inference: "",
         },
+        providers: ["hf-inference"],
         languages: ["sh", "js", "py"],
     },
 ] as const;
@@ -113,31 +119,41 @@ function getFixtureFolder(testName: string): string {
 function generateInferenceSnippet(
     model: snippets.ModelDataMinimal,
     language: LANGUAGE,
+    provider: InferenceProvider,
     opts?: Record<string, unknown>
 ): InferenceSnippet[] {
-    const generatedSnippets = GET_SNIPPET_FN[language](model, "api_token", opts);
+    const generatedSnippets = GET_SNIPPET_FN[language](model, "api_token", provider, opts);
     return Array.isArray(generatedSnippets) ? generatedSnippets : [generatedSnippets];
 }
 
-async function getExpectedInferenceSnippet(testName: string, language: LANGUAGE): Promise<InferenceSnippet[]> {
+async function getExpectedInferenceSnippet(
+    testName: string,
+    language: LANGUAGE,
+    provider: InferenceProvider
+): Promise<InferenceSnippet[]> {
     const fixtureFolder = getFixtureFolder(testName);
     const files = await fs.readdir(fixtureFolder);
 
     const expectedSnippets: InferenceSnippet[] = [];
-    for (const file of files.filter((file) => file.endsWith("." + language)).sort()) {
-        const client = path.basename(file).split(".").slice(1, -1).join("."); // e.g. '0.huggingface.js.js' => "huggingface.js"
+    for (const file of files.filter((file) => file.endsWith("." + language) && file.includes(`.${provider}.`)).sort()) {
+        const client = path.basename(file).split(".").slice(1, -2).join("."); // e.g. '0.huggingface.js.replicate.js' => "huggingface.js"
         const content = await fs.readFile(path.join(fixtureFolder, file), { encoding: "utf-8" });
-        expectedSnippets.push(client === "default" ? { content } : { client, content });
+        expectedSnippets.push({ client, content });
     }
     return expectedSnippets;
 }
 
-async function saveExpectedInferenceSnippet(testName: string, language: LANGUAGE, snippets: InferenceSnippet[]) {
+async function saveExpectedInferenceSnippet(
+    testName: string,
+    language: LANGUAGE,
+    provider: InferenceProvider,
+    snippets: InferenceSnippet[]
+) {
     const fixtureFolder = getFixtureFolder(testName);
     await fs.mkdir(fixtureFolder, { recursive: true });
 
     for (const [index, snippet] of snippets.entries()) {
-        const file = path.join(fixtureFolder, `${index}.${snippet.client ?? "default"}.${language}`);
+        const file = path.join(fixtureFolder, `${index}.${snippet.client ?? "default"}.${provider}.${language}`);
         await fs.writeFile(file, snippet.content);
     }
 }
@@ -147,13 +163,15 @@ if (import.meta.vitest) {
     const { describe, expect, it } = import.meta.vitest;
 
     describe("inference API snippets", () => {
-        TEST_CASES.forEach(({ testName, model, languages, opts }) => {
+        TEST_CASES.forEach(({ testName, model, languages, providers, opts }) => {
             describe(testName, () => {
                 languages.forEach((language) => {
-                    it(language, async () => {
-                        const generatedSnippets = generateInferenceSnippet(model, language, opts);
-                        const expectedSnippets = await getExpectedInferenceSnippet(testName, language);
-                        expect(generatedSnippets).toEqual(expectedSnippets);
+                    providers.forEach((provider) => {
+                        it(language, async () => {
+                            const generatedSnippets = generateInferenceSnippet(model, language, provider, opts);
+                            const expectedSnippets = await getExpectedInferenceSnippet(testName, language, provider);
+                            expect(generatedSnippets).toEqual(expectedSnippets);
+                        });
                     });
                 });
             });
@@ -166,11 +184,13 @@ if (import.meta.vitest) {
     await fs.rm(path.join(rootDirFinder(), "snippets-fixtures"), { recursive: true, force: true });
 
     console.debug(" 🏭 Generating new fixtures...");
-    TEST_CASES.forEach(({ testName, model, languages, opts }) => {
-        console.debug(` ${testName} (${languages.join(", ")})`);
+    TEST_CASES.forEach(({ testName, model, languages, providers, opts }) => {
+        console.debug(` ${testName} (${languages.join(", ")}) (${providers.join(", ")})`);
         languages.forEach(async (language) => {
-            const generatedSnippets = generateInferenceSnippet(model, language, opts);
-            await saveExpectedInferenceSnippet(testName, language, generatedSnippets);
+            providers.forEach(async (provider) => {
+                const generatedSnippets = generateInferenceSnippet(model, language, provider, opts);
+                await saveExpectedInferenceSnippet(testName, language, provider, generatedSnippets);
+            });
         });
     });
     console.log("✅ All done!");
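
The practical effect of the fixture-script changes above is that each expected snippet is now keyed by provider as well as client, via the `${index}.${client ?? "default"}.${provider}.${language}` template in saveExpectedInferenceSnippet. A small TypeScript sketch of how that template expands (fixtureName is a hypothetical helper written for illustration; the resulting names match the renamed fixture files later in this commit):

// Illustration only: expanding the new fixture filename template.
const fixtureName = (index: number, client: string, provider: string, language: string) =>
    `${index}.${client}.${provider}.${language}`;

console.log(fixtureName(0, "huggingface.js", "together", "js"));      // 0.huggingface.js.together.js
console.log(fixtureName(0, "huggingface_hub", "hf-inference", "py")); // 0.huggingface_hub.hf-inference.py
console.log(fixtureName(1, "openai", "hf-inference", "js"));          // 1.openai.hf-inference.js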

packages/tasks-gen/scripts/inference-codegen.ts

Lines changed: 4 additions & 4 deletions
@@ -147,10 +147,10 @@ async function generateBinaryInputTypes(
         const propName = propSignature.name.getText(tsSource);
 
         const propIsMedia =
-            typeof spec["properties"] !== "string" &&
-            typeof spec["properties"]?.[propName] !== "string" &&
-            typeof spec["properties"]?.[propName]?.["comment"] === "string"
-                ? !!spec["properties"]?.[propName]?.["comment"]?.includes("type=binary")
+            // eslint-disable-next-line @typescript-eslint/no-explicit-any
+            typeof (spec as any)["properties"]?.[propName]?.["comment"] === "string"
+                ? // eslint-disable-next-line @typescript-eslint/no-explicit-any
+                  !!(spec as any)["properties"][propName]["comment"].includes("type=binary")
                 : false;
         if (!propIsMedia) {
             return;
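
For reference, the shape of spec data that this propIsMedia guard targets is roughly the following. This fragment is a hypothetical illustration assembled from the check itself — only "properties" and "comment" are taken from the code above; the other field names are made up:

// Hypothetical spec fragment: propIsMedia would be true for "inputs" because
// its "comment" string contains the "type=binary" marker checked above.
const spec = {
    properties: {
        inputs: { comment: "type=binary" },      // treated as a binary/media input
        parameters: { comment: "plain options" } // not binary, so propIsMedia stays false
    },
};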
Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
+curl 'https://huggingface.co/api/inference-proxy/together/v1/chat/completions' \
+-H 'Authorization: Bearer api_token' \
+-H 'Content-Type: application/json' \
+--data '{
+    "model": "meta-llama/Llama-3.1-8B-Instruct",
+    "messages": [
+        {
+            "role": "user",
+            "content": "What is the capital of France?"
+        }
+    ],
+    "max_tokens": 500,
+    "stream": false
+}'
Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
+import { HfInference } from "@huggingface/inference";
+
+const client = new HfInference("api_token");
+
+const chatCompletion = await client.chatCompletion({
+    model: "meta-llama/Llama-3.1-8B-Instruct",
+    messages: [
+        {
+            role: "user",
+            content: "What is the capital of France?"
+        }
+    ],
+    provider: "hf-inference",
+    max_tokens: 500
+});
+
+console.log(chatCompletion.choices[0].message);

packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/0.huggingface.js.js renamed to packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/0.huggingface.js.together.js

Lines changed: 1 addition & 0 deletions
@@ -10,6 +10,7 @@ const chatCompletion = await client.chatCompletion({
             content: "What is the capital of France?"
         }
     ],
+    provider: "together",
     max_tokens: 500
 });
 
Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,21 @@
+from huggingface_hub import InferenceClient
+
+client = InferenceClient(
+    provider="hf-inference",
+    api_key="api_token"
+)
+
+messages = [
+    {
+        "role": "user",
+        "content": "What is the capital of France?"
+    }
+]
+
+completion = client.chat.completions.create(
+    model="meta-llama/Llama-3.1-8B-Instruct",
+    messages=messages,
+    max_tokens=500
+)
+
+print(completion.choices[0].message)

packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/0.huggingface_hub.py renamed to packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/0.huggingface_hub.together.py

Lines changed: 4 additions & 1 deletion
@@ -1,6 +1,9 @@
 from huggingface_hub import InferenceClient
 
-client = InferenceClient(api_key="api_token")
+client = InferenceClient(
+    provider="together",
+    api_key="api_token"
+)
 
 messages = [
     {

packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/1.openai.js renamed to packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/1.openai.hf-inference.js

Lines changed: 2 additions & 2 deletions
@@ -1,8 +1,8 @@
 import { OpenAI } from "openai";
 
 const client = new OpenAI({
-    baseURL: "https://api-inference.huggingface.co/v1/",
-    apiKey: "api_token"
+    baseURL: "https://api-inference.huggingface.co/v1/",
+    apiKey: "api_token"
 });
 
 const chatCompletion = await client.chat.completions.create({
Lines changed: 19 additions & 0 deletions
@@ -0,0 +1,19 @@
+import { OpenAI } from "openai";
+
+const client = new OpenAI({
+    baseURL: "https://huggingface.co/api/inference-proxy/together",
+    apiKey: "api_token"
+});
+
+const chatCompletion = await client.chat.completions.create({
+    model: "meta-llama/Llama-3.1-8B-Instruct",
+    messages: [
+        {
+            role: "user",
+            content: "What is the capital of France?"
+        }
+    ],
+    max_tokens: 500
+});
+
+console.log(chatCompletion.choices[0].message);
Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,21 @@
+from openai import OpenAI
+
+client = OpenAI(
+    base_url="https://huggingface.co/api/inference-proxy/together",
+    api_key="api_token"
+)
+
+messages = [
+    {
+        "role": "user",
+        "content": "What is the capital of France?"
+    }
+]
+
+completion = client.chat.completions.create(
+    model="meta-llama/Llama-3.1-8B-Instruct",
+    messages=messages,
+    max_tokens=500
+)
+
+print(completion.choices[0].message)

packages/tasks-gen/snippets-fixtures/conversational-llm-stream/0.huggingface.js.js renamed to packages/tasks-gen/snippets-fixtures/conversational-llm-stream/0.huggingface.js.hf-inference.js

Lines changed: 1 addition & 0 deletions
@@ -12,6 +12,7 @@ const stream = client.chatCompletionStream({
             content: "What is the capital of France?"
         }
     ],
+    provider: "hf-inference",
     max_tokens: 500
 });
 
packages/tasks-gen/snippets-fixtures/conversational-llm-stream/0.huggingface_hub.py renamed to packages/tasks-gen/snippets-fixtures/conversational-llm-stream/0.huggingface_hub.hf-inference.py

Lines changed: 4 additions & 1 deletion
@@ -1,6 +1,9 @@
 from huggingface_hub import InferenceClient
 
-client = InferenceClient(api_key="api_token")
+client = InferenceClient(
+    provider="hf-inference",
+    api_key="api_token"
+)
 
 messages = [
     {

packages/tasks-gen/snippets-fixtures/conversational-llm-stream/1.openai.js renamed to packages/tasks-gen/snippets-fixtures/conversational-llm-stream/1.openai.hf-inference.js

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@ import { OpenAI } from "openai";
 
 const client = new OpenAI({
     baseURL: "https://api-inference.huggingface.co/v1/",
-    apiKey: "api_token"
+    apiKey: "api_token"
 });
 
 let out = "";

packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/0.huggingface.js.js renamed to packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/0.huggingface.js.hf-inference.js

Lines changed: 1 addition & 0 deletions
@@ -21,6 +21,7 @@ const chatCompletion = await client.chatCompletion({
             ]
         }
     ],
+    provider: "hf-inference",
     max_tokens: 500
 });
 
packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/0.huggingface_hub.py renamed to packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/0.huggingface_hub.hf-inference.py

Lines changed: 4 additions & 1 deletion
@@ -1,6 +1,9 @@
 from huggingface_hub import InferenceClient
 
-client = InferenceClient(api_key="api_token")
+client = InferenceClient(
+    provider="hf-inference",
+    api_key="api_token"
+)
 
 messages = [
     {
