
Commit 4ee9b89

Fix openai stream js snippet (#1344)
_Originally from @cfahlgren1 in [moon-landing](huggingface-internal/moon-landing#13272) (private link):_

> In inference snippets:
> - **[Bug]** We don't add the `stream` flag in the JavaScript / OpenAI snippet.
> - **[Bug]** We pass the `provider` param to the OpenAI API, which causes an error.
> - **[Improvement]** Maybe the example should not print `\n` after each chunk.
>
> Example: https://huggingface.co/deepseek-ai/DeepSeek-V3-0324?inference_api=true&inference_provider=fireworks-ai&language=js

This PR addresses all 3 points. The fix is in https://github.com/huggingface/huggingface.js/pull/1344/files#diff-593c31a95f2e065cf06d977764fdc415600f67c5cc4a8204d73337ff6efd99c3.

_Originally from @gary149 on Slack ([private link](https://huggingface.slack.com/archives/C07KX53FZTK/p1743771312130449)):_

> micro nit: `"max_tokens": 512` everywhere instead of 500 (500 looks quite weird imo)

This PR addresses that as well.
1 parent 3d82e89 commit 4ee9b89


50 files changed (+62, -92 lines)
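
Taken together, the fixes yield a JS / OpenAI streaming snippet along the lines below — a sketch only: the token and model alias are placeholders, and the `baseURL` is an assumption (the client-setup lines are not part of the hunks shown here).

```js
import OpenAI from "openai";

const client = new OpenAI({
    baseURL: "https://api.together.xyz/v1", // assumed endpoint, matching the curl fixtures below
    apiKey: "api_token", // placeholder
});

const stream = await client.chat.completions.create({
    // fix 2: no `provider` field — the OpenAI-compatible API rejects it
    model: "<together alias for meta-llama/Llama-3.1-8B-Instruct>",
    messages: [{ role: "user", content: "What is the capital of France?" }],
    max_tokens: 512,
    stream: true, // fix 1: previously missing, so responses never actually streamed
});

for await (const chunk of stream) {
    // fix 3: write deltas in place instead of one console.log (plus "\n") per chunk
    process.stdout.write(chunk.choices[0]?.delta?.content || "");
}
```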

packages/inference/src/snippets/getInferenceSnippets.ts

Lines changed: 1 addition & 1 deletion
@@ -254,7 +254,7 @@ const prepareConversationalInput = (
 	return {
 		messages: opts?.messages ?? getModelInputSnippet(model),
 		...(opts?.temperature ? { temperature: opts?.temperature } : undefined),
-		max_tokens: opts?.max_tokens ?? 500,
+		max_tokens: opts?.max_tokens ?? 512,
 		...(opts?.top_p ? { top_p: opts?.top_p } : undefined),
 	};
 };
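
A side note on the default above: `??` (nullish coalescing) only falls back when `max_tokens` is `null`/`undefined`, so an explicit falsy value like `0` would be preserved — unlike `||`. A quick sketch with hypothetical inputs:

```js
const resolveMaxTokens = (opts) => opts?.max_tokens ?? 512;

console.log(resolveMaxTokens({ max_tokens: 1024 })); // 1024 — explicit value wins
console.log(resolveMaxTokens({ max_tokens: 0 }));    // 0 — `??` keeps explicit falsy values
console.log(resolveMaxTokens({}));                   // 512 — missing field falls back
console.log(resolveMaxTokens(undefined));            // 512 — optional chaining yields undefined
```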

packages/inference/src/snippets/templates/js/openai/conversationalStream.jinja

Lines changed: 3 additions & 9 deletions
@@ -5,18 +5,12 @@ const client = new OpenAI({
 	apiKey: "{{ accessToken }}",
 });

-let out = "";
-
 const stream = await client.chat.completions.create({
-	provider: "{{ provider }}",
-	model: "{{ model.id }}",
+	model: "{{ providerModelId }}",
 	{{ inputs.asTsString }}
+	stream: true,
 });

 for await (const chunk of stream) {
-	if (chunk.choices && chunk.choices.length > 0) {
-		const newContent = chunk.choices[0].delta.content;
-		out += newContent;
-		console.log(newContent);
-	}
+	process.stdout.write(chunk.choices[0]?.delta?.content || "");
 }
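
Two things to note in the rewritten template: `{{ model.id }}` becomes `{{ providerModelId }}` because the call now goes to the provider's own endpoint, which expects its model alias (see the `<together alias for …>` placeholders in the fixtures below), and the verbose `if (chunk.choices && …)` guard collapses into one optional-chaining expression. A minimal sketch of how that expression behaves, with hypothetical chunk shapes:

```js
// Hypothetical chunk shapes for illustration — not taken from a real API response.
const chunks = [
    { choices: [{ delta: { content: "Paris" } }] }, // normal content delta
    { choices: [{ delta: {} }] },                   // e.g. a final chunk with no content
    { choices: [] },                                // e.g. a chunk with no choices at all
];

for (const chunk of chunks) {
    // `?.` short-circuits to undefined at any missing link; `|| ""` avoids printing "undefined"
    process.stdout.write(chunk.choices[0]?.delta?.content || "");
}
// prints: Paris
```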

packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/js/huggingface.js/0.hf-inference.js

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@ const chatCompletion = await client.chatCompletion({
             content: "What is the capital of France?",
         },
     ],
-    max_tokens: 500,
+    max_tokens: 512,
 });

 console.log(chatCompletion.choices[0].message);

packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/js/huggingface.js/0.together.js

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@ const chatCompletion = await client.chatCompletion({
             content: "What is the capital of France?",
         },
     ],
-    max_tokens: 500,
+    max_tokens: 512,
 });

 console.log(chatCompletion.choices[0].message);

packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/js/openai/0.hf-inference.js

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@ const chatCompletion = await client.chat.completions.create({
             content: "What is the capital of France?",
         },
     ],
-    max_tokens: 500,
+    max_tokens: 512,
 });

 console.log(chatCompletion.choices[0].message);

packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/js/openai/0.together.js

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@ const chatCompletion = await client.chat.completions.create({
             content: "What is the capital of France?",
         },
     ],
-    max_tokens: 500,
+    max_tokens: 512,
 });

 console.log(chatCompletion.choices[0].message);

packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/python/huggingface_hub/0.hf-inference.py

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@
             "content": "What is the capital of France?"
         }
     ],
-    max_tokens=500,
+    max_tokens=512,
 )

 print(completion.choices[0].message)

packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/python/huggingface_hub/0.together.py

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@
             "content": "What is the capital of France?"
         }
     ],
-    max_tokens=500,
+    max_tokens=512,
 )

 print(completion.choices[0].message)

packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/python/openai/0.hf-inference.py

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@
             "content": "What is the capital of France?"
         }
     ],
-    max_tokens=500,
+    max_tokens=512,
 )

 print(completion.choices[0].message)

packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/python/openai/0.together.py

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@
             "content": "What is the capital of France?"
         }
     ],
-    max_tokens=500,
+    max_tokens=512,
 )

 print(completion.choices[0].message)

packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/python/requests/0.hf-inference.py

Lines changed: 1 addition & 1 deletion
@@ -14,7 +14,7 @@ def query(payload):
             "content": "What is the capital of France?"
         }
     ],
-    "max_tokens": 500,
+    "max_tokens": 512,
     "model": "meta-llama/Llama-3.1-8B-Instruct"
 })

packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/python/requests/0.together.py

Lines changed: 1 addition & 1 deletion
@@ -14,7 +14,7 @@ def query(payload):
             "content": "What is the capital of France?"
         }
     ],
-    "max_tokens": 500,
+    "max_tokens": 512,
     "model": "<together alias for meta-llama/Llama-3.1-8B-Instruct>"
 })

packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/sh/curl/0.hf-inference.sh

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@ curl https://router.huggingface.co/hf-inference/models/meta-llama/Llama-3.1-8B-I
             "content": "What is the capital of France?"
         }
     ],
-    "max_tokens": 500,
+    "max_tokens": 512,
     "model": "meta-llama/Llama-3.1-8B-Instruct",
     "stream": false
 }'

packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/sh/curl/0.together.sh

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@ curl https://api.together.xyz/v1/chat/completions \
             "content": "What is the capital of France?"
         }
     ],
-    "max_tokens": 500,
+    "max_tokens": 512,
     "model": "<together alias for meta-llama/Llama-3.1-8B-Instruct>",
     "stream": false
 }'

packages/tasks-gen/snippets-fixtures/conversational-llm-stream/js/huggingface.js/0.hf-inference.js

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@ const stream = await client.chatCompletionStream({
             content: "What is the capital of France?",
         },
     ],
-    max_tokens: 500,
+    max_tokens: 512,
 });

 for await (const chunk of stream) {

packages/tasks-gen/snippets-fixtures/conversational-llm-stream/js/huggingface.js/0.together.js

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@ const stream = await client.chatCompletionStream({
             content: "What is the capital of France?",
         },
     ],
-    max_tokens: 500,
+    max_tokens: 512,
 });

 for await (const chunk of stream) {

packages/tasks-gen/snippets-fixtures/conversational-llm-stream/js/openai/0.hf-inference.js

Lines changed: 3 additions & 9 deletions
@@ -5,24 +5,18 @@ const client = new OpenAI({
     apiKey: "api_token",
 });

-let out = "";
-
 const stream = await client.chat.completions.create({
-    provider: "hf-inference",
     model: "meta-llama/Llama-3.1-8B-Instruct",
     messages: [
         {
             role: "user",
             content: "What is the capital of France?",
         },
     ],
-    max_tokens: 500,
+    max_tokens: 512,
+    stream: true,
 });

 for await (const chunk of stream) {
-    if (chunk.choices && chunk.choices.length > 0) {
-        const newContent = chunk.choices[0].delta.content;
-        out += newContent;
-        console.log(newContent);
-    }
+    process.stdout.write(chunk.choices[0]?.delta?.content || "");
 }

packages/tasks-gen/snippets-fixtures/conversational-llm-stream/js/openai/0.together.js

Lines changed: 4 additions & 10 deletions
@@ -5,24 +5,18 @@ const client = new OpenAI({
     apiKey: "api_token",
 });

-let out = "";
-
 const stream = await client.chat.completions.create({
-    provider: "together",
-    model: "meta-llama/Llama-3.1-8B-Instruct",
+    model: "<together alias for meta-llama/Llama-3.1-8B-Instruct>",
     messages: [
         {
             role: "user",
             content: "What is the capital of France?",
         },
     ],
-    max_tokens: 500,
+    max_tokens: 512,
+    stream: true,
 });

 for await (const chunk of stream) {
-    if (chunk.choices && chunk.choices.length > 0) {
-        const newContent = chunk.choices[0].delta.content;
-        out += newContent;
-        console.log(newContent);
-    }
+    process.stdout.write(chunk.choices[0]?.delta?.content || "");
 }
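
This also resolves the third point from the report: `console.log` appends `\n` to every chunk, so streamed text came out one fragment per line, while `process.stdout.write` emits each delta verbatim. A tiny comparison with made-up chunks:

```js
const pieces = ["The", " capital", " of France", " is Paris."];

// Before: console.log(piece) per chunk → four separate lines of output.
// After: write each piece as-is, so the text assembles on a single line.
for (const piece of pieces) {
    process.stdout.write(piece);
}
process.stdout.write("\n"); // one trailing newline once the stream is done
// prints: The capital of France is Paris.
```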

packages/tasks-gen/snippets-fixtures/conversational-llm-stream/python/huggingface_hub/0.hf-inference.py

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@
             "content": "What is the capital of France?"
         }
     ],
-    max_tokens=500,
+    max_tokens=512,
     stream=True,
 )

packages/tasks-gen/snippets-fixtures/conversational-llm-stream/python/huggingface_hub/0.together.py

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@
             "content": "What is the capital of France?"
         }
     ],
-    max_tokens=500,
+    max_tokens=512,
     stream=True,
 )

packages/tasks-gen/snippets-fixtures/conversational-llm-stream/python/openai/0.hf-inference.py

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@
             "content": "What is the capital of France?"
         }
     ],
-    max_tokens=500,
+    max_tokens=512,
     stream=True,
 )

packages/tasks-gen/snippets-fixtures/conversational-llm-stream/python/openai/0.together.py

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@
             "content": "What is the capital of France?"
         }
     ],
-    max_tokens=500,
+    max_tokens=512,
     stream=True,
 )

packages/tasks-gen/snippets-fixtures/conversational-llm-stream/python/requests/0.hf-inference.py

Lines changed: 1 addition & 1 deletion
@@ -20,7 +20,7 @@ def query(payload):
             "content": "What is the capital of France?"
         }
     ],
-    "max_tokens": 500,
+    "max_tokens": 512,
     "model": "meta-llama/Llama-3.1-8B-Instruct",
     "stream": True,
 })

packages/tasks-gen/snippets-fixtures/conversational-llm-stream/python/requests/0.together.py

Lines changed: 1 addition & 1 deletion
@@ -20,7 +20,7 @@ def query(payload):
             "content": "What is the capital of France?"
         }
     ],
-    "max_tokens": 500,
+    "max_tokens": 512,
     "model": "<together alias for meta-llama/Llama-3.1-8B-Instruct>",
     "stream": True,
 })

packages/tasks-gen/snippets-fixtures/conversational-llm-stream/sh/curl/0.hf-inference.sh

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@ curl https://router.huggingface.co/hf-inference/models/meta-llama/Llama-3.1-8B-I
             "content": "What is the capital of France?"
         }
     ],
-    "max_tokens": 500,
+    "max_tokens": 512,
     "model": "meta-llama/Llama-3.1-8B-Instruct",
     "stream": true
 }'

packages/tasks-gen/snippets-fixtures/conversational-llm-stream/sh/curl/0.together.sh

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@ curl https://api.together.xyz/v1/chat/completions \
             "content": "What is the capital of France?"
         }
     ],
-    "max_tokens": 500,
+    "max_tokens": 512,
     "model": "<together alias for meta-llama/Llama-3.1-8B-Instruct>",
     "stream": true
 }'

packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/js/huggingface.js/0.fireworks-ai.js

Lines changed: 1 addition & 1 deletion
@@ -22,7 +22,7 @@ const chatCompletion = await client.chatCompletion({
             ],
         },
     ],
-    max_tokens: 500,
+    max_tokens: 512,
 });

 console.log(chatCompletion.choices[0].message);

packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/js/huggingface.js/0.hf-inference.js

Lines changed: 1 addition & 1 deletion
@@ -22,7 +22,7 @@ const chatCompletion = await client.chatCompletion({
             ],
         },
     ],
-    max_tokens: 500,
+    max_tokens: 512,
 });

 console.log(chatCompletion.choices[0].message);

packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/js/openai/0.fireworks-ai.js

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@ const chatCompletion = await client.chat.completions.create({
             ],
         },
     ],
-    max_tokens: 500,
+    max_tokens: 512,
 });

 console.log(chatCompletion.choices[0].message);

packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/js/openai/0.hf-inference.js

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@ const chatCompletion = await client.chat.completions.create({
             ],
         },
     ],
-    max_tokens: 500,
+    max_tokens: 512,
 });

 console.log(chatCompletion.choices[0].message);

packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/python/huggingface_hub/0.fireworks-ai.py

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@
             ]
         }
     ],
-    max_tokens=500,
+    max_tokens=512,
 )

 print(completion.choices[0].message)

packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/python/huggingface_hub/0.hf-inference.py

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@
             ]
         }
     ],
-    max_tokens=500,
+    max_tokens=512,
 )

 print(completion.choices[0].message)

packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/python/openai/0.fireworks-ai.py

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@
             ]
         }
     ],
-    max_tokens=500,
+    max_tokens=512,
 )

 print(completion.choices[0].message)

packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/python/openai/0.hf-inference.py

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@
             ]
         }
     ],
-    max_tokens=500,
+    max_tokens=512,
 )

 print(completion.choices[0].message)

packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/python/requests/0.fireworks-ai.py

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@ def query(payload):
             ]
         }
     ],
-    "max_tokens": 500,
+    "max_tokens": 512,
     "model": "<fireworks-ai alias for meta-llama/Llama-3.2-11B-Vision-Instruct>"
 })

packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/python/requests/0.hf-inference.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ def query(payload):
2525
]
2626
}
2727
],
28-
"max_tokens": 500,
28+
"max_tokens": 512,
2929
"model": "meta-llama/Llama-3.2-11B-Vision-Instruct"
3030
})
3131
