
Commit 4ee9b89

Fix openai stream js snippet (#1344)
_Originally from @cfahlgren1 in [moon-landing](huggingface-internal/moon-landing#13272) (private link):_

> In inference snippets:
> - **[Bug]** We don't add the `stream` flag in the JavaScript / OpenAI snippet.
> - **[Bug]** We pass the `provider` param to the OpenAI API, which causes an error.
> - **[Improvement]** Maybe the example should not print `\n` after each chunk.
>
> Example: https://huggingface.co/deepseek-ai/DeepSeek-V3-0324?inference_api=true&inference_provider=fireworks-ai&language=js

This PR addresses all 3 points. The fix is in https://github.com/huggingface/huggingface.js/pull/1344/files#diff-593c31a95f2e065cf06d977764fdc415600f67c5cc4a8204d73337ff6efd99c3.

_Originally from @gary149 on Slack ([private link](https://huggingface.slack.com/archives/C07KX53FZTK/p1743771312130449)):_

> micro nit: `"max_tokens": 512` everywhere instead of 500 (500 looks quite weird imo)

This PR addresses that as well.
1 parent 3d82e89 commit 4ee9b89


50 files changed (+62, -92 lines)
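
Taken together, the fixes yield a JS / OpenAI streaming snippet along the lines below — a sketch only: the token and model alias are placeholders, and the `baseURL` is an assumption (the client-setup lines are not part of the hunks shown here).

```js
import OpenAI from "openai";

const client = new OpenAI({
    baseURL: "https://api.together.xyz/v1", // assumed endpoint, matching the curl fixtures below
    apiKey: "api_token", // placeholder
});

const stream = await client.chat.completions.create({
    // fix 2: no `provider` field — the OpenAI-compatible API rejects it
    model: "<together alias for meta-llama/Llama-3.1-8B-Instruct>",
    messages: [{ role: "user", content: "What is the capital of France?" }],
    max_tokens: 512,
    stream: true, // fix 1: previously missing, so responses never actually streamed
});

for await (const chunk of stream) {
    // fix 3: write deltas in place instead of one console.log (plus "\n") per chunk
    process.stdout.write(chunk.choices[0]?.delta?.content || "");
}
```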

packages/inference/src/snippets/getInferenceSnippets.ts

Lines changed: 1 addition & 1 deletion
@@ -254,7 +254,7 @@ const prepareConversationalInput = (
 	return {
 		messages: opts?.messages ?? getModelInputSnippet(model),
 		...(opts?.temperature ? { temperature: opts?.temperature } : undefined),
-		max_tokens: opts?.max_tokens ?? 500,
+		max_tokens: opts?.max_tokens ?? 512,
 		...(opts?.top_p ? { top_p: opts?.top_p } : undefined),
 	};
 };
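
A side note on the default above: `??` (nullish coalescing) only falls back when `max_tokens` is `null`/`undefined`, so an explicit falsy value like `0` would be preserved — unlike `||`. A quick sketch with hypothetical inputs:

```js
const resolveMaxTokens = (opts) => opts?.max_tokens ?? 512;

console.log(resolveMaxTokens({ max_tokens: 1024 })); // 1024 — explicit value wins
console.log(resolveMaxTokens({ max_tokens: 0 }));    // 0 — `??` keeps explicit falsy values
console.log(resolveMaxTokens({}));                   // 512 — missing field falls back
console.log(resolveMaxTokens(undefined));            // 512 — optional chaining yields undefined
```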

packages/inference/src/snippets/templates/js/openai/conversationalStream.jinja

Lines changed: 3 additions & 9 deletions
@@ -5,18 +5,12 @@ const client = new OpenAI({
 	apiKey: "{{ accessToken }}",
 });

-let out = "";
-
 const stream = await client.chat.completions.create({
-	provider: "{{ provider }}",
-	model: "{{ model.id }}",
+	model: "{{ providerModelId }}",
 	{{ inputs.asTsString }}
+	stream: true,
 });

 for await (const chunk of stream) {
-	if (chunk.choices && chunk.choices.length > 0) {
-		const newContent = chunk.choices[0].delta.content;
-		out += newContent;
-		console.log(newContent);
-	}
+	process.stdout.write(chunk.choices[0]?.delta?.content || "");
 }
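
Two things to note in the rewritten template: `{{ model.id }}` becomes `{{ providerModelId }}` because the call now goes to the provider's own endpoint, which expects its model alias (see the `<together alias for …>` placeholders in the fixtures below), and the verbose `if (chunk.choices && …)` guard collapses into one optional-chaining expression. A minimal sketch of how that expression behaves, with hypothetical chunk shapes:

```js
// Hypothetical chunk shapes for illustration — not taken from a real API response.
const chunks = [
    { choices: [{ delta: { content: "Paris" } }] }, // normal content delta
    { choices: [{ delta: {} }] },                   // e.g. a final chunk with no content
    { choices: [] },                                // e.g. a chunk with no choices at all
];

for (const chunk of chunks) {
    // `?.` short-circuits to undefined at any missing link; `|| ""` avoids printing "undefined"
    process.stdout.write(chunk.choices[0]?.delta?.content || "");
}
// prints: Paris
```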

packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/js/huggingface.js/0.hf-inference.js

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@ const chatCompletion = await client.chatCompletion({
             content: "What is the capital of France?",
         },
     ],
-    max_tokens: 500,
+    max_tokens: 512,
 });

 console.log(chatCompletion.choices[0].message);

packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/js/huggingface.js/0.together.js

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@ const chatCompletion = await client.chatCompletion({
             content: "What is the capital of France?",
         },
     ],
-    max_tokens: 500,
+    max_tokens: 512,
 });

 console.log(chatCompletion.choices[0].message);

packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/js/openai/0.hf-inference.js

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@ const chatCompletion = await client.chat.completions.create({
             content: "What is the capital of France?",
         },
     ],
-    max_tokens: 500,
+    max_tokens: 512,
 });

 console.log(chatCompletion.choices[0].message);

packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/js/openai/0.together.js

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@ const chatCompletion = await client.chat.completions.create({
             content: "What is the capital of France?",
         },
     ],
-    max_tokens: 500,
+    max_tokens: 512,
 });

 console.log(chatCompletion.choices[0].message);

packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/python/huggingface_hub/0.hf-inference.py

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@
             "content": "What is the capital of France?"
         }
     ],
-    max_tokens=500,
+    max_tokens=512,
 )

 print(completion.choices[0].message)

packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/python/huggingface_hub/0.together.py

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@
             "content": "What is the capital of France?"
         }
     ],
-    max_tokens=500,
+    max_tokens=512,
 )

 print(completion.choices[0].message)

packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/python/openai/0.hf-inference.py

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@
             "content": "What is the capital of France?"
         }
     ],
-    max_tokens=500,
+    max_tokens=512,
 )

 print(completion.choices[0].message)

packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/python/openai/0.together.py

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@
             "content": "What is the capital of France?"
         }
     ],
-    max_tokens=500,
+    max_tokens=512,
 )

 print(completion.choices[0].message)

packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/python/requests/0.hf-inference.py

Lines changed: 1 addition & 1 deletion
@@ -14,7 +14,7 @@ def query(payload):
             "content": "What is the capital of France?"
         }
     ],
-    "max_tokens": 500,
+    "max_tokens": 512,
     "model": "meta-llama/Llama-3.1-8B-Instruct"
 })

packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/python/requests/0.together.py

Lines changed: 1 addition & 1 deletion
@@ -14,7 +14,7 @@ def query(payload):
             "content": "What is the capital of France?"
         }
     ],
-    "max_tokens": 500,
+    "max_tokens": 512,
     "model": "<together alias for meta-llama/Llama-3.1-8B-Instruct>"
 })

packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/sh/curl/0.hf-inference.sh

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@ curl https://router.huggingface.co/hf-inference/models/meta-llama/Llama-3.1-8B-I
             "content": "What is the capital of France?"
         }
     ],
-    "max_tokens": 500,
+    "max_tokens": 512,
     "model": "meta-llama/Llama-3.1-8B-Instruct",
     "stream": false
 }'

packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/sh/curl/0.together.sh

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@ curl https://api.together.xyz/v1/chat/completions \
             "content": "What is the capital of France?"
         }
     ],
-    "max_tokens": 500,
+    "max_tokens": 512,
     "model": "<together alias for meta-llama/Llama-3.1-8B-Instruct>",
     "stream": false
 }'

packages/tasks-gen/snippets-fixtures/conversational-llm-stream/js/huggingface.js/0.hf-inference.js

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@ const stream = await client.chatCompletionStream({
             content: "What is the capital of France?",
         },
     ],
-    max_tokens: 500,
+    max_tokens: 512,
 });

 for await (const chunk of stream) {

packages/tasks-gen/snippets-fixtures/conversational-llm-stream/js/huggingface.js/0.together.js

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@ const stream = await client.chatCompletionStream({
             content: "What is the capital of France?",
         },
     ],
-    max_tokens: 500,
+    max_tokens: 512,
 });

 for await (const chunk of stream) {

packages/tasks-gen/snippets-fixtures/conversational-llm-stream/js/openai/0.hf-inference.js

Lines changed: 3 additions & 9 deletions
@@ -5,24 +5,18 @@ const client = new OpenAI({
     apiKey: "api_token",
 });

-let out = "";
-
 const stream = await client.chat.completions.create({
-    provider: "hf-inference",
     model: "meta-llama/Llama-3.1-8B-Instruct",
     messages: [
         {
             role: "user",
             content: "What is the capital of France?",
         },
     ],
-    max_tokens: 500,
+    max_tokens: 512,
+    stream: true,
 });

 for await (const chunk of stream) {
-    if (chunk.choices && chunk.choices.length > 0) {
-        const newContent = chunk.choices[0].delta.content;
-        out += newContent;
-        console.log(newContent);
-    }
+    process.stdout.write(chunk.choices[0]?.delta?.content || "");
 }

packages/tasks-gen/snippets-fixtures/conversational-llm-stream/js/openai/0.together.js

Lines changed: 4 additions & 10 deletions
@@ -5,24 +5,18 @@ const client = new OpenAI({
     apiKey: "api_token",
 });

-let out = "";
-
 const stream = await client.chat.completions.create({
-    provider: "together",
-    model: "meta-llama/Llama-3.1-8B-Instruct",
+    model: "<together alias for meta-llama/Llama-3.1-8B-Instruct>",
     messages: [
         {
             role: "user",
             content: "What is the capital of France?",
         },
     ],
-    max_tokens: 500,
+    max_tokens: 512,
+    stream: true,
 });

 for await (const chunk of stream) {
-    if (chunk.choices && chunk.choices.length > 0) {
-        const newContent = chunk.choices[0].delta.content;
-        out += newContent;
-        console.log(newContent);
-    }
+    process.stdout.write(chunk.choices[0]?.delta?.content || "");
 }
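
This also resolves the third point from the report: `console.log` appends `\n` to every chunk, so streamed text came out one fragment per line, while `process.stdout.write` emits each delta verbatim. A tiny comparison with made-up chunks:

```js
const pieces = ["The", " capital", " of France", " is Paris."];

// Before: console.log(piece) per chunk → four separate lines of output.
// After: write each piece as-is, so the text assembles on a single line.
for (const piece of pieces) {
    process.stdout.write(piece);
}
process.stdout.write("\n"); // one trailing newline once the stream is done
// prints: The capital of France is Paris.
```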

packages/tasks-gen/snippets-fixtures/conversational-llm-stream/python/huggingface_hub/0.hf-inference.py

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@
             "content": "What is the capital of France?"
         }
     ],
-    max_tokens=500,
+    max_tokens=512,
     stream=True,
 )

packages/tasks-gen/snippets-fixtures/conversational-llm-stream/python/huggingface_hub/0.together.py

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@
             "content": "What is the capital of France?"
         }
     ],
-    max_tokens=500,
+    max_tokens=512,
     stream=True,
 )

packages/tasks-gen/snippets-fixtures/conversational-llm-stream/python/openai/0.hf-inference.py

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@
             "content": "What is the capital of France?"
         }
     ],
-    max_tokens=500,
+    max_tokens=512,
     stream=True,
 )

packages/tasks-gen/snippets-fixtures/conversational-llm-stream/python/openai/0.together.py

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@
             "content": "What is the capital of France?"
         }
     ],
-    max_tokens=500,
+    max_tokens=512,
     stream=True,
 )

packages/tasks-gen/snippets-fixtures/conversational-llm-stream/python/requests/0.hf-inference.py

Lines changed: 1 addition & 1 deletion
@@ -20,7 +20,7 @@ def query(payload):
             "content": "What is the capital of France?"
         }
     ],
-    "max_tokens": 500,
+    "max_tokens": 512,
     "model": "meta-llama/Llama-3.1-8B-Instruct",
     "stream": True,
 })

packages/tasks-gen/snippets-fixtures/conversational-llm-stream/python/requests/0.together.py

Lines changed: 1 addition & 1 deletion
@@ -20,7 +20,7 @@ def query(payload):
             "content": "What is the capital of France?"
         }
     ],
-    "max_tokens": 500,
+    "max_tokens": 512,
     "model": "<together alias for meta-llama/Llama-3.1-8B-Instruct>",
     "stream": True,
 })

packages/tasks-gen/snippets-fixtures/conversational-llm-stream/sh/curl/0.hf-inference.sh

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@ curl https://router.huggingface.co/hf-inference/models/meta-llama/Llama-3.1-8B-I
             "content": "What is the capital of France?"
         }
     ],
-    "max_tokens": 500,
+    "max_tokens": 512,
     "model": "meta-llama/Llama-3.1-8B-Instruct",
     "stream": true
 }'

packages/tasks-gen/snippets-fixtures/conversational-llm-stream/sh/curl/0.together.sh

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@ curl https://api.together.xyz/v1/chat/completions \
             "content": "What is the capital of France?"
         }
     ],
-    "max_tokens": 500,
+    "max_tokens": 512,
     "model": "<together alias for meta-llama/Llama-3.1-8B-Instruct>",
     "stream": true
 }'

packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/js/huggingface.js/0.fireworks-ai.js

Lines changed: 1 addition & 1 deletion
@@ -22,7 +22,7 @@ const chatCompletion = await client.chatCompletion({
             ],
         },
     ],
-    max_tokens: 500,
+    max_tokens: 512,
 });

 console.log(chatCompletion.choices[0].message);

packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/js/huggingface.js/0.hf-inference.js

Lines changed: 1 addition & 1 deletion
@@ -22,7 +22,7 @@ const chatCompletion = await client.chatCompletion({
             ],
         },
     ],
-    max_tokens: 500,
+    max_tokens: 512,
 });

 console.log(chatCompletion.choices[0].message);

packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/js/openai/0.fireworks-ai.js

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@ const chatCompletion = await client.chat.completions.create({
             ],
         },
     ],
-    max_tokens: 500,
+    max_tokens: 512,
 });

 console.log(chatCompletion.choices[0].message);

packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/js/openai/0.hf-inference.js

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@ const chatCompletion = await client.chat.completions.create({
             ],
         },
     ],
-    max_tokens: 500,
+    max_tokens: 512,
 });

 console.log(chatCompletion.choices[0].message);

packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/python/huggingface_hub/0.fireworks-ai.py

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@
             ]
         }
     ],
-    max_tokens=500,
+    max_tokens=512,
 )

 print(completion.choices[0].message)

packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/python/huggingface_hub/0.hf-inference.py

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@
             ]
         }
     ],
-    max_tokens=500,
+    max_tokens=512,
 )

 print(completion.choices[0].message)

packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/python/openai/0.fireworks-ai.py

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@
             ]
         }
     ],
-    max_tokens=500,
+    max_tokens=512,
 )

 print(completion.choices[0].message)

packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/python/openai/0.hf-inference.py

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@
             ]
         }
     ],
-    max_tokens=500,
+    max_tokens=512,
 )

 print(completion.choices[0].message)

packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/python/requests/0.fireworks-ai.py

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@ def query(payload):
             ]
         }
     ],
-    "max_tokens": 500,
+    "max_tokens": 512,
     "model": "<fireworks-ai alias for meta-llama/Llama-3.2-11B-Vision-Instruct>"
 })

packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/python/requests/0.hf-inference.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ def query(payload):
2525
]
2626
}
2727
],
28-
"max_tokens": 500,
28+
"max_tokens": 512,
2929
"model": "meta-llama/Llama-3.2-11B-Vision-Instruct"
3030
})
3131
