huggingface · Wauplin · Apr 30, 2025 · Apr 30, 2025
@@ -272,7 +272,7 @@ const prepareConversationalInput = (
 	return {
 		messages: opts?.messages ?? getModelInputSnippet(model),
 		...(opts?.temperature ? { temperature: opts?.temperature } : undefined),
-		max_tokens: opts?.max_tokens ?? 512,
+		...(opts?.max_tokens ? { max_tokens: opts?.max_tokens } : undefined),
 		...(opts?.top_p ? { top_p: opts?.top_p } : undefined),
 	};
 };

diff --git a/packages/tasks-gen/snippets-fixtures/bill-to-param/js/huggingface.js/0.hf-inference.js b/packages/tasks-gen/snippets-fixtures/bill-to-param/js/huggingface.js/0.hf-inference.js
@@ -11,7 +11,6 @@ const chatCompletion = await client.chatCompletion({
             content: "What is the capital of France?",
         },
     ],
-    max_tokens: 512,
 }, {
     billTo: "huggingface",
 });

diff --git a/packages/tasks-gen/snippets-fixtures/bill-to-param/js/openai/0.hf-inference.js b/packages/tasks-gen/snippets-fixtures/bill-to-param/js/openai/0.hf-inference.js
@@ -16,7 +16,6 @@ const chatCompletion = await client.chat.completions.create({
             content: "What is the capital of France?",
         },
     ],
-    max_tokens: 512,
 });
 
 console.log(chatCompletion.choices[0].message);
diff --git a/packages/tasks-gen/snippets-fixtures/bill-to-param/python/huggingface_hub/0.hf-inference.py b/packages/tasks-gen/snippets-fixtures/bill-to-param/python/huggingface_hub/0.hf-inference.py
@@ -14,7 +14,6 @@
             "content": "What is the capital of France?"
         }
     ],
-    max_tokens=512,
 )
 
 print(completion.choices[0].message)
diff --git a/packages/tasks-gen/snippets-fixtures/bill-to-param/python/openai/0.hf-inference.py b/packages/tasks-gen/snippets-fixtures/bill-to-param/python/openai/0.hf-inference.py
@@ -16,7 +16,6 @@
             "content": "What is the capital of France?"
         }
     ],
-    max_tokens=512,
 )
 
 print(completion.choices[0].message)
diff --git a/packages/tasks-gen/snippets-fixtures/bill-to-param/python/requests/0.hf-inference.py b/packages/tasks-gen/snippets-fixtures/bill-to-param/python/requests/0.hf-inference.py
@@ -17,7 +17,6 @@ def query(payload):
             "content": "What is the capital of France?"
         }
     ],
-    "max_tokens": 512,
     "model": "meta-llama/Llama-3.1-8B-Instruct"
 })
 

diff --git a/packages/tasks-gen/snippets-fixtures/bill-to-param/sh/curl/0.hf-inference.sh b/packages/tasks-gen/snippets-fixtures/bill-to-param/sh/curl/0.hf-inference.sh
@@ -9,7 +9,6 @@ curl https://router.huggingface.co/hf-inference/models/meta-llama/Llama-3.1-8B-I
                 "content": "What is the capital of France?"
             }
         ],
-        "max_tokens": 512,
         "model": "meta-llama/Llama-3.1-8B-Instruct",
         "stream": false
     }'
diff --git a/...s-gen/snippets-fixtures/conversational-llm-non-stream/js/huggingface.js/0.hf-inference.js b/...s-gen/snippets-fixtures/conversational-llm-non-stream/js/huggingface.js/0.hf-inference.js
@@ -11,7 +11,6 @@ const chatCompletion = await client.chatCompletion({
             content: "What is the capital of France?",
         },
     ],
-    max_tokens: 512,
 });
 
 console.log(chatCompletion.choices[0].message);
diff --git a/...tasks-gen/snippets-fixtures/conversational-llm-non-stream/js/huggingface.js/0.together.js b/...tasks-gen/snippets-fixtures/conversational-llm-non-stream/js/huggingface.js/0.together.js
@@ -11,7 +11,6 @@ const chatCompletion = await client.chatCompletion({
             content: "What is the capital of France?",
         },
     ],
-    max_tokens: 512,
 });
 
 console.log(chatCompletion.choices[0].message);
diff --git a/...ges/tasks-gen/snippets-fixtures/conversational-llm-non-stream/js/openai/0.hf-inference.js b/...ges/tasks-gen/snippets-fixtures/conversational-llm-non-stream/js/openai/0.hf-inference.js
@@ -13,7 +13,6 @@ const chatCompletion = await client.chat.completions.create({
             content: "What is the capital of France?",
         },
     ],
-    max_tokens: 512,
 });
 
 console.log(chatCompletion.choices[0].message);
diff --git a/packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/js/openai/0.together.js b/packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/js/openai/0.together.js
@@ -13,7 +13,6 @@ const chatCompletion = await client.chat.completions.create({
             content: "What is the capital of France?",
         },
     ],
-    max_tokens: 512,
 });
 
 console.log(chatCompletion.choices[0].message);
diff --git a/.../snippets-fixtures/conversational-llm-non-stream/python/huggingface_hub/0.hf-inference.py b/.../snippets-fixtures/conversational-llm-non-stream/python/huggingface_hub/0.hf-inference.py
@@ -13,7 +13,6 @@
             "content": "What is the capital of France?"
         }
     ],
-    max_tokens=512,
 )
 
 print(completion.choices[0].message)
diff --git a/...-gen/snippets-fixtures/conversational-llm-non-stream/python/huggingface_hub/0.together.py b/...-gen/snippets-fixtures/conversational-llm-non-stream/python/huggingface_hub/0.together.py
@@ -13,7 +13,6 @@
             "content": "What is the capital of France?"
         }
     ],
-    max_tokens=512,
 )
 
 print(completion.choices[0].message)
diff --git a/...tasks-gen/snippets-fixtures/conversational-llm-non-stream/python/openai/0.hf-inference.py b/...tasks-gen/snippets-fixtures/conversational-llm-non-stream/python/openai/0.hf-inference.py
@@ -13,7 +13,6 @@
             "content": "What is the capital of France?"
         }
     ],
-    max_tokens=512,
 )
 
 print(completion.choices[0].message)
diff --git a/...ges/tasks-gen/snippets-fixtures/conversational-llm-non-stream/python/openai/0.together.py b/...ges/tasks-gen/snippets-fixtures/conversational-llm-non-stream/python/openai/0.together.py
@@ -13,7 +13,6 @@
             "content": "What is the capital of France?"
         }
     ],
-    max_tokens=512,
 )
 
 print(completion.choices[0].message)
diff --git a/...sks-gen/snippets-fixtures/conversational-llm-non-stream/python/requests/0.hf-inference.py b/...sks-gen/snippets-fixtures/conversational-llm-non-stream/python/requests/0.hf-inference.py
@@ -16,7 +16,6 @@ def query(payload):
             "content": "What is the capital of France?"
         }
     ],
-    "max_tokens": 512,
     "model": "meta-llama/Llama-3.1-8B-Instruct"
 })
 

diff --git a/...s/tasks-gen/snippets-fixtures/conversational-llm-non-stream/python/requests/0.together.py b/...s/tasks-gen/snippets-fixtures/conversational-llm-non-stream/python/requests/0.together.py
@@ -16,7 +16,6 @@ def query(payload):
             "content": "What is the capital of France?"
         }
     ],
-    "max_tokens": 512,
     "model": "<together alias for meta-llama/Llama-3.1-8B-Instruct>"
 })
 

diff --git a/packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/sh/curl/0.hf-inference.sh b/packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/sh/curl/0.hf-inference.sh
@@ -8,7 +8,6 @@ curl https://router.huggingface.co/hf-inference/models/meta-llama/Llama-3.1-8B-I
                 "content": "What is the capital of France?"
             }
         ],
-        "max_tokens": 512,
         "model": "meta-llama/Llama-3.1-8B-Instruct",
         "stream": false
     }'
diff --git a/packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/sh/curl/0.together.sh b/packages/tasks-gen/snippets-fixtures/conversational-llm-non-stream/sh/curl/0.together.sh
@@ -8,7 +8,6 @@ curl https://api.together.xyz/v1/chat/completions \
                 "content": "What is the capital of France?"
             }
         ],
-        "max_tokens": 512,
         "model": "<together alias for meta-llama/Llama-3.1-8B-Instruct>",
         "stream": false
     }'
diff --git a/...tasks-gen/snippets-fixtures/conversational-llm-stream/js/huggingface.js/0.hf-inference.js b/...tasks-gen/snippets-fixtures/conversational-llm-stream/js/huggingface.js/0.hf-inference.js
@@ -13,7 +13,6 @@ const stream = client.chatCompletionStream({
             content: "What is the capital of France?",
         },
     ],
-    max_tokens: 512,
 });
 
 for await (const chunk of stream) {

diff --git a/...ges/tasks-gen/snippets-fixtures/conversational-llm-stream/js/huggingface.js/0.together.js b/...ges/tasks-gen/snippets-fixtures/conversational-llm-stream/js/huggingface.js/0.together.js
@@ -13,7 +13,6 @@ const stream = client.chatCompletionStream({
             content: "What is the capital of France?",
         },
     ],
-    max_tokens: 512,
 });
 
 for await (const chunk of stream) {

diff --git a/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/js/openai/0.hf-inference.js b/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/js/openai/0.hf-inference.js
@@ -13,7 +13,6 @@ const stream = await client.chat.completions.create({
             content: "What is the capital of France?",
         },
     ],
-    max_tokens: 512,
     stream: true,
 });
 

diff --git a/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/js/openai/0.together.js b/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/js/openai/0.together.js
@@ -13,7 +13,6 @@ const stream = await client.chat.completions.create({
             content: "What is the capital of France?",
         },
     ],
-    max_tokens: 512,
     stream: true,
 });
 

diff --git a/...-gen/snippets-fixtures/conversational-llm-stream/python/huggingface_hub/0.hf-inference.py b/...-gen/snippets-fixtures/conversational-llm-stream/python/huggingface_hub/0.hf-inference.py
@@ -13,7 +13,6 @@
             "content": "What is the capital of France?"
         }
     ],
-    max_tokens=512,
     stream=True,
 )
 

diff --git a/...asks-gen/snippets-fixtures/conversational-llm-stream/python/huggingface_hub/0.together.py b/...asks-gen/snippets-fixtures/conversational-llm-stream/python/huggingface_hub/0.together.py
@@ -13,7 +13,6 @@
             "content": "What is the capital of France?"
         }
     ],
-    max_tokens=512,
     stream=True,
 )
 

diff --git a/...ges/tasks-gen/snippets-fixtures/conversational-llm-stream/python/openai/0.hf-inference.py b/...ges/tasks-gen/snippets-fixtures/conversational-llm-stream/python/openai/0.hf-inference.py
@@ -13,7 +13,6 @@
             "content": "What is the capital of France?"
         }
     ],
-    max_tokens=512,
     stream=True,
 )
 

diff --git a/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/python/openai/0.together.py b/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/python/openai/0.together.py
@@ -13,7 +13,6 @@
             "content": "What is the capital of France?"
         }
     ],
-    max_tokens=512,
     stream=True,
 )
 

diff --git a/...s/tasks-gen/snippets-fixtures/conversational-llm-stream/python/requests/0.hf-inference.py b/...s/tasks-gen/snippets-fixtures/conversational-llm-stream/python/requests/0.hf-inference.py
@@ -22,7 +22,6 @@ def query(payload):
             "content": "What is the capital of France?"
         }
     ],
-    "max_tokens": 512,
     "model": "meta-llama/Llama-3.1-8B-Instruct",
     "stream": True,
 })

diff --git a/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/python/requests/0.together.py b/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/python/requests/0.together.py
@@ -22,7 +22,6 @@ def query(payload):
             "content": "What is the capital of France?"
         }
     ],
-    "max_tokens": 512,
     "model": "<together alias for meta-llama/Llama-3.1-8B-Instruct>",
     "stream": True,
 })

diff --git a/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/sh/curl/0.hf-inference.sh b/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/sh/curl/0.hf-inference.sh
@@ -8,7 +8,6 @@ curl https://router.huggingface.co/hf-inference/models/meta-llama/Llama-3.1-8B-I
                 "content": "What is the capital of France?"
             }
         ],
-        "max_tokens": 512,
         "model": "meta-llama/Llama-3.1-8B-Instruct",
         "stream": true
     }'
diff --git a/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/sh/curl/0.together.sh b/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/sh/curl/0.together.sh
@@ -8,7 +8,6 @@ curl https://api.together.xyz/v1/chat/completions \
                 "content": "What is the capital of France?"
             }
         ],
-        "max_tokens": 512,
         "model": "<together alias for meta-llama/Llama-3.1-8B-Instruct>",
         "stream": true
     }'
diff --git a/...s-gen/snippets-fixtures/conversational-vlm-non-stream/js/huggingface.js/0.fireworks-ai.js b/...s-gen/snippets-fixtures/conversational-vlm-non-stream/js/huggingface.js/0.fireworks-ai.js
@@ -22,7 +22,6 @@ const chatCompletion = await client.chatCompletion({
             ],
         },
     ],
-    max_tokens: 512,
 });
 
 console.log(chatCompletion.choices[0].message);
diff --git a/...s-gen/snippets-fixtures/conversational-vlm-non-stream/js/huggingface.js/0.hf-inference.js b/...s-gen/snippets-fixtures/conversational-vlm-non-stream/js/huggingface.js/0.hf-inference.js
@@ -22,7 +22,6 @@ const chatCompletion = await client.chatCompletion({
             ],
         },
     ],
-    max_tokens: 512,
 });
 
 console.log(chatCompletion.choices[0].message);
diff --git a/...ges/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/js/openai/0.fireworks-ai.js b/...ges/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/js/openai/0.fireworks-ai.js
@@ -24,7 +24,6 @@ const chatCompletion = await client.chat.completions.create({
             ],
         },
     ],
-    max_tokens: 512,
 });
 
 console.log(chatCompletion.choices[0].message);
diff --git a/...ges/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/js/openai/0.hf-inference.js b/...ges/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/js/openai/0.hf-inference.js
@@ -24,7 +24,6 @@ const chatCompletion = await client.chat.completions.create({
             ],
         },
     ],
-    max_tokens: 512,
 });
 
 console.log(chatCompletion.choices[0].message);
diff --git a/.../snippets-fixtures/conversational-vlm-non-stream/python/huggingface_hub/0.fireworks-ai.py b/.../snippets-fixtures/conversational-vlm-non-stream/python/huggingface_hub/0.fireworks-ai.py
@@ -24,7 +24,6 @@
             ]
         }
     ],
-    max_tokens=512,
 )
 
 print(completion.choices[0].message)
diff --git a/.../snippets-fixtures/conversational-vlm-non-stream/python/huggingface_hub/0.hf-inference.py b/.../snippets-fixtures/conversational-vlm-non-stream/python/huggingface_hub/0.hf-inference.py
@@ -24,7 +24,6 @@
             ]
         }
     ],
-    max_tokens=512,
 )
 
 print(completion.choices[0].message)
diff --git a/...tasks-gen/snippets-fixtures/conversational-vlm-non-stream/python/openai/0.fireworks-ai.py b/...tasks-gen/snippets-fixtures/conversational-vlm-non-stream/python/openai/0.fireworks-ai.py
@@ -24,7 +24,6 @@
             ]
         }
     ],
-    max_tokens=512,
 )
 
 print(completion.choices[0].message)
diff --git a/...tasks-gen/snippets-fixtures/conversational-vlm-non-stream/python/openai/0.hf-inference.py b/...tasks-gen/snippets-fixtures/conversational-vlm-non-stream/python/openai/0.hf-inference.py
@@ -24,7 +24,6 @@
             ]
         }
     ],
-    max_tokens=512,
 )
 
 print(completion.choices[0].message)
diff --git a/...sks-gen/snippets-fixtures/conversational-vlm-non-stream/python/requests/0.fireworks-ai.py b/...sks-gen/snippets-fixtures/conversational-vlm-non-stream/python/requests/0.fireworks-ai.py
@@ -27,7 +27,6 @@ def query(payload):
             ]
         }
     ],
-    "max_tokens": 512,
     "model": "<fireworks-ai alias for meta-llama/Llama-3.2-11B-Vision-Instruct>"
 })
 

diff --git a/...sks-gen/snippets-fixtures/conversational-vlm-non-stream/python/requests/0.hf-inference.py b/...sks-gen/snippets-fixtures/conversational-vlm-non-stream/python/requests/0.hf-inference.py
@@ -27,7 +27,6 @@ def query(payload):
             ]
         }
     ],
-    "max_tokens": 512,
     "model": "meta-llama/Llama-3.2-11B-Vision-Instruct"
 })
 

diff --git a/packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/sh/curl/0.fireworks-ai.sh b/packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/sh/curl/0.fireworks-ai.sh
@@ -19,7 +19,6 @@ curl https://api.fireworks.ai/inference/v1/chat/completions \
                 ]
             }
         ],
-        "max_tokens": 512,
         "model": "<fireworks-ai alias for meta-llama/Llama-3.2-11B-Vision-Instruct>",
         "stream": false
     }'
diff --git a/packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/sh/curl/0.hf-inference.sh b/packages/tasks-gen/snippets-fixtures/conversational-vlm-non-stream/sh/curl/0.hf-inference.sh
@@ -19,7 +19,6 @@ curl https://router.huggingface.co/hf-inference/models/meta-llama/Llama-3.2-11B-
                 ]
             }
         ],
-        "max_tokens": 512,
         "model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
         "stream": false
     }'
diff --git a/...tasks-gen/snippets-fixtures/conversational-vlm-stream/js/huggingface.js/0.fireworks-ai.js b/...tasks-gen/snippets-fixtures/conversational-vlm-stream/js/huggingface.js/0.fireworks-ai.js
@@ -24,7 +24,6 @@ const stream = client.chatCompletionStream({
             ],
         },
     ],
-    max_tokens: 512,
 });
 
 for await (const chunk of stream) {

diff --git a/...tasks-gen/snippets-fixtures/conversational-vlm-stream/js/huggingface.js/0.hf-inference.js b/...tasks-gen/snippets-fixtures/conversational-vlm-stream/js/huggingface.js/0.hf-inference.js
@@ -24,7 +24,6 @@ const stream = client.chatCompletionStream({
             ],
         },
     ],
-    max_tokens: 512,
 });
 
 for await (const chunk of stream) {

diff --git a/packages/tasks-gen/snippets-fixtures/conversational-vlm-stream/js/openai/0.fireworks-ai.js b/packages/tasks-gen/snippets-fixtures/conversational-vlm-stream/js/openai/0.fireworks-ai.js
@@ -24,7 +24,6 @@ const stream = await client.chat.completions.create({
             ],
         },
     ],
-    max_tokens: 512,
     stream: true,
 });
-Original file line number
+Diff line change
@@ -11,7 +11,6 @@ const chatCompletion = await client.chatCompletion({
                 content: "What is the capital of France?",
             },
         ],
-        max_tokens: 512,
     }, {
         billTo: "huggingface",
     });
@@ Expand Down @@