Add conversational snippets for requests

Wauplin · Wauplin · commit c26b0bbb3650 · 2025-03-13T10:16:36.000+01:00
diff --git a/packages/inference/src/snippets/python.ts b/packages/inference/src/snippets/python.ts
@@ -22,6 +22,7 @@ interface TemplateParams {
 	providerModelId?: string;
 	methodName?: string; // specific to snippetBasic
 	importBase64?: boolean; // specific to snippetImportRequests
+	importJson?: boolean; // specific to snippetImportRequests
 }
 
 // Helpers to find + load templates
@@ -157,6 +158,7 @@ const snippetGenerator = (templateName: string, inputPreparationFn?: InputPrepar
 				const importSection = snippetImportRequests({
 					...params,
 					importBase64: snippet.includes("base64"),
+					importJson: snippet.includes("json."),
 				});
 				snippet = `${importSection}\n\n${snippet}`;
 			}
diff --git a/packages/inference/src/snippets/templates/python/openai/conversationalStream.jinja b/packages/inference/src/snippets/templates/python/openai/conversationalStream.jinja
@@ -12,4 +12,4 @@ stream = client.chat.completions.create(
 )
 
 for chunk in stream:
-    print(chunk.choices[0].delta.content, end="") 
+    print(chunk.choices[0].delta.content, end="")
diff --git a/packages/inference/src/snippets/templates/python/requests/conversational.jinja b/packages/inference/src/snippets/templates/python/requests/conversational.jinja
@@ -7,4 +7,4 @@ response = query({
 {{ inputs.asJsonString }}
 })
 
-print(response["choices"][0]["message"]) 
+print(response["choices"][0]["message"])
diff --git a/packages/inference/src/snippets/templates/python/requests/conversationalStream.jinja b/packages/inference/src/snippets/templates/python/requests/conversationalStream.jinja
@@ -0,0 +1,15 @@
+def query(payload):  # generator: POSTs payload and yields each streamed chunk as parsed JSON
+    response = requests.post(API_URL, headers=headers, json=payload, stream=True)
+    for line in response.iter_lines():
+        if not line.startswith(b"data:"):
+            continue  # ignore every line that does not carry the data: prefix
+        if line.strip() == b"data: [DONE]":
+            return  # end-of-stream marker
+        yield json.loads(line[len(b"data:"):])  # slice off the exact prefix; json.loads accepts bytes
+
+for chunk in query({
+    "model": "{{ providerModelId }}",
+{{ inputs.asJsonString }},
+    "stream": True,
+}):
+    print(chunk["choices"][0]["delta"].get("content") or "", end="")  # a delta may have no content
diff --git a/packages/inference/src/snippets/templates/python/requests/importRequests.jinja b/packages/inference/src/snippets/templates/python/requests/importRequests.jinja
@@ -1,6 +1,9 @@
 {% if importBase64 %}
 import base64
 {% endif %}
+{% if importJson %}
+import json
+{% endif %}
 import requests
 
 API_URL = "{{ fullUrl }}"
diff --git a/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/1.requests.hf-inference.py b/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/1.requests.hf-inference.py
@@ -0,0 +1,27 @@
+import json
+import requests
+
+API_URL = "https://router.huggingface.co/hf-inference/models/meta-llama/Llama-3.1-8B-Instruct/v1/chat/completions"
+headers = {"Authorization": "Bearer api_token"}
+
+def query(payload):  # generator: POSTs payload and yields each streamed chunk as parsed JSON
+    response = requests.post(API_URL, headers=headers, json=payload, stream=True)
+    for line in response.iter_lines():
+        if not line.startswith(b"data:"):
+            continue  # ignore every line that does not carry the data: prefix
+        if line.strip() == b"data: [DONE]":
+            return  # end-of-stream marker
+        yield json.loads(line[len(b"data:"):])  # slice off the exact prefix; json.loads accepts bytes
+
+for chunk in query({
+    "model": "meta-llama/Llama-3.1-8B-Instruct",
+    "messages": [
+        {
+            "role": "user",
+            "content": "What is the capital of France?"
+        }
+    ],
+    "max_tokens": 500,
+    "stream": True,
+}):
+    print(chunk["choices"][0]["delta"].get("content") or "", end="")  # a delta may have no content
diff --git a/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/1.requests.together.py b/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/1.requests.together.py
@@ -0,0 +1,27 @@
+import json
+import requests
+
+API_URL = "https://api.together.xyz/v1/chat/completions"
+headers = {"Authorization": "Bearer api_token"}
+
+def query(payload):  # generator: POSTs payload and yields each streamed chunk as parsed JSON
+    response = requests.post(API_URL, headers=headers, json=payload, stream=True)
+    for line in response.iter_lines():
+        if not line.startswith(b"data:"):
+            continue  # ignore every line that does not carry the data: prefix
+        if line.strip() == b"data: [DONE]":
+            return  # end-of-stream marker
+        yield json.loads(line[len(b"data:"):])  # slice off the exact prefix; json.loads accepts bytes
+
+for chunk in query({
+    "model": "<together alias for meta-llama/Llama-3.1-8B-Instruct>",
+    "messages": [
+        {
+            "role": "user",
+            "content": "What is the capital of France?"
+        }
+    ],
+    "max_tokens": 500,
+    "stream": True,
+}):
+    print(chunk["choices"][0]["delta"].get("content") or "", end="")  # a delta may have no content
diff --git a/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/2.openai.hf-inference.py b/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/2.openai.hf-inference.py
diff --git a/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/2.openai.together.py b/packages/tasks-gen/snippets-fixtures/conversational-llm-stream/2.openai.together.py
diff --git a/packages/tasks-gen/snippets-fixtures/conversational-vlm-stream/1.requests.fireworks-ai.py b/packages/tasks-gen/snippets-fixtures/conversational-vlm-stream/1.requests.fireworks-ai.py
@@ -0,0 +1,38 @@
+import json
+import requests
+
+API_URL = "https://api.fireworks.ai/inference/v1/chat/completions"
+headers = {"Authorization": "Bearer api_token"}
+
+def query(payload):  # generator: POSTs payload and yields each streamed chunk as parsed JSON
+    response = requests.post(API_URL, headers=headers, json=payload, stream=True)
+    for line in response.iter_lines():
+        if not line.startswith(b"data:"):
+            continue  # ignore every line that does not carry the data: prefix
+        if line.strip() == b"data: [DONE]":
+            return  # end-of-stream marker
+        yield json.loads(line[len(b"data:"):])  # slice off the exact prefix; json.loads accepts bytes
+
+for chunk in query({
+    "model": "<fireworks-ai alias for meta-llama/Llama-3.2-11B-Vision-Instruct>",
+    "messages": [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "text",
+                    "text": "Describe this image in one sentence."
+                },
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+                    }
+                }
+            ]
+        }
+    ],
+    "max_tokens": 500,
+    "stream": True,
+}):
+    print(chunk["choices"][0]["delta"].get("content") or "", end="")  # a delta may have no content
diff --git a/packages/tasks-gen/snippets-fixtures/conversational-vlm-stream/1.requests.hf-inference.py b/packages/tasks-gen/snippets-fixtures/conversational-vlm-stream/1.requests.hf-inference.py
@@ -0,0 +1,38 @@
+import json
+import requests
+
+API_URL = "https://router.huggingface.co/hf-inference/models/meta-llama/Llama-3.2-11B-Vision-Instruct/v1/chat/completions"
+headers = {"Authorization": "Bearer api_token"}
+
+def query(payload):  # generator: POSTs payload and yields each streamed chunk as parsed JSON
+    response = requests.post(API_URL, headers=headers, json=payload, stream=True)
+    for line in response.iter_lines():
+        if not line.startswith(b"data:"):
+            continue  # ignore every line that does not carry the data: prefix
+        if line.strip() == b"data: [DONE]":
+            return  # end-of-stream marker
+        yield json.loads(line[len(b"data:"):])  # slice off the exact prefix; json.loads accepts bytes
+
+for chunk in query({
+    "model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
+    "messages": [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "text",
+                    "text": "Describe this image in one sentence."
+                },
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+                    }
+                }
+            ]
+        }
+    ],
+    "max_tokens": 500,
+    "stream": True,
+}):
+    print(chunk["choices"][0]["delta"].get("content") or "", end="")  # a delta may have no content
diff --git a/packages/tasks-gen/snippets-fixtures/conversational-vlm-stream/2.openai.fireworks-ai.py b/packages/tasks-gen/snippets-fixtures/conversational-vlm-stream/2.openai.fireworks-ai.py
diff --git a/packages/tasks-gen/snippets-fixtures/conversational-vlm-stream/2.openai.hf-inference.py b/packages/tasks-gen/snippets-fixtures/conversational-vlm-stream/2.openai.hf-inference.py

Original file line number	Diff line number	Diff line change
`@@ -22,6 +22,7 @@ interface TemplateParams {`
`22`	`22`	`providerModelId?: string;`
`23`	`23`	`methodName?: string; // specific to snippetBasic`
`24`	`24`	`importBase64?: boolean; // specific to snippetImportRequests`
	`25`	`+ importJson?: boolean; // specific to snippetImportRequests`
`25`	`26`	`}`
`26`	`27`
`27`	`28`	`// Helpers to find + load templates`
`@@ -157,6 +158,7 @@ const snippetGenerator = (templateName: string, inputPreparationFn?: InputPrepar`
`157`	`158`	`const importSection = snippetImportRequests({`
`158`	`159`	`...params,`
`159`	`160`	`importBase64: snippet.includes("base64"),`
	`161`	`+ importJson: snippet.includes("json."),`
`160`	`162`	`});`
`161`	`163`	snippet = `${importSection}\n\n${snippet}`;
`162`	`164`	`}`
Original file line number	Diff line number	Diff line change
`@@ -12,4 +12,4 @@ stream = client.chat.completions.create(`
`12`	`12`	`)`
`13`	`13`
`14`	`14`	`for chunk in stream:`
`15`		`- print(chunk.choices[0].delta.content, end="")`
	`15`	`+ print(chunk.choices[0].delta.content, end="")`