openai · gbmarc1 · Jun 5, 2025
@@ -100,8 +100,8 @@ def create(
             "/audio/speech",
             body=maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "input": input,
-                    "model": model,
                     "voice": voice,
                     "instructions": instructions,
                     "response_format": response_format,
@@ -191,8 +191,8 @@ async def create(
             "/audio/speech",
             body=await async_maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "input": input,
-                    "model": model,
                     "voice": voice,
                     "instructions": instructions,
                     "response_format": response_format,

@@ -313,8 +313,8 @@ def create(
     ) -> str | Transcription | TranscriptionVerbose | Stream[TranscriptionStreamEvent]:
         body = deepcopy_minimal(
             {
+                "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                 "file": file,
-                "model": model,
                 "chunking_strategy": chunking_strategy,
                 "include": include,
                 "language": language,
@@ -692,8 +692,8 @@ async def create(
     ) -> Transcription | TranscriptionVerbose | str | AsyncStream[TranscriptionStreamEvent]:
         body = deepcopy_minimal(
             {
+                "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                 "file": file,
-                "model": model,
                 "chunking_strategy": chunking_strategy,
                 "include": include,
                 "language": language,

@@ -146,8 +146,8 @@ def create(
         """
         body = deepcopy_minimal(
             {
+                "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                 "file": file,
-                "model": model,
                 "prompt": prompt,
                 "response_format": response_format,
                 "temperature": temperature,
@@ -289,8 +289,8 @@ async def create(
         """
         body = deepcopy_minimal(
             {
+                "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                 "file": file,
-                "model": model,
                 "prompt": prompt,
                 "response_format": response_format,
                 "temperature": temperature,

@@ -156,7 +156,7 @@ def create(
             "/assistants",
             body=maybe_transform(
                 {
-                    "model": model,
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "description": description,
                     "instructions": instructions,
                     "metadata": metadata,
@@ -360,10 +360,10 @@ def update(
             f"/assistants/{assistant_id}",
             body=maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "description": description,
                     "instructions": instructions,
                     "metadata": metadata,
-                    "model": model,
                     "name": name,
                     "reasoning_effort": reasoning_effort,
                     "response_format": response_format,
@@ -605,7 +605,7 @@ async def create(
             "/assistants",
             body=await async_maybe_transform(
                 {
-                    "model": model,
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "description": description,
                     "instructions": instructions,
                     "metadata": metadata,
@@ -809,10 +809,10 @@ async def update(
             f"/assistants/{assistant_id}",
             body=await async_maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "description": description,
                     "instructions": instructions,
                     "metadata": metadata,
-                    "model": model,
                     "name": name,
                     "reasoning_effort": reasoning_effort,
                     "response_format": response_format,

@@ -159,8 +159,8 @@ def parser(raw_completion: ChatCompletion) -> ParsedChatCompletion[ResponseForma
             "/chat/completions",
             body=maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "messages": messages,
-                    "model": model,
                     "audio": audio,
                     "frequency_penalty": frequency_penalty,
                     "function_call": function_call,
@@ -438,8 +438,8 @@ def parser(raw_completion: ChatCompletion) -> ParsedChatCompletion[ResponseForma
             "/chat/completions",
             body=await async_maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "messages": messages,
-                    "model": model,
                     "audio": audio,
                     "frequency_penalty": frequency_penalty,
                     "function_call": function_call,

@@ -179,14 +179,14 @@ def create(
             "/realtime/sessions",
             body=maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "client_secret": client_secret,
                     "input_audio_format": input_audio_format,
                     "input_audio_noise_reduction": input_audio_noise_reduction,
                     "input_audio_transcription": input_audio_transcription,
                     "instructions": instructions,
                     "max_response_output_tokens": max_response_output_tokens,
                     "modalities": modalities,
-                    "model": model,
                     "output_audio_format": output_audio_format,
                     "speed": speed,
                     "temperature": temperature,
@@ -364,14 +364,14 @@ async def create(
             "/realtime/sessions",
             body=await async_maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "client_secret": client_secret,
                     "input_audio_format": input_audio_format,
                     "input_audio_noise_reduction": input_audio_noise_reduction,
                     "input_audio_transcription": input_audio_transcription,
                     "instructions": instructions,
                     "max_response_output_tokens": max_response_output_tokens,
                     "modalities": modalities,
-                    "model": model,
                     "output_audio_format": output_audio_format,
                     "speed": speed,
                     "temperature": temperature,

@@ -573,14 +573,14 @@ def create(
             f"/threads/{thread_id}/runs",
             body=maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "assistant_id": assistant_id,
                     "additional_instructions": additional_instructions,
                     "additional_messages": additional_messages,
                     "instructions": instructions,
                     "max_completion_tokens": max_completion_tokens,
                     "max_prompt_tokens": max_prompt_tokens,
                     "metadata": metadata,
-                    "model": model,
                     "parallel_tool_calls": parallel_tool_calls,
                     "reasoning_effort": reasoning_effort,
                     "response_format": response_format,
@@ -976,14 +976,14 @@ def create_and_stream(
             f"/threads/{thread_id}/runs",
             body=maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "assistant_id": assistant_id,
                     "additional_instructions": additional_instructions,
                     "additional_messages": additional_messages,
                     "instructions": instructions,
                     "max_completion_tokens": max_completion_tokens,
                     "max_prompt_tokens": max_prompt_tokens,
                     "metadata": metadata,
-                    "model": model,
                     "response_format": response_format,
                     "temperature": temperature,
                     "tool_choice": tool_choice,
@@ -1163,14 +1163,14 @@ def stream(
             f"/threads/{thread_id}/runs",
             body=maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "assistant_id": assistant_id,
                     "additional_instructions": additional_instructions,
                     "additional_messages": additional_messages,
                     "instructions": instructions,
                     "max_completion_tokens": max_completion_tokens,
                     "max_prompt_tokens": max_prompt_tokens,
                     "metadata": metadata,
-                    "model": model,
                     "response_format": response_format,
                     "temperature": temperature,
                     "tool_choice": tool_choice,
@@ -2007,14 +2007,14 @@ async def create(
             f"/threads/{thread_id}/runs",
             body=await async_maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "assistant_id": assistant_id,
                     "additional_instructions": additional_instructions,
                     "additional_messages": additional_messages,
                     "instructions": instructions,
                     "max_completion_tokens": max_completion_tokens,
                     "max_prompt_tokens": max_prompt_tokens,
                     "metadata": metadata,
-                    "model": model,
                     "parallel_tool_calls": parallel_tool_calls,
                     "reasoning_effort": reasoning_effort,
                     "response_format": response_format,
@@ -2409,14 +2409,14 @@ def create_and_stream(
             f"/threads/{thread_id}/runs",
             body=maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "assistant_id": assistant_id,
                     "additional_instructions": additional_instructions,
                     "additional_messages": additional_messages,
                     "instructions": instructions,
                     "max_completion_tokens": max_completion_tokens,
                     "max_prompt_tokens": max_prompt_tokens,
                     "metadata": metadata,
-                    "model": model,
                     "response_format": response_format,
                     "temperature": temperature,
                     "tool_choice": tool_choice,
@@ -2596,14 +2596,14 @@ def stream(
             f"/threads/{thread_id}/runs",
             body=maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "assistant_id": assistant_id,
                     "additional_instructions": additional_instructions,
                     "additional_messages": additional_messages,
                     "instructions": instructions,
                     "max_completion_tokens": max_completion_tokens,
                     "max_prompt_tokens": max_prompt_tokens,
                     "metadata": metadata,
-                    "model": model,
                     "response_format": response_format,
                     "temperature": temperature,
                     "tool_choice": tool_choice,

@@ -707,12 +707,12 @@ def create_and_run(
             "/threads/runs",
             body=maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "assistant_id": assistant_id,
                     "instructions": instructions,
                     "max_completion_tokens": max_completion_tokens,
                     "max_prompt_tokens": max_prompt_tokens,
                     "metadata": metadata,
-                    "model": model,
                     "parallel_tool_calls": parallel_tool_calls,
                     "response_format": response_format,
                     "stream": stream,
@@ -888,12 +888,12 @@ def create_and_run_stream(
             "/threads/runs",
             body=maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "assistant_id": assistant_id,
                     "instructions": instructions,
                     "max_completion_tokens": max_completion_tokens,
                     "max_prompt_tokens": max_prompt_tokens,
                     "metadata": metadata,
-                    "model": model,
                     "parallel_tool_calls": parallel_tool_calls,
                     "response_format": response_format,
                     "temperature": temperature,
@@ -1565,12 +1565,12 @@ async def create_and_run(
             "/threads/runs",
             body=await async_maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "assistant_id": assistant_id,
                     "instructions": instructions,
                     "max_completion_tokens": max_completion_tokens,
                     "max_prompt_tokens": max_prompt_tokens,
                     "metadata": metadata,
-                    "model": model,
                     "parallel_tool_calls": parallel_tool_calls,
                     "response_format": response_format,
                     "stream": stream,
@@ -1750,12 +1750,12 @@ def create_and_run_stream(
             "/threads/runs",
             body=maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "assistant_id": assistant_id,
                     "instructions": instructions,
                     "max_completion_tokens": max_completion_tokens,
                     "max_prompt_tokens": max_prompt_tokens,
                     "metadata": metadata,
-                    "model": model,
                     "parallel_tool_calls": parallel_tool_calls,
                     "response_format": response_format,
                     "temperature": temperature,

@@ -926,8 +926,8 @@ def create(
             "/chat/completions",
             body=maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "messages": messages,
-                    "model": model,
                     "audio": audio,
                     "frequency_penalty": frequency_penalty,
                     "function_call": function_call,
@@ -2029,8 +2029,8 @@ async def create(
             "/chat/completions",
             body=await async_maybe_transform(
                 {
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "messages": messages,
-                    "model": model,
                     "audio": audio,
                     "frequency_penalty": frequency_penalty,
                     "function_call": function_call,

@@ -542,7 +542,7 @@ def create(
             "/completions",
             body=maybe_transform(
                 {
-                    "model": model,
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "prompt": prompt,
                     "best_of": best_of,
                     "echo": echo,
@@ -1092,7 +1092,7 @@ async def create(
             "/completions",
             body=await async_maybe_transform(
                 {
-                    "model": model,
+                    "model": model,  # Always set model as the first field in the payload. In some proxies, this is used for routing. We don't want to read all messages specifically big ones for routing.
                     "prompt": prompt,
                     "best_of": best_of,
                     "echo": echo,