Skip to content

Commit 002cc7b

Browse files
feat(api): Add tools and structured outputs to evals
1 parent d459943 commit 002cc7b

10 files changed

+314
-2
lines changed

.stats.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
configured_endpoints: 111
2-
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-0205acb1015d29b2312a48526734c0399f93026d4fe2dff5c7768f566e333fd2.yml
3-
openapi_spec_hash: 1772cc9056c2f6dfb2a4e9cb77ee6343
2+
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-4865dda2b62927bd141cbc85f81be3d88602f103e2c581e15eb1caded3e3aaa2.yml
3+
openapi_spec_hash: 7d14a9b23ef4ac93ea46d629601b6f6b
44
config_hash: ed1e6b3c5f93d12b80d31167f55c557c

src/openai/types/chat/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from .chat_completion import ChatCompletion as ChatCompletion
66
from .chat_completion_role import ChatCompletionRole as ChatCompletionRole
7+
from .chat_completion_tool import ChatCompletionTool as ChatCompletionTool
78
from .chat_completion_audio import ChatCompletionAudio as ChatCompletionAudio
89
from .chat_completion_chunk import ChatCompletionChunk as ChatCompletionChunk
910
from .completion_list_params import CompletionListParams as CompletionListParams
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2+
3+
from typing_extensions import Literal
4+
5+
from ..._models import BaseModel
6+
from ..shared.function_definition import FunctionDefinition
7+
8+
__all__ = ["ChatCompletionTool"]
9+
10+
11+
# A chat-completions tool. Only function tools exist at this level (see `type`).
class ChatCompletionTool(BaseModel):
    function: FunctionDefinition
    """The definition of the function the model may call."""

    type: Literal["function"]
    """The type of the tool. Currently, only `function` is supported."""

src/openai/types/evals/create_eval_completions_run_data_source.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,12 @@
66
from ..._utils import PropertyInfo
77
from ..._models import BaseModel
88
from ..shared.metadata import Metadata
9+
from ..chat.chat_completion_tool import ChatCompletionTool
10+
from ..shared.response_format_text import ResponseFormatText
911
from ..responses.easy_input_message import EasyInputMessage
1012
from ..responses.response_input_text import ResponseInputText
13+
from ..shared.response_format_json_object import ResponseFormatJSONObject
14+
from ..shared.response_format_json_schema import ResponseFormatJSONSchema
1115

1216
__all__ = [
1317
"CreateEvalCompletionsRunDataSource",
@@ -24,6 +28,7 @@
2428
"InputMessagesTemplateTemplateMessageContentOutputText",
2529
"InputMessagesItemReference",
2630
"SamplingParams",
31+
"SamplingParamsResponseFormat",
2732
]
2833

2934

@@ -136,17 +141,40 @@ class InputMessagesItemReference(BaseModel):
136141
Union[InputMessagesTemplate, InputMessagesItemReference], PropertyInfo(discriminator="type")
137142
]
138143

144+
# Union of the response-format objects accepted by `SamplingParams.response_format`:
# plain text, JSON schema (Structured Outputs), or the older JSON-object mode.
SamplingParamsResponseFormat: TypeAlias = Union[ResponseFormatText, ResponseFormatJSONSchema, ResponseFormatJSONObject]


# Sampling parameters used when generating completions for an eval run data source.
class SamplingParams(BaseModel):
    max_completion_tokens: Optional[int] = None
    """The maximum number of tokens in the generated output."""

    response_format: Optional[SamplingParamsResponseFormat] = None
    """An object specifying the format that the model must output.

    Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
    Outputs which ensures the model will match your supplied JSON schema. Learn more
    in the
    [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

    Setting to `{ "type": "json_object" }` enables the older JSON mode, which
    ensures the message the model generates is valid JSON. Using `json_schema` is
    preferred for models that support it.
    """

    seed: Optional[int] = None
    """A seed value to initialize the randomness, during sampling."""

    temperature: Optional[float] = None
    """A higher temperature increases randomness in the outputs."""

    tools: Optional[List[ChatCompletionTool]] = None
    """A list of tools the model may call.

    Currently, only functions are supported as a tool. Use this to provide a list of
    functions the model may generate JSON inputs for. A max of 128 functions are
    supported.
    """

    top_p: Optional[float] = None
    """An alternative to temperature for nucleus sampling; 1.0 includes all tokens."""
152180

src/openai/types/evals/create_eval_completions_run_data_source_param.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,12 @@
66
from typing_extensions import Literal, Required, TypeAlias, TypedDict
77

88
from ..shared_params.metadata import Metadata
9+
from ..chat.chat_completion_tool_param import ChatCompletionToolParam
910
from ..responses.easy_input_message_param import EasyInputMessageParam
11+
from ..shared_params.response_format_text import ResponseFormatText
1012
from ..responses.response_input_text_param import ResponseInputTextParam
13+
from ..shared_params.response_format_json_object import ResponseFormatJSONObject
14+
from ..shared_params.response_format_json_schema import ResponseFormatJSONSchema
1115

1216
__all__ = [
1317
"CreateEvalCompletionsRunDataSourceParam",
@@ -24,6 +28,7 @@
2428
"InputMessagesTemplateTemplateMessageContentOutputText",
2529
"InputMessagesItemReference",
2630
"SamplingParams",
31+
"SamplingParamsResponseFormat",
2732
]
2833

2934

@@ -130,17 +135,40 @@ class InputMessagesItemReference(TypedDict, total=False):
130135

131136
InputMessages: TypeAlias = Union[InputMessagesTemplate, InputMessagesItemReference]
132137

138+
# Union of the response-format params accepted by `SamplingParams["response_format"]`.
SamplingParamsResponseFormat: TypeAlias = Union[ResponseFormatText, ResponseFormatJSONSchema, ResponseFormatJSONObject]


# Request-side (param) mirror of the eval run sampling parameters; every key is
# optional (`total=False`).
class SamplingParams(TypedDict, total=False):
    max_completion_tokens: int
    """The maximum number of tokens in the generated output."""

    response_format: SamplingParamsResponseFormat
    """An object specifying the format that the model must output.

    Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
    Outputs which ensures the model will match your supplied JSON schema. Learn more
    in the
    [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

    Setting to `{ "type": "json_object" }` enables the older JSON mode, which
    ensures the message the model generates is valid JSON. Using `json_schema` is
    preferred for models that support it.
    """

    seed: int
    """A seed value to initialize the randomness, during sampling."""

    temperature: float
    """A higher temperature increases randomness in the outputs."""

    tools: Iterable[ChatCompletionToolParam]
    """A list of tools the model may call.

    Currently, only functions are supported as a tool. Use this to provide a list of
    functions the model may generate JSON inputs for. A max of 128 functions are
    supported.
    """

    top_p: float
    """An alternative to temperature for nucleus sampling; 1.0 includes all tokens."""
146174

src/openai/types/evals/run_cancel_response.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,12 @@
88
from ..._utils import PropertyInfo
99
from ..._models import BaseModel
1010
from .eval_api_error import EvalAPIError
11+
from ..responses.tool import Tool
1112
from ..shared.metadata import Metadata
1213
from ..shared.reasoning_effort import ReasoningEffort
1314
from ..responses.response_input_text import ResponseInputText
1415
from .create_eval_jsonl_run_data_source import CreateEvalJSONLRunDataSource
16+
from ..responses.response_format_text_config import ResponseFormatTextConfig
1517
from .create_eval_completions_run_data_source import CreateEvalCompletionsRunDataSource
1618

1719
__all__ = [
@@ -32,6 +34,7 @@
3234
"DataSourceResponsesInputMessagesTemplateTemplateEvalItemContentOutputText",
3335
"DataSourceResponsesInputMessagesItemReference",
3436
"DataSourceResponsesSamplingParams",
37+
"DataSourceResponsesSamplingParamsText",
3538
"PerModelUsage",
3639
"PerTestingCriteriaResult",
3740
"ResultCounts",
@@ -185,6 +188,24 @@ class DataSourceResponsesInputMessagesItemReference(BaseModel):
185188
]
186189

187190

191+
# Text-output configuration used by the responses data source sampling params:
# wraps the response-format settings for the model's text response.
class DataSourceResponsesSamplingParamsText(BaseModel):
    format: Optional[ResponseFormatTextConfig] = None
    """An object specifying the format that the model must output.

    Configuring `{ "type": "json_schema" }` enables Structured Outputs, which
    ensures the model will match your supplied JSON schema. Learn more in the
    [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

    The default format is `{ "type": "text" }` with no additional options.

    **Not recommended for gpt-4o and newer models:**

    Setting to `{ "type": "json_object" }` enables the older JSON mode, which
    ensures the message the model generates is valid JSON. Using `json_schema` is
    preferred for models that support it.
    """
207+
208+
188209
class DataSourceResponsesSamplingParams(BaseModel):
189210
max_completion_tokens: Optional[int] = None
190211
"""The maximum number of tokens in the generated output."""
@@ -195,6 +216,33 @@ class DataSourceResponsesSamplingParams(BaseModel):
195216
temperature: Optional[float] = None
196217
"""A higher temperature increases randomness in the outputs."""
197218

219+
text: Optional[DataSourceResponsesSamplingParamsText] = None
220+
"""Configuration options for a text response from the model.
221+
222+
Can be plain text or structured JSON data. Learn more:
223+
224+
- [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
225+
- [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)
226+
"""
227+
228+
tools: Optional[List[Tool]] = None
229+
"""An array of tools the model may call while generating a response.
230+
231+
You can specify which tool to use by setting the `tool_choice` parameter.
232+
233+
The two categories of tools you can provide the model are:
234+
235+
- **Built-in tools**: Tools that are provided by OpenAI that extend the model's
236+
capabilities, like
237+
[web search](https://platform.openai.com/docs/guides/tools-web-search) or
238+
[file search](https://platform.openai.com/docs/guides/tools-file-search).
239+
Learn more about
240+
[built-in tools](https://platform.openai.com/docs/guides/tools).
241+
- **Function calls (custom tools)**: Functions that are defined by you, enabling
242+
the model to call your own code. Learn more about
243+
[function calling](https://platform.openai.com/docs/guides/function-calling).
244+
"""
245+
198246
top_p: Optional[float] = None
199247
"""An alternative to temperature for nucleus sampling; 1.0 includes all tokens."""
200248

src/openai/types/evals/run_create_params.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,12 @@
55
from typing import Dict, List, Union, Iterable, Optional
66
from typing_extensions import Literal, Required, TypeAlias, TypedDict
77

8+
from ..responses.tool_param import ToolParam
89
from ..shared_params.metadata import Metadata
910
from ..shared.reasoning_effort import ReasoningEffort
1011
from ..responses.response_input_text_param import ResponseInputTextParam
1112
from .create_eval_jsonl_run_data_source_param import CreateEvalJSONLRunDataSourceParam
13+
from ..responses.response_format_text_config_param import ResponseFormatTextConfigParam
1214
from .create_eval_completions_run_data_source_param import CreateEvalCompletionsRunDataSourceParam
1315

1416
__all__ = [
@@ -29,6 +31,7 @@
2931
"DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateEvalItemContentOutputText",
3032
"DataSourceCreateEvalResponsesRunDataSourceInputMessagesItemReference",
3133
"DataSourceCreateEvalResponsesRunDataSourceSamplingParams",
34+
"DataSourceCreateEvalResponsesRunDataSourceSamplingParamsText",
3235
]
3336

3437

@@ -202,6 +205,24 @@ class DataSourceCreateEvalResponsesRunDataSourceInputMessagesItemReference(Typed
202205
]
203206

204207

208+
# Request-side text-output configuration for the responses run data source
# sampling params; the single key is optional (`total=False`).
class DataSourceCreateEvalResponsesRunDataSourceSamplingParamsText(TypedDict, total=False):
    format: ResponseFormatTextConfigParam
    """An object specifying the format that the model must output.

    Configuring `{ "type": "json_schema" }` enables Structured Outputs, which
    ensures the model will match your supplied JSON schema. Learn more in the
    [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

    The default format is `{ "type": "text" }` with no additional options.

    **Not recommended for gpt-4o and newer models:**

    Setting to `{ "type": "json_object" }` enables the older JSON mode, which
    ensures the message the model generates is valid JSON. Using `json_schema` is
    preferred for models that support it.
    """
224+
225+
205226
class DataSourceCreateEvalResponsesRunDataSourceSamplingParams(TypedDict, total=False):
206227
max_completion_tokens: int
207228
"""The maximum number of tokens in the generated output."""
@@ -212,6 +233,33 @@ class DataSourceCreateEvalResponsesRunDataSourceSamplingParams(TypedDict, total=
212233
temperature: float
213234
"""A higher temperature increases randomness in the outputs."""
214235

236+
text: DataSourceCreateEvalResponsesRunDataSourceSamplingParamsText
237+
"""Configuration options for a text response from the model.
238+
239+
Can be plain text or structured JSON data. Learn more:
240+
241+
- [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
242+
- [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)
243+
"""
244+
245+
tools: Iterable[ToolParam]
246+
"""An array of tools the model may call while generating a response.
247+
248+
You can specify which tool to use by setting the `tool_choice` parameter.
249+
250+
The two categories of tools you can provide the model are:
251+
252+
- **Built-in tools**: Tools that are provided by OpenAI that extend the model's
253+
capabilities, like
254+
[web search](https://platform.openai.com/docs/guides/tools-web-search) or
255+
[file search](https://platform.openai.com/docs/guides/tools-file-search).
256+
Learn more about
257+
[built-in tools](https://platform.openai.com/docs/guides/tools).
258+
- **Function calls (custom tools)**: Functions that are defined by you, enabling
259+
the model to call your own code. Learn more about
260+
[function calling](https://platform.openai.com/docs/guides/function-calling).
261+
"""
262+
215263
top_p: float
216264
"""An alternative to temperature for nucleus sampling; 1.0 includes all tokens."""
217265

src/openai/types/evals/run_create_response.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,12 @@
88
from ..._utils import PropertyInfo
99
from ..._models import BaseModel
1010
from .eval_api_error import EvalAPIError
11+
from ..responses.tool import Tool
1112
from ..shared.metadata import Metadata
1213
from ..shared.reasoning_effort import ReasoningEffort
1314
from ..responses.response_input_text import ResponseInputText
1415
from .create_eval_jsonl_run_data_source import CreateEvalJSONLRunDataSource
16+
from ..responses.response_format_text_config import ResponseFormatTextConfig
1517
from .create_eval_completions_run_data_source import CreateEvalCompletionsRunDataSource
1618

1719
__all__ = [
@@ -32,6 +34,7 @@
3234
"DataSourceResponsesInputMessagesTemplateTemplateEvalItemContentOutputText",
3335
"DataSourceResponsesInputMessagesItemReference",
3436
"DataSourceResponsesSamplingParams",
37+
"DataSourceResponsesSamplingParamsText",
3538
"PerModelUsage",
3639
"PerTestingCriteriaResult",
3740
"ResultCounts",
@@ -185,6 +188,24 @@ class DataSourceResponsesInputMessagesItemReference(BaseModel):
185188
]
186189

187190

191+
# Text-output configuration used by the responses data source sampling params:
# wraps the response-format settings for the model's text response.
class DataSourceResponsesSamplingParamsText(BaseModel):
    format: Optional[ResponseFormatTextConfig] = None
    """An object specifying the format that the model must output.

    Configuring `{ "type": "json_schema" }` enables Structured Outputs, which
    ensures the model will match your supplied JSON schema. Learn more in the
    [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

    The default format is `{ "type": "text" }` with no additional options.

    **Not recommended for gpt-4o and newer models:**

    Setting to `{ "type": "json_object" }` enables the older JSON mode, which
    ensures the message the model generates is valid JSON. Using `json_schema` is
    preferred for models that support it.
    """
207+
208+
188209
class DataSourceResponsesSamplingParams(BaseModel):
189210
max_completion_tokens: Optional[int] = None
190211
"""The maximum number of tokens in the generated output."""
@@ -195,6 +216,33 @@ class DataSourceResponsesSamplingParams(BaseModel):
195216
temperature: Optional[float] = None
196217
"""A higher temperature increases randomness in the outputs."""
197218

219+
text: Optional[DataSourceResponsesSamplingParamsText] = None
220+
"""Configuration options for a text response from the model.
221+
222+
Can be plain text or structured JSON data. Learn more:
223+
224+
- [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
225+
- [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)
226+
"""
227+
228+
tools: Optional[List[Tool]] = None
229+
"""An array of tools the model may call while generating a response.
230+
231+
You can specify which tool to use by setting the `tool_choice` parameter.
232+
233+
The two categories of tools you can provide the model are:
234+
235+
- **Built-in tools**: Tools that are provided by OpenAI that extend the model's
236+
capabilities, like
237+
[web search](https://platform.openai.com/docs/guides/tools-web-search) or
238+
[file search](https://platform.openai.com/docs/guides/tools-file-search).
239+
Learn more about
240+
[built-in tools](https://platform.openai.com/docs/guides/tools).
241+
- **Function calls (custom tools)**: Functions that are defined by you, enabling
242+
the model to call your own code. Learn more about
243+
[function calling](https://platform.openai.com/docs/guides/function-calling).
244+
"""
245+
198246
top_p: Optional[float] = None
199247
"""An alternative to temperature for nucleus sampling; 1.0 includes all tokens."""
200248

0 commit comments

Comments
 (0)