Don't cache agent tools during a run

rm-openai · rm-openai · commit b63a1575a38f · 2025-06-02T14:10:15.000-04:00
### Summary: Towards #767. We were caching the list of tools for an agent, so if you did `agent.tools.append(...)` from a tool call, the next call to the model wouldn't include the new tool. This is a bug. ### Test Plan: Unit tests. Note that MCP tools are now listed on every turn of the agent loop (users can still cache the `list_tools` result, however).
diff --git a/src/agents/run.py b/src/agents/run.py
@@ -1,4 +1,3 @@
-
 from __future__ import annotations
 
 import asyncio
@@ -182,6 +181,8 @@ async def run(
 
             try:
                 while True:
+                    all_tools = await cls._get_all_tools(current_agent)
+
                     # Start an agent span if we don't have one. This span is ended if the current
                     # agent changes, or if the agent loop ends.
                     if current_span is None:
@@ -197,8 +198,6 @@ async def run(
                             output_type=output_type_name,
                         )
                         current_span.start(mark_as_current=True)
-
-                        all_tools = await cls._get_all_tools(current_agent)
                         current_span.span_data.tools = [t.name for t in all_tools]
 
                     current_turn += 1
@@ -210,9 +209,7 @@ async def run(
                                 data={"max_turns": max_turns},
                             ),
                         )
-                        raise MaxTurnsExceeded(
-                            f"Max turns ({max_turns}) exceeded"
-                        )
+                        raise MaxTurnsExceeded(f"Max turns ({max_turns}) exceeded")
 
                     logger.debug(
                         f"Running agent {current_agent.name} (turn {current_turn})",
@@ -295,7 +292,7 @@ async def run(
                     last_agent=current_agent,
                     context_wrapper=context_wrapper,
                     input_guardrail_results=input_guardrail_results,
-                    output_guardrail_results=[]
+                    output_guardrail_results=[],
                 )
                 raise
             finally:
@@ -528,6 +525,8 @@ async def _run_streamed_impl(
                 if streamed_result.is_complete:
                     break
 
+                all_tools = await cls._get_all_tools(current_agent)
+
                 # Start an agent span if we don't have one. This span is ended if the current
                 # agent changes, or if the agent loop ends.
                 if current_span is None:
@@ -543,8 +542,6 @@ async def _run_streamed_impl(
                         output_type=output_type_name,
                     )
                     current_span.start(mark_as_current=True)
-
-                    all_tools = await cls._get_all_tools(current_agent)
                     tool_names = [t.name for t in all_tools]
                     current_span.span_data.tools = tool_names
                 current_turn += 1
diff --git a/tests/mcp/test_mcp_tracing.py b/tests/mcp/test_mcp_tracing.py
@@ -44,6 +44,10 @@ async def test_mcp_tracing():
             {
                 "workflow_name": "Agent workflow",
                 "children": [
+                    {
+                        "type": "mcp_tools",
+                        "data": {"server": "fake_mcp_server", "result": ["test_tool_1"]},
+                    },
                     {
                         "type": "agent",
                         "data": {
@@ -53,21 +57,21 @@ async def test_mcp_tracing():
                             "output_type": "str",
                         },
                         "children": [
-                            {
-                                "type": "mcp_tools",
-                                "data": {"server": "fake_mcp_server", "result": ["test_tool_1"]},
-                            },
                             {
                                 "type": "function",
                                 "data": {
                                     "name": "test_tool_1",
                                     "input": "",
-                                    "output": '{"type":"text","text":"result_test_tool_1_{}","annotations":null}',  # noqa: E501
+                                    "output": '{"type":"text","text":"result_test_tool_1_{}","annotations":null}',
                                     "mcp_data": {"server": "fake_mcp_server"},
                                 },
                             },
+                            {
+                                "type": "mcp_tools",
+                                "data": {"server": "fake_mcp_server", "result": ["test_tool_1"]},
+                            },
                         ],
-                    }
+                    },
                 ],
             }
         ]
@@ -100,6 +104,13 @@ async def test_mcp_tracing():
             {
                 "workflow_name": "Agent workflow",
                 "children": [
+                    {
+                        "type": "mcp_tools",
+                        "data": {
+                            "server": "fake_mcp_server",
+                            "result": ["test_tool_1", "test_tool_2"],
+                        },
+                    },
                     {
                         "type": "agent",
                         "data": {
@@ -109,13 +120,6 @@ async def test_mcp_tracing():
                             "output_type": "str",
                         },
                         "children": [
-                            {
-                                "type": "mcp_tools",
-                                "data": {
-                                    "server": "fake_mcp_server",
-                                    "result": ["test_tool_1", "test_tool_2"],
-                                },
-                            },
                             {
                                 "type": "function",
                                 "data": {
@@ -129,12 +133,19 @@ async def test_mcp_tracing():
                                 "data": {
                                     "name": "test_tool_2",
                                     "input": "",
-                                    "output": '{"type":"text","text":"result_test_tool_2_{}","annotations":null}',  # noqa: E501
+                                    "output": '{"type":"text","text":"result_test_tool_2_{}","annotations":null}',
                                     "mcp_data": {"server": "fake_mcp_server"},
                                 },
                             },
+                            {
+                                "type": "mcp_tools",
+                                "data": {
+                                    "server": "fake_mcp_server",
+                                    "result": ["test_tool_1", "test_tool_2"],
+                                },
+                            },
                         ],
-                    }
+                    },
                 ],
             }
         ]
@@ -165,6 +176,13 @@ async def test_mcp_tracing():
             {
                 "workflow_name": "Agent workflow",
                 "children": [
+                    {
+                        "type": "mcp_tools",
+                        "data": {
+                            "server": "fake_mcp_server",
+                            "result": ["test_tool_1", "test_tool_2", "test_tool_3"],
+                        },
+                    },
                     {
                         "type": "agent",
                         "data": {
@@ -174,24 +192,24 @@ async def test_mcp_tracing():
                             "output_type": "str",
                         },
                         "children": [
-                            {
-                                "type": "mcp_tools",
-                                "data": {
-                                    "server": "fake_mcp_server",
-                                    "result": ["test_tool_1", "test_tool_2", "test_tool_3"],
-                                },
-                            },
                             {
                                 "type": "function",
                                 "data": {
                                     "name": "test_tool_3",
                                     "input": "",
-                                    "output": '{"type":"text","text":"result_test_tool_3_{}","annotations":null}',  # noqa: E501
+                                    "output": '{"type":"text","text":"result_test_tool_3_{}","annotations":null}',
                                     "mcp_data": {"server": "fake_mcp_server"},
                                 },
                             },
+                            {
+                                "type": "mcp_tools",
+                                "data": {
+                                    "server": "fake_mcp_server",
+                                    "result": ["test_tool_1", "test_tool_2", "test_tool_3"],
+                                },
+                            },
                         ],
-                    }
+                    },
                 ],
             }
         ]
diff --git a/tests/test_agent_runner.py b/tests/test_agent_runner.py
@@ -745,3 +745,38 @@ async def test_previous_response_id_passed_between_runs_streamed_multi_turn():
         pass
 
     assert model.last_turn_args.get("previous_response_id") == "resp-stream-test"
+
+
+@pytest.mark.asyncio
+async def test_dynamic_tool_addition_run() -> None:
+    """Test that tools can be added to an agent during a run."""
+    model = FakeModel()
+
+    executed: dict[str, bool] = {"called": False}
+
+    agent = Agent(name="test", model=model, tool_use_behavior="run_llm_again")
+
+    @function_tool(name_override="tool2")
+    def tool2() -> str:
+        executed["called"] = True
+        return "result2"
+
+    @function_tool(name_override="add_tool")
+    async def add_tool() -> str:
+        agent.tools.append(tool2)
+        return "added"
+
+    agent.tools.append(add_tool)
+
+    model.add_multiple_turn_outputs(
+        [
+            [get_function_tool_call("add_tool", json.dumps({}))],
+            [get_function_tool_call("tool2", json.dumps({}))],
+            [get_text_message("done")],
+        ]
+    )
+
+    result = await Runner.run(agent, input="start")
+
+    assert executed["called"] is True
+    assert result.final_output == "done"
diff --git a/tests/test_agent_runner_streamed.py b/tests/test_agent_runner_streamed.py
@@ -18,6 +18,7 @@
     RunContextWrapper,
     Runner,
     UserError,
+    function_tool,
     handoff,
 )
 from agents.items import RunItem
@@ -684,3 +685,39 @@ async def test_streaming_events():
     assert len(agent_data) == 2, "should have 2 agent updated events"
     assert agent_data[0].new_agent == agent_2, "should have started with agent_2"
     assert agent_data[1].new_agent == agent_1, "should have handed off to agent_1"
+
+
+@pytest.mark.asyncio
+async def test_dynamic_tool_addition_run_streamed() -> None:
+    model = FakeModel()
+
+    executed: dict[str, bool] = {"called": False}
+
+    agent = Agent(name="test", model=model, tool_use_behavior="run_llm_again")
+
+    @function_tool(name_override="tool2")
+    def tool2() -> str:
+        executed["called"] = True
+        return "result2"
+
+    @function_tool(name_override="add_tool")
+    async def add_tool() -> str:
+        agent.tools.append(tool2)
+        return "added"
+
+    agent.tools.append(add_tool)
+
+    model.add_multiple_turn_outputs(
+        [
+            [get_function_tool_call("add_tool", json.dumps({}))],
+            [get_function_tool_call("tool2", json.dumps({}))],
+            [get_text_message("done")],
+        ]
+    )
+
+    result = Runner.run_streamed(agent, input="start")
+    async for _ in result.stream_events():
+        pass
+
+    assert executed["called"] is True
+    assert result.final_output == "done"