Skip to content

Commit a4043be

Browse files
more robust usage tracker in async (#8329)
1 parent ef244c0 commit a4043be

File tree

2 files changed

+53
-3
lines changed

2 files changed

+53
-3
lines changed

dspy/primitives/program.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import magicattr
44

5-
from dspy.dsp.utils.settings import settings
5+
from dspy.dsp.utils.settings import settings, thread_local_overrides
66
from dspy.predict.parallel import Parallel
77
from dspy.primitives.module import BaseModule
88
from dspy.utils.callback import with_callbacks
@@ -51,7 +51,7 @@ def __call__(self, *args, **kwargs):
5151
caller_modules.append(self)
5252

5353
with settings.context(caller_modules=caller_modules):
54-
if settings.track_usage and settings.usage_tracker is None:
54+
if settings.track_usage and thread_local_overrides.get().get("usage_tracker") is None:
5555
with track_usage() as usage_tracker:
5656
output = self.forward(*args, **kwargs)
5757
output.set_lm_usage(usage_tracker.get_total_tokens())
@@ -66,7 +66,7 @@ async def acall(self, *args, **kwargs):
6666
caller_modules.append(self)
6767

6868
with settings.context(caller_modules=caller_modules):
69-
if settings.track_usage and settings.usage_tracker is None:
69+
if settings.track_usage and thread_local_overrides.get().get("usage_tracker") is None:
7070
with track_usage() as usage_tracker:
7171
output = await self.aforward(*args, **kwargs)
7272
output.set_lm_usage(usage_tracker.get_total_tokens())

tests/primitives/test_module.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55

66
import pytest
77
from litellm import Choices, Message, ModelResponse
8+
from litellm.types.utils import Usage
9+
import asyncio
810

911
import dspy
1012
from dspy.utils.dummies import DummyLM
@@ -307,6 +309,54 @@ def __call__(self, question: str) -> str:
307309
assert results[1].get_lm_usage().keys() == set(["openai/gpt-3.5-turbo"])
308310

309311

312+
@pytest.mark.asyncio
313+
async def test_usage_tracker_async_parallel():
314+
program = dspy.Predict("question -> answer")
315+
316+
with patch("litellm.acompletion") as mock_completion:
317+
mock_completion.return_value = ModelResponse(
318+
choices=[Choices(message=Message(content="{'answer': 'Paris'}"))],
319+
usage=Usage(
320+
**{
321+
"prompt_tokens": 1117,
322+
"completion_tokens": 46,
323+
"total_tokens": 1163,
324+
"prompt_tokens_details": {"cached_tokens": 0, "audio_tokens": 0},
325+
"completion_tokens_details": {
326+
"reasoning_tokens": 0,
327+
"audio_tokens": 0,
328+
"accepted_prediction_tokens": 0,
329+
"rejected_prediction_tokens": 0,
330+
},
331+
},
332+
),
333+
model="openai/gpt-4o-mini",
334+
)
335+
336+
coroutines = [
337+
program.acall(question="What is the capital of France?"),
338+
program.acall(question="What is the capital of France?"),
339+
program.acall(question="What is the capital of France?"),
340+
program.acall(question="What is the capital of France?"),
341+
]
342+
with dspy.settings.context(
343+
lm=dspy.LM("openai/gpt-4o-mini", cache=False), track_usage=True, adapter=dspy.JSONAdapter()
344+
):
345+
results = await asyncio.gather(*coroutines)
346+
347+
assert results[0].get_lm_usage() is not None
348+
assert results[1].get_lm_usage() is not None
349+
350+
lm_usage0 = results[0].get_lm_usage()["openai/gpt-4o-mini"]
351+
lm_usage1 = results[1].get_lm_usage()["openai/gpt-4o-mini"]
352+
assert lm_usage0["prompt_tokens"] == 1117
353+
assert lm_usage1["prompt_tokens"] == 1117
354+
assert lm_usage0["completion_tokens"] == 46
355+
assert lm_usage1["completion_tokens"] == 46
356+
assert lm_usage0["total_tokens"] == 1163
357+
assert lm_usage1["total_tokens"] == 1163
358+
359+
310360
def test_module_history():
311361
class MyProgram(dspy.Module):
312362
def __init__(self, **kwargs):

0 commit comments

Comments (0)