diff --git a/docs/usage.md b/docs/usage.md
index db15c83ab..6f4d21305 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -55,7 +55,7 @@ The SDK automatically tracks usage for each API request in `request_usage_entrie
 result = await Runner.run(agent, "What's the weather in Tokyo?")
 
 for i, request in enumerate(result.context_wrapper.usage.request_usage_entries):
-    print(f"Request {i + 1}: {request.input_tokens} in, {request.output_tokens} out")
+    print(f"Request {i + 1}: Input={request.input_tokens} tokens, Output={request.output_tokens} tokens, Model={request.model_name}, Agent={request.agent_name}, ResponseID={request.response_id}")
 ```
 
 ## Accessing usage with sessions
diff --git a/src/agents/models/interface.py b/src/agents/models/interface.py
index f25934780..4df4d0a97 100644
--- a/src/agents/models/interface.py
+++ b/src/agents/models/interface.py
@@ -36,6 +36,9 @@ def include_data(self) -> bool:
 class Model(abc.ABC):
     """The base interface for calling an LLM."""
 
+    # The model name. Subclasses can set this in __init__.
+    model: str = ""
+
     @abc.abstractmethod
     async def get_response(
         self,
diff --git a/src/agents/run.py b/src/agents/run.py
index 5b5e6fdfa..ac817c4d3 100644
--- a/src/agents/run.py
+++ b/src/agents/run.py
@@ -1473,7 +1473,12 @@ async def _run_single_turn_streamed(
                         usage=usage,
                         response_id=event.response.id,
                     )
-                    context_wrapper.usage.add(usage)
+                    context_wrapper.usage.add(
+                        usage,
+                        model_name=model.model,
+                        agent_name=agent.name,
+                        response_id=event.response.id,
+                    )
 
                 if isinstance(event, ResponseOutputItemDoneEvent):
                     output_item = event.item
@@ -1896,7 +1901,12 @@ async def _get_new_response(
             prompt=prompt_config,
         )
 
-        context_wrapper.usage.add(new_response.usage)
+        context_wrapper.usage.add(
+            new_response.usage,
+            model_name=model.model,
+            agent_name=agent.name,
+            response_id=new_response.response_id,
+        )
 
         # If we have run hooks, or if the agent has hooks, we need to call them after the LLM call
         await asyncio.gather(
diff --git a/src/agents/usage.py b/src/agents/usage.py
index 216981e91..af011736f 100644
--- a/src/agents/usage.py
+++ b/src/agents/usage.py
@@ -50,6 +50,15 @@ class RequestUsage:
     output_tokens_details: OutputTokensDetails
     """Details about the output tokens for this individual request."""
 
+    model_name: str = ""
+    """The model name used for this request."""
+
+    agent_name: str = ""
+    """The agent name that made this request."""
+
+    response_id: str | None = None
+    """The response ID from the API for this request."""
+
 
 @dataclass
 class Usage:
@@ -106,13 +115,22 @@ def __post_init__(self) -> None:
         if output_details_none or output_reasoning_none:
             self.output_tokens_details = OutputTokensDetails(reasoning_tokens=0)
 
-    def add(self, other: Usage) -> None:
+    def add(
+        self,
+        other: Usage,
+        model_name: str = "",
+        agent_name: str = "",
+        response_id: str | None = None,
+    ) -> None:
         """Add another Usage object to this one, aggregating all fields.
 
         This method automatically preserves request_usage_entries.
 
         Args:
             other: The Usage object to add to this one.
+            model_name: The model name used for this request.
+            agent_name: The agent name that made this request.
+            response_id: The response ID from the API for this request.
         """
         self.requests += other.requests if other.requests else 0
         self.input_tokens += other.input_tokens if other.input_tokens else 0
@@ -158,6 +176,9 @@ def add(self, other: Usage) -> None:
                 total_tokens=other.total_tokens,
                 input_tokens_details=input_details,
                 output_tokens_details=output_details,
+                model_name=model_name,
+                agent_name=agent_name,
+                response_id=response_id,
             )
             self.request_usage_entries.append(request_usage)
         elif other.request_usage_entries:
diff --git a/tests/test_usage.py b/tests/test_usage.py
index fbe26c98d..25fde3d5a 100644
--- a/tests/test_usage.py
+++ b/tests/test_usage.py
@@ -22,7 +22,12 @@ def test_usage_add_aggregates_all_fields():
         total_tokens=15,
     )
 
-    u1.add(u2)
+    u1.add(
+        u2,
+        model_name="gpt-5",
+        agent_name="test-agent",
+        response_id="resp-1",
+    )
 
     assert u1.requests == 3
     assert u1.input_tokens == 17
@@ -43,7 +48,12 @@ def test_usage_add_aggregates_with_none_values():
         total_tokens=15,
     )
 
-    u1.add(u2)
+    u1.add(
+        u2,
+        model_name="gpt-5",
+        agent_name="test-agent",
+        response_id="resp-1",
+    )
 
     assert u1.requests == 2
     assert u1.input_tokens == 7
@@ -61,6 +71,9 @@ def test_request_usage_creation():
         total_tokens=300,
         input_tokens_details=InputTokensDetails(cached_tokens=10),
         output_tokens_details=OutputTokensDetails(reasoning_tokens=20),
+        model_name="gpt-5",
+        agent_name="test-agent",
+        response_id="resp-123",
     )
 
     assert request_usage.input_tokens == 100
@@ -68,6 +81,9 @@
     assert request_usage.total_tokens == 300
     assert request_usage.input_tokens_details.cached_tokens == 10
     assert request_usage.output_tokens_details.reasoning_tokens == 20
+    assert request_usage.model_name == "gpt-5"
+    assert request_usage.agent_name == "test-agent"
+    assert request_usage.response_id == "resp-123"
 
 
 def test_usage_add_preserves_single_request():
@@ -82,7 +98,12 @@
         total_tokens=300,
     )
 
-    u1.add(u2)
+    u1.add(
+        u2,
+        model_name="gpt-5",
+        agent_name="test-agent",
+        response_id="resp-1",
+    )
 
     # Should preserve the request usage details
     assert len(u1.request_usage_entries) == 1
@@ -92,6 +113,9 @@
     assert request_usage.total_tokens == 300
     assert request_usage.input_tokens_details.cached_tokens == 10
     assert request_usage.output_tokens_details.reasoning_tokens == 20
+    assert request_usage.model_name == "gpt-5"
+    assert request_usage.agent_name == "test-agent"
+    assert request_usage.response_id == "resp-1"
 
 
 def test_usage_add_ignores_zero_token_requests():
@@ -106,7 +130,12 @@
         total_tokens=0,
     )
 
-    u1.add(u2)
+    u1.add(
+        u2,
+        model_name="gpt-5",
+        agent_name="test-agent",
+        response_id="resp-1",
+    )
 
     # Should not create a request_usage_entry for zero tokens
     assert len(u1.request_usage_entries) == 0
@@ -124,7 +153,12 @@
         total_tokens=300,
     )
 
-    u1.add(u2)
+    u1.add(
+        u2,
+        model_name="gpt-5",
+        agent_name="test-agent",
+        response_id="resp-1",
+    )
 
     # Should not create a request usage entry for multi-request usage
     assert len(u1.request_usage_entries) == 0
@@ -142,7 +176,12 @@
         output_tokens_details=OutputTokensDetails(reasoning_tokens=20),
         total_tokens=300,
     )
-    u1.add(u2)
+    u1.add(
+        u2,
+        model_name="gpt-5",
+        agent_name="agent-1",
+        response_id="resp-1",
+    )
 
     # Create second usage with request_usage_entries
     u3 = Usage(
@@ -154,7 +193,12 @@
         total_tokens=125,
     )
 
-    u1.add(u3)
+    u1.add(
+        u3,
+        model_name="gpt-5",
+        agent_name="agent-2",
+        response_id="resp-2",
+    )
 
     # Should have both request_usage_entries
     assert len(u1.request_usage_entries) == 2
@@ -164,12 +208,16 @@
     assert first.input_tokens == 100
     assert first.output_tokens == 200
     assert first.total_tokens == 300
+    assert first.agent_name == "agent-1"
+    assert first.response_id == "resp-1"
 
     # Second request
     second = u1.request_usage_entries[1]
     assert second.input_tokens == 50
     assert second.output_tokens == 75
     assert second.total_tokens == 125
+    assert second.agent_name == "agent-2"
+    assert second.response_id == "resp-2"
 
 
 def test_usage_add_with_pre_existing_request_usage_entries():
@@ -185,7 +233,12 @@
         output_tokens_details=OutputTokensDetails(reasoning_tokens=20),
         total_tokens=300,
     )
-    u1.add(u2)
+    u1.add(
+        u2,
+        model_name="gpt-5",
+        agent_name="agent-1",
+        response_id="resp-1",
+    )
 
     # Create another usage with request_usage_entries
     u3 = Usage(
@@ -198,7 +251,12 @@
     )
 
     # Add u3 to u1
-    u1.add(u3)
+    u1.add(
+        u3,
+        model_name="gpt-5",
+        agent_name="agent-2",
+        response_id="resp-2",
+    )
 
     # Should have both request_usage_entries
     assert len(u1.request_usage_entries) == 2
@@ -228,7 +286,12 @@
         output_tokens_details=OutputTokensDetails(reasoning_tokens=0),
         total_tokens=150_000,
     )
-    usage.add(req1)
+    usage.add(
+        req1,
+        model_name="gpt-5",
+        agent_name="test-agent",
+        response_id="resp-1",
+    )
 
     # Second request: 150K input tokens
     req2 = Usage(
         input_tokens=150_000,
@@ -239,7 +302,12 @@ def test_anthropic_cost_calculation_scenario():
         output_tokens_details=OutputTokensDetails(reasoning_tokens=0),
         total_tokens=225_000,
     )
-    usage.add(req2)
+    usage.add(
+        req2,
+        model_name="gpt-5",
+        agent_name="test-agent",
+        response_id="resp-2",
+    )
 
     # Third request: 80K input tokens
     req3 = Usage(
@@ -250,7 +318,12 @@ def test_anthropic_cost_calculation_scenario():
         output_tokens_details=OutputTokensDetails(reasoning_tokens=0),
         total_tokens=120_000,
     )
-    usage.add(req3)
+    usage.add(
+        req3,
+        model_name="gpt-5",
+        agent_name="test-agent",
+        response_id="resp-3",
+    )
 
     # Verify aggregated totals
     assert usage.requests == 3
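
Not part of the patch: a minimal sketch of how the new per-request fields could be consumed once this lands. It assumes the public `agents` package exports `Agent` and `Runner` (as the docs snippet above already uses) and that a model and API key are configured in the environment; the grouping keys simply reuse the `model_name` and `agent_name` fields added to `RequestUsage` in this diff.

```python
# Sketch only: aggregate the new per-request usage fields after a run.
# Assumes `Agent` and `Runner` come from the public `agents` package, as in docs/usage.md.
import asyncio
from collections import defaultdict

from agents import Agent, Runner


async def main() -> None:
    agent = Agent(name="Assistant", instructions="Reply concisely.")
    result = await Runner.run(agent, "What's the weather in Tokyo?")

    # Group token counts by the (agent_name, model_name) pair recorded on each entry.
    totals: dict[tuple[str, str], int] = defaultdict(int)
    for entry in result.context_wrapper.usage.request_usage_entries:
        totals[(entry.agent_name, entry.model_name)] += entry.total_tokens
        print(f"{entry.response_id}: {entry.input_tokens} in / {entry.output_tokens} out")

    for (agent_name, model_name), tokens in totals.items():
        print(f"{agent_name} via {model_name}: {tokens} total tokens")


if __name__ == "__main__":
    asyncio.run(main())
```

Grouping on `(agent_name, model_name)` is just one option; `response_id` is printed per entry so individual API calls can be cross-referenced against provider logs.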