From b927aad1bb5deed6183d5f946d16d3ec4ee93b9b Mon Sep 17 00:00:00 2001
From: Aman Jaiswal <66757799+amanjaiswal73892@users.noreply.github.com>
Date: Wed, 25 Jun 2025 20:53:16 -0400
Subject: [PATCH 1/2] update openai cache tracking usage to support Chat Completion and Responses API

---
 src/agentlab/llm/tracking.py | 36 ++++++++++++++++++++++++------------
 1 file changed, 24 insertions(+), 12 deletions(-)

diff --git a/src/agentlab/llm/tracking.py b/src/agentlab/llm/tracking.py
index ad846a71..ab033b22 100644
--- a/src/agentlab/llm/tracking.py
+++ b/src/agentlab/llm/tracking.py
@@ -163,6 +163,10 @@ def __call__(self, *args, **kwargs):
 
         response = self._call_api(*args, **kwargs)
         usage = dict(getattr(response, "usage", {}))
+        if 'prompt_tokens_details' in usage:
+            usage['cached_tokens'] = usage['prompt_tokens_details'].cached_tokens
+        if 'input_tokens_details' in usage:
+            usage['cached_tokens'] = usage['input_tokens_details'].cached_tokens
         usage = {f"usage_{k}": v for k, v in usage.items() if isinstance(v, (int, float))}
         usage |= {"n_api_calls": 1}
         usage |= {"effective_cost": self.get_effective_cost(response)}
@@ -298,21 +302,29 @@ def get_effective_cost_from_openai_api(self, response) -> float:
         Returns:
             float: The effective cost calculated from the response.
         """
-        usage = getattr(response, "usage", {})
-        prompt_token_details = getattr(response, "prompt_tokens_details", {})
-
-        total_input_tokens = getattr(
-            prompt_token_details, "prompt_tokens", 0
-        )  # Cache read tokens + new input tokens
-        output_tokens = getattr(usage, "completion_tokens", 0)
-        cache_read_tokens = getattr(prompt_token_details, "cached_tokens", 0)
-
-        non_cached_input_tokens = total_input_tokens - cache_read_tokens
+        usage = getattr(response, "usage", None)
+        if usage is None:
+            logging.warning("No usage information found in the response. Defaulting cost to 0.0.")
+            return 0.0
+        api_type = 'chatcompletion' if hasattr(usage, "prompt_tokens_details") else 'response'
+        if api_type == 'chatcompletion':
+            total_input_tokens = usage.prompt_tokens
+            output_tokens = usage.completion_tokens
+            cached_input_tokens = usage.prompt_tokens_details.cached_tokens
+            non_cached_input_tokens = total_input_tokens - cached_input_tokens
+        elif api_type == 'response':
+            total_input_tokens = usage.input_tokens
+            output_tokens = usage.output_tokens
+            cached_input_tokens = usage.input_tokens_details.cached_tokens
+            non_cached_input_tokens = total_input_tokens - cached_input_tokens
+        else:
+            logging.warning(f"Unsupported API type: {api_type}. Defaulting cost to 0.0.")
+            return 0.0
+
         cache_read_cost = self.input_cost * OPENAI_CACHE_PRICING_FACTOR["cache_read_tokens"]
-
         effective_cost = (
             self.input_cost * non_cached_input_tokens
-            + cache_read_tokens * cache_read_cost
+            + cached_input_tokens * cache_read_cost
             + self.output_cost * output_tokens
         )
         return effective_cost

From 36b092ad4b958b21fa3aeb211d36d93a2b05106a Mon Sep 17 00:00:00 2001
From: Aman Jaiswal <66757799+amanjaiswal73892@users.noreply.github.com>
Date: Mon, 7 Jul 2025 13:59:39 -0400
Subject: [PATCH 2/2] black formatting

---
 src/agentlab/llm/tracking.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/agentlab/llm/tracking.py b/src/agentlab/llm/tracking.py
index ab033b22..23e48930 100644
--- a/src/agentlab/llm/tracking.py
+++ b/src/agentlab/llm/tracking.py
@@ -163,10 +163,10 @@ def __call__(self, *args, **kwargs):
 
         response = self._call_api(*args, **kwargs)
         usage = dict(getattr(response, "usage", {}))
-        if 'prompt_tokens_details' in usage:
-            usage['cached_tokens'] = usage['prompt_tokens_details'].cached_tokens
-        if 'input_tokens_details' in usage:
-            usage['cached_tokens'] = usage['input_tokens_details'].cached_tokens
+        if "prompt_tokens_details" in usage:
+            usage["cached_tokens"] = usage["prompt_tokens_details"].cached_tokens
+        if "input_tokens_details" in usage:
+            usage["cached_tokens"] = usage["input_tokens_details"].cached_tokens
         usage = {f"usage_{k}": v for k, v in usage.items() if isinstance(v, (int, float))}
         usage |= {"n_api_calls": 1}
         usage |= {"effective_cost": self.get_effective_cost(response)}
@@ -306,21 +306,21 @@ def get_effective_cost_from_openai_api(self, response) -> float:
         if usage is None:
             logging.warning("No usage information found in the response. Defaulting cost to 0.0.")
             return 0.0
-        api_type = 'chatcompletion' if hasattr(usage, "prompt_tokens_details") else 'response'
-        if api_type == 'chatcompletion':
+        api_type = "chatcompletion" if hasattr(usage, "prompt_tokens_details") else "response"
+        if api_type == "chatcompletion":
             total_input_tokens = usage.prompt_tokens
             output_tokens = usage.completion_tokens
             cached_input_tokens = usage.prompt_tokens_details.cached_tokens
             non_cached_input_tokens = total_input_tokens - cached_input_tokens
-        elif api_type == 'response':
+        elif api_type == "response":
             total_input_tokens = usage.input_tokens
             output_tokens = usage.output_tokens
             cached_input_tokens = usage.input_tokens_details.cached_tokens
             non_cached_input_tokens = total_input_tokens - cached_input_tokens
         else:
             logging.warning(f"Unsupported API type: {api_type}. Defaulting cost to 0.0.")
             return 0.0
-        
+
         cache_read_cost = self.input_cost * OPENAI_CACHE_PRICING_FACTOR["cache_read_tokens"]
         effective_cost = (
             self.input_cost * non_cached_input_tokens