
Commit 64546a9
fix(openai): Token reporting
Parent: 7d9bf9a

3 files changed: 42 additions, 16 deletions

sentry_sdk/ai/_openai_completions_api.py (27 additions, 9 deletions)

@@ -4,10 +4,12 @@
 
 if TYPE_CHECKING:
     from sentry_sdk._types import TextPart
+    from typing import Union
 
     from openai.types.chat import (
         ChatCompletionMessageParam,
         ChatCompletionSystemMessageParam,
+        ChatCompletionContentPartParam,
     )
 
 
@@ -24,6 +26,25 @@ def _get_system_instructions(
     return [message for message in messages if _is_system_instruction(message)]
 
 
+def _get_text_items(
+    content: "Union[str, Iterable[ChatCompletionContentPartParam]]",
+) -> "list[str]":
+    if isinstance(content, str):
+        return [content]
+
+    if not isinstance(content, Iterable):
+        return []
+
+    text_items = []
+    for part in content:
+        if isinstance(part, dict) and part.get("type") == "text":
+            text = part.get("text", None)
+            if text is not None:
+                text_items.append(text)
+
+    return text_items
+
+
 def _transform_system_instructions(
     system_instructions: "list[ChatCompletionSystemMessageParam]",
 ) -> "list[TextPart]":
@@ -34,15 +55,12 @@ def _transform_system_instructions(
             continue
 
         content = instruction.get("content")
+        if content is None:
+            continue
 
-        if isinstance(content, str):
-            instruction_text_parts.append({"type": "text", "content": content})
-
-        elif isinstance(content, list):
-            for part in content:
-                if isinstance(part, dict) and part.get("type") == "text":
-                    text = part.get("text", None)
-                    if text is not None:
-                        instruction_text_parts.append({"type": "text", "content": text})
+        text_parts: "list[TextPart]" = [
+            {"type": "text", "content": content} for content in _get_text_items(content)
+        ]
+        instruction_text_parts += text_parts
 
     return instruction_text_parts
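
The new _get_text_items helper normalizes both content shapes OpenAI accepts: a plain string, and a list of typed content parts. A minimal standalone sketch of its behavior (the collections.abc import and the sample inputs are assumptions for illustration; in the SDK module, Iterable resolves through its existing imports):

from collections.abc import Iterable


def _get_text_items(content):
    # Plain-string content is a single text item.
    if isinstance(content, str):
        return [content]

    # Non-iterable content (e.g. None or a number) yields no text.
    if not isinstance(content, Iterable):
        return []

    # Multi-part content: keep only the text of {"type": "text"} parts.
    text_items = []
    for part in content:
        if isinstance(part, dict) and part.get("type") == "text":
            text = part.get("text", None)
            if text is not None:
                text_items.append(text)

    return text_items


print(_get_text_items("You are a helpful assistant."))
# ['You are a helpful assistant.']

print(_get_text_items([
    {"type": "text", "text": "Describe this image."},
    {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
]))
# ['Describe this image.']

This is also what the refactor of _transform_system_instructions amounts to: instead of branching on str versus list inline, it delegates to _get_text_items and wraps each returned string in a {"type": "text", "content": ...} part.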

sentry_sdk/integrations/openai.py (11 additions, 3 deletions)

@@ -17,6 +17,7 @@
     _is_system_instruction as _is_system_instruction_completions,
     _get_system_instructions as _get_system_instructions_completions,
     _transform_system_instructions,
+    _get_text_items,
 )
 from sentry_sdk.ai._openai_responses_api import (
     _is_system_instruction as _is_system_instruction_responses,
@@ -181,10 +182,17 @@ def _calculate_token_usage(
     # Manually count tokens
     if input_tokens == 0:
         for message in messages or []:
-            if isinstance(message, dict) and "content" in message:
-                input_tokens += count_tokens(message["content"])
-            elif isinstance(message, str):
+            if isinstance(message, str):
                 input_tokens += count_tokens(message)
+                continue
+            elif isinstance(message, dict):
+                message_content = message.get("content")
+                if message_content is None:
+                    continue
+                # Deliberate use of Completions function for both Completions and Responses input format.
+                text_items = _get_text_items(message_content)
+                input_tokens += sum(count_tokens(text) for text in text_items)
+                continue
 
     if output_tokens == 0:
         if streaming_message_responses is not None:
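
The effect of the rewritten loop: multi-part message content now contributes each of its text parts to the manual token count, rather than having message["content"] handed to count_tokens wholesale. A hedged sketch of the new control flow, reusing the _get_text_items sketch above and a whitespace-splitting stand-in for count_tokens (the SDK's real counter relies on tiktoken, per the test comment below):

def count_tokens(text):
    # Stand-in tokenizer for illustration only.
    return len(text.split())


messages = [
    "a bare string message",
    {"role": "user", "content": "hello there"},
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "What is in this image?"},
            {"type": "image_url", "image_url": {"url": "https://example.com/a.png"}},
        ],
    },
    {"role": "assistant", "content": None},  # skipped: nothing to count
]

input_tokens = 0
for message in messages:
    if isinstance(message, str):
        input_tokens += count_tokens(message)
        continue
    elif isinstance(message, dict):
        message_content = message.get("content")
        if message_content is None:
            continue
        # Same extraction works for Completions and Responses input formats.
        text_items = _get_text_items(message_content)
        input_tokens += sum(count_tokens(text) for text in text_items)
        continue

print(input_tokens)  # 4 + 2 + 5 = 11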

tests/integrations/openai/test_openai.py (4 additions, 4 deletions)

@@ -621,8 +621,8 @@ def test_streaming_chat_completion(sentry_init, capture_events, messages, reques
             assert span["data"]["gen_ai.usage.total_tokens"] == 9
         else:
             assert span["data"]["gen_ai.usage.output_tokens"] == 2
-            assert span["data"]["gen_ai.usage.input_tokens"] == 1
-            assert span["data"]["gen_ai.usage.total_tokens"] == 3
+            assert span["data"]["gen_ai.usage.input_tokens"] == 12
+            assert span["data"]["gen_ai.usage.total_tokens"] == 14
     except ImportError:
         pass  # if tiktoken is not installed, we can't guarantee token usage will be calculated properly
 
@@ -865,8 +865,8 @@ async def test_streaming_chat_completion_async(
             assert span["data"]["gen_ai.usage.total_tokens"] == 9
         else:
             assert span["data"]["gen_ai.usage.output_tokens"] == 2
-            assert span["data"]["gen_ai.usage.input_tokens"] == 1
-            assert span["data"]["gen_ai.usage.total_tokens"] == 3
+            assert span["data"]["gen_ai.usage.input_tokens"] == 12
+            assert span["data"]["gen_ai.usage.total_tokens"] == 14
 
     except ImportError:
         pass  # if tiktoken is not installed, we can't guarantee token usage will be calculated properly
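
The updated expectations track the counting change: the old path fed message["content"] directly to count_tokens, while the new path tokenizes each extracted text item, which presumably is why the expected input count for this parametrized case rises from 1 to 12. Output is unchanged at 2, so the total moves from 3 to 14. A trivial consistency check on the new values (taken from the assertions above):

# Values from the updated assertions.
output_tokens = 2
input_tokens = 12
assert input_tokens + output_tokens == 14  # gen_ai.usage.total_tokens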
