From 5dcbfbd3c09c075890b43d10d37db0715e6f315b Mon Sep 17 00:00:00 2001 From: Hassieb Pakzad <68423100+hassiebp@users.noreply.github.com> Date: Tue, 3 Dec 2024 13:57:23 +0100 Subject: [PATCH 1/9] update fern --- .../api/resources/ingestion/types/create_generation_body.py | 6 ++++++ .../api/resources/ingestion/types/update_generation_body.py | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/langfuse/api/resources/ingestion/types/create_generation_body.py b/langfuse/api/resources/ingestion/types/create_generation_body.py index 65905e78e..6bf5c3b28 100644 --- a/langfuse/api/resources/ingestion/types/create_generation_body.py +++ b/langfuse/api/resources/ingestion/types/create_generation_body.py @@ -19,6 +19,12 @@ class CreateGenerationBody(CreateSpanBody): alias="modelParameters", default=None ) usage: typing.Optional[IngestionUsage] = None + usage_details: typing.Optional[typing.Dict[str, int]] = pydantic_v1.Field( + alias="usageDetails", default=None + ) + cost_details: typing.Optional[typing.Dict[str, float]] = pydantic_v1.Field( + alias="costDetails", default=None + ) prompt_name: typing.Optional[str] = pydantic_v1.Field( alias="promptName", default=None ) diff --git a/langfuse/api/resources/ingestion/types/update_generation_body.py b/langfuse/api/resources/ingestion/types/update_generation_body.py index e216e4604..4c85f748c 100644 --- a/langfuse/api/resources/ingestion/types/update_generation_body.py +++ b/langfuse/api/resources/ingestion/types/update_generation_body.py @@ -22,6 +22,12 @@ class UpdateGenerationBody(UpdateSpanBody): prompt_name: typing.Optional[str] = pydantic_v1.Field( alias="promptName", default=None ) + usage_details: typing.Optional[typing.Dict[str, int]] = pydantic_v1.Field( + alias="usageDetails", default=None + ) + cost_details: typing.Optional[typing.Dict[str, float]] = pydantic_v1.Field( + alias="costDetails", default=None + ) prompt_version: typing.Optional[int] = pydantic_v1.Field( alias="promptVersion", default=None ) From a26c2e1e43c726d512fb92f0a5127f2ef4db77bd Mon Sep 17 00:00:00 2001 From: Hassieb Pakzad <68423100+hassiebp@users.noreply.github.com> Date: Tue, 3 Dec 2024 16:14:54 +0100 Subject: [PATCH 2/9] add token detail extraction --- langfuse/client.py | 42 +++++++-- langfuse/utils/__init__.py | 77 ++++++++++++++++- tests/test_extract_usage_details.py | 129 ++++++++++++++++++++++++++++ 3 files changed, 242 insertions(+), 6 deletions(-) create mode 100644 tests/test_extract_usage_details.py diff --git a/langfuse/client.py b/langfuse/client.py index 6137c2c02..75c4765f8 100644 --- a/langfuse/client.py +++ b/langfuse/client.py @@ -76,7 +76,12 @@ from langfuse.model import Dataset, MapValue, Observation, TraceWithFullDetails from langfuse.request import LangfuseClient from langfuse.types import MaskFunction, ScoreDataType, SpanLevel -from langfuse.utils import _convert_usage_input, _create_prompt_context, _get_timestamp +from langfuse.utils import ( + _convert_usage_input, + _create_prompt_context, + _get_timestamp, + _extract_usage_details, +) from .version import __version__ as version @@ -1738,6 +1743,8 @@ def generation( input: typing.Optional[typing.Any] = None, output: typing.Optional[typing.Any] = None, usage: typing.Optional[typing.Union[pydantic.BaseModel, ModelUsage]] = None, + usage_details: typing.Optional[typing.Dict[str, int]] = None, + cost_details: typing.Optional[typing.Dict[str, float]] = None, prompt: typing.Optional[PromptClient] = None, **kwargs, ) -> "StatefulGenerationClient": @@ -1765,7 +1772,9 @@ def generation( 
model_parameters (Optional[dict]): The parameters of the model used for the generation; can be any key-value pairs. input (Optional[dict]): The prompt used for the generation. Can be any string or JSON object. output (Optional[dict]): The completion generated by the model. Can be any string or JSON object. - usage (Optional[dict]): The usage object supports the OpenAi structure with {`promptTokens`, `completionTokens`, `totalTokens`} and a more generic version {`input`, `output`, `total`, `unit`, `inputCost`, `outputCost`, `totalCost`} where unit can be of value `"TOKENS"`, `"CHARACTERS"`, `"MILLISECONDS"`, `"SECONDS"`, or `"IMAGES"`. Refer to the docs on how to [automatically infer](https://langfuse.com/docs/model-usage-and-cost) token usage and costs in Langfuse. + usage (Optional[dict]): [DEPRECATED, use usage_details and cost_details instead] The usage object supports the OpenAi structure with {`promptTokens`, `completionTokens`, `totalTokens`} and a more generic version {`input`, `output`, `total`, `unit`, `inputCost`, `outputCost`, `totalCost`} where unit can be of value `"TOKENS"`, `"CHARACTERS"`, `"MILLISECONDS"`, `"SECONDS"`, or `"IMAGES"`. Refer to the docs on how to [automatically infer](https://langfuse.com/docs/model-usage-and-cost) token usage and costs in Langfuse. + usage_details (Optional[dict]): The usage details of the generation. Also accepts OpenAI usage details. Keys are the usage type (e.g. "input", "input_cached", "output") and values are integers representing the number of units used. For accurate cost calculations in Langfuse, ensure each usage type has a corresponding price configured in the Langfuse models table. Example: {"input": 500, "output": 150}. + cost_details (Optional[dict]): The cost details of the generation. Keys are the usage type (e.g. "input", "input_cached", "output") and values are floats representing the cost in USD. Example: {"input": 0.0015, "output": 0.002}. prompt (Optional[PromptClient]): The Langfuse prompt object used for the generation. **kwargs: Additional keyword arguments to include in the generation. @@ -1811,6 +1820,9 @@ def generation( "model": model, "model_parameters": model_parameters, "usage": _convert_usage_input(usage) if usage is not None else None, + "usage_details": usage_details + and _extract_usage_details(usage_details), + "cost_details": cost_details, "trace": {"release": self.release}, **_create_prompt_context(prompt), **kwargs, @@ -2000,6 +2012,8 @@ def generation( input: typing.Optional[typing.Any] = None, output: typing.Optional[typing.Any] = None, usage: typing.Optional[typing.Union[pydantic.BaseModel, ModelUsage]] = None, + usage_details: typing.Optional[typing.Dict[str, int]] = None, + cost_details: typing.Optional[typing.Dict[str, float]] = None, prompt: typing.Optional[PromptClient] = None, **kwargs, ) -> "StatefulGenerationClient": @@ -2021,7 +2035,9 @@ def generation( model_parameters (Optional[dict]): The parameters of the model used for the generation; can be any key-value pairs. input (Optional[dict]): The prompt used for the generation. Can be any string or JSON object. output (Optional[dict]): The completion generated by the model. Can be any string or JSON object. - usage (Optional[dict]): The usage object supports the OpenAi structure with {`promptTokens`, `completionTokens`, `totalTokens`} and a more generic version {`input`, `output`, `total`, `unit`, `inputCost`, `outputCost`, `totalCost`} where unit can be of value `"TOKENS"`, `"CHARACTERS"`, `"MILLISECONDS"`, `"SECONDS"`, or `"IMAGES"`. 
Refer to the docs on how to [automatically infer](https://langfuse.com/docs/model-usage-and-cost) token usage and costs in Langfuse. + usage (Optional[dict]): [DEPRECATED, use usage_details and cost_details instead] The usage object supports the OpenAi structure with {`promptTokens`, `completionTokens`, `totalTokens`} and a more generic version {`input`, `output`, `total`, `unit`, `inputCost`, `outputCost`, `totalCost`} where unit can be of value `"TOKENS"`, `"CHARACTERS"`, `"MILLISECONDS"`, `"SECONDS"`, or `"IMAGES"`. Refer to the docs on how to [automatically infer](https://langfuse.com/docs/model-usage-and-cost) token usage and costs in Langfuse. + usage_details (Optional[dict]): The usage details of the generation. Also accepts OpenAI usage details. Keys are the usage type (e.g. "input", "input_cached", "output") and values are integers representing the number of units used. For accurate cost calculations in Langfuse, ensure each usage type has a corresponding price configured in the Langfuse models table. Example: {"input": 500, "output": 150}. + cost_details (Optional[dict]): The cost details of the generation. Keys are the usage type (e.g. "input", "input_cached", "output") and values are floats representing the cost in USD. Example: {"input": 0.0015, "output": 0.002}. prompt (Optional[PromptClient]): The Langfuse prompt object used for the generation. **kwargs: Additional keyword arguments to include in the generation. @@ -2065,6 +2081,9 @@ def generation( "input": input, "output": output, "usage": _convert_usage_input(usage) if usage is not None else None, + "usage_details": usage_details + and _extract_usage_details(usage_details), + "cost_details": cost_details, **_create_prompt_context(prompt), **kwargs, } @@ -2432,6 +2451,8 @@ def update( input: typing.Optional[typing.Any] = None, output: typing.Optional[typing.Any] = None, usage: typing.Optional[typing.Union[pydantic.BaseModel, ModelUsage]] = None, + usage_details: typing.Optional[typing.Dict[str, int]] = None, + cost_details: typing.Optional[typing.Dict[str, float]] = None, prompt: typing.Optional[PromptClient] = None, **kwargs, ) -> "StatefulGenerationClient": @@ -2450,7 +2471,9 @@ def update( model_parameters (Optional[dict]): The parameters of the model used for the generation; can be any key-value pairs. input (Optional[dict]): The prompt used for the generation. Can be any string or JSON object. output (Optional[dict]): The completion generated by the model. Can be any string or JSON object. - usage (Optional[dict]): The usage object supports the OpenAi structure with {`promptTokens`, `completionTokens`, `totalTokens`} and a more generic version {`input`, `output`, `total`, `unit`, `inputCost`, `outputCost`, `totalCost`} where unit can be of value `"TOKENS"`, `"CHARACTERS"`, `"MILLISECONDS"`, `"SECONDS"`, or `"IMAGES"`. Refer to the docs on how to [automatically infer](https://langfuse.com/docs/model-usage-and-cost) token usage and costs in Langfuse. + usage (Optional[dict]): [DEPRECATED, use usage_details and cost_details instead] The usage object supports the OpenAi structure with {`promptTokens`, `completionTokens`, `totalTokens`} and a more generic version {`input`, `output`, `total`, `unit`, `inputCost`, `outputCost`, `totalCost`} where unit can be of value `"TOKENS"`, `"CHARACTERS"`, `"MILLISECONDS"`, `"SECONDS"`, or `"IMAGES"`. Refer to the docs on how to [automatically infer](https://langfuse.com/docs/model-usage-and-cost) token usage and costs in Langfuse. 
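+ For illustration, a minimal sketch of migrating from the deprecated `usage` object to the replacement fields described below; all token counts and USD amounts here are invented:
+
+ generation.update(
+ usage_details={"input": 500, "input_cached": 100, "output": 150},
+ cost_details={"input": 0.0015, "output": 0.002},
+ )
+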
+ usage_details (Optional[dict]): The usage details of the generation. Also accepts OpenAI usage details. Keys are the usage type (e.g. "input", "input_cached", "output") and values are integers representing the number of units used. For accurate cost calculations in Langfuse, ensure each usage type has a corresponding price configured in the Langfuse models table. Example: {"input": 500, "output": 150}. + cost_details (Optional[dict]): The cost details of the generation. Keys are the usage type (e.g. "input", "input_cached", "output") and values are floats representing the cost in USD. Example: {"input": 0.0015, "output": 0.002}. prompt (Optional[PromptClient]): The Langfuse prompt object used for the generation. **kwargs: Additional keyword arguments to include in the generation. @@ -2490,6 +2513,9 @@ def update( "input": input, "output": output, "usage": _convert_usage_input(usage) if usage is not None else None, + "usage_details": usage_details + and _extract_usage_details(usage_details), + "cost_details": cost_details, **_create_prompt_context(prompt), **kwargs, } @@ -2538,6 +2564,8 @@ def end( input: typing.Optional[typing.Any] = None, output: typing.Optional[typing.Any] = None, usage: typing.Optional[typing.Union[pydantic.BaseModel, ModelUsage]] = None, + usage_details: typing.Optional[typing.Dict[str, int]] = None, + cost_details: typing.Optional[typing.Dict[str, float]] = None, prompt: typing.Optional[PromptClient] = None, **kwargs, ) -> "StatefulGenerationClient": @@ -2556,7 +2584,9 @@ def end( model_parameters (Optional[dict]): The parameters of the model used for the generation; can be any key-value pairs. input (Optional[dict]): The prompt used for the generation. Can be any string or JSON object. output (Optional[dict]): The completion generated by the model. Can be any string or JSON object. - usage (Optional[dict]): The usage object supports the OpenAi structure with {`promptTokens`, `completionTokens`, `totalTokens`} and a more generic version {`input`, `output`, `total`, `unit`, `inputCost`, `outputCost`, `totalCost`} where unit can be of value `"TOKENS"`, `"CHARACTERS"`, `"MILLISECONDS"`, `"SECONDS"`, or `"IMAGES"`. Refer to the docs on how to [automatically infer](https://langfuse.com/docs/model-usage-and-cost) token usage and costs in Langfuse. + usage (Optional[dict]): [DEPRECATED, use usage_details and cost_details instead] The usage object supports the OpenAi structure with {`promptTokens`, `completionTokens`, `totalTokens`} and a more generic version {`input`, `output`, `total`, `unit`, `inputCost`, `outputCost`, `totalCost`} where unit can be of value `"TOKENS"`, `"CHARACTERS"`, `"MILLISECONDS"`, `"SECONDS"`, or `"IMAGES"`. Refer to the docs on how to [automatically infer](https://langfuse.com/docs/model-usage-and-cost) token usage and costs in Langfuse. + usage_details (Optional[dict]): The usage details of the generation. Also accepts OpenAI usage details. Keys are the usage type (e.g. "input", "input_cached", "output") and values are integers representing the number of units used. For accurate cost calculations in Langfuse, ensure each usage type has a corresponding price configured in the Langfuse models table. Example: {"input": 500, "output": 150}. + cost_details (Optional[dict]): The cost details of the generation. Keys are the usage type (e.g. "input", "input_cached", "output") and values are floats representing the cost in USD. Example: {"input": 0.0015, "output": 0.002}. prompt (Optional[PromptClient]): The Langfuse prompt object used for the generation. 
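+ For example, the raw OpenAI usage shape is also accepted when ending a generation; a sketch with invented counts:
+
+ generation.end(
+ usage_details={"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30},
+ )
+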
**kwargs: Additional keyword arguments to include in the generation. @@ -2593,6 +2623,8 @@ def end( input=input, output=output, usage=usage, + usage_details=usage_details and _extract_usage_details(usage_details), + cost_details=cost_details, prompt=prompt, **kwargs, ) diff --git a/langfuse/utils/__init__.py b/langfuse/utils/__init__.py index 7a97d589f..ea1194d30 100644 --- a/langfuse/utils/__init__.py +++ b/langfuse/utils/__init__.py @@ -47,7 +47,10 @@ def extract_by_priority( def _convert_usage_input(usage: typing.Union[pydantic.BaseModel, ModelUsage]): - """Converts any usage input to a usage object""" + """Convert any usage input to a usage object. + + Deprecated, only used for backwards compatibility with legacy 'usage' objects in generation create / update + """ if isinstance(usage, pydantic.BaseModel): usage = usage.dict() @@ -103,3 +106,75 @@ def _convert_usage_input(usage: typing.Union[pydantic.BaseModel, ModelUsage]): raise ValueError( "Usage object must have either {input, output, total, unit} or {promptTokens, completionTokens, totalTokens}" ) + + +def _extract_usage_details(usage_details: typing.Dict[str, typing.Any]): + if isinstance(usage_details, pydantic.BaseModel): + usage_details = usage_details.dict() + + if hasattr(usage_details, "__dict__"): + usage_details = usage_details.__dict__ + + # Handle openai usage details + if all( + k in usage_details + for k in ("prompt_tokens", "completion_tokens", "total_tokens") + ) or all( + k in usage_details for k in ("promptTokens", "completionTokens", "totalTokens") + ): + openai_usage_details = { + "input": usage_details.get("prompt_tokens", None) + or usage_details.get("promptTokens", None), + "output": usage_details.get("completion_tokens", None) + or usage_details.get("completionTokens", None), + "total": usage_details.get("total_tokens", None) + or usage_details.get("totalTokens", None), + } + + prompt_token_details = usage_details.get("prompt_token_details", {}) + if isinstance(prompt_token_details, dict): + if "cached_tokens" in prompt_token_details: + openai_usage_details["input_cached"] = prompt_token_details[ + "cached_tokens" + ] + openai_usage_details["input"] = max( + openai_usage_details.get("input", 0) + - openai_usage_details["input_cached"], + 0, + ) + + if "audio_tokens" in prompt_token_details: + openai_usage_details["input_audio"] = prompt_token_details[ + "audio_tokens" + ] + openai_usage_details["input"] = max( + openai_usage_details.get("input", 0) + - openai_usage_details["input_audio"], + 0, + ) + + output_token_details = usage_details.get("completion_token_details", {}) + if isinstance(output_token_details, dict): + if "audio_tokens" in output_token_details: + openai_usage_details["output_audio"] = output_token_details[ + "audio_tokens" + ] + openai_usage_details["output"] = max( + openai_usage_details.get("output", 0) + - openai_usage_details["output_audio"], + 0, + ) + + if "reasoning_tokens" in output_token_details: + openai_usage_details["output_reasoning"] = output_token_details[ + "reasoning_tokens" + ] + openai_usage_details["output"] = max( + openai_usage_details.get("output", 0) + - openai_usage_details["output_reasoning"], + 0, + ) + + return openai_usage_details + + return usage_details diff --git a/tests/test_extract_usage_details.py b/tests/test_extract_usage_details.py new file mode 100644 index 000000000..9fc8cd7c2 --- /dev/null +++ b/tests/test_extract_usage_details.py @@ -0,0 +1,129 @@ +from langfuse.client import _extract_usage_details +from tests.utils import CompletionUsage, LlmUsage, 
LlmUsageWithCost + + +def test_extract_usage_details_openai_style(): + usage = {"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30} + + result = _extract_usage_details(usage) + + assert result == {"input": 10, "output": 20, "total": 30} + + +def test_extract_usage_details_openai_with_token_details(): + usage = { + "prompt_tokens": 10, + "completion_tokens": 20, + "total_tokens": 30, + "completion_token_details": {"audio_tokens": 5}, + "prompt_token_details": {"audio_tokens": 3, "cached_tokens": 2}, + } + + result = _extract_usage_details(usage) + + assert result == { + "input": 5, + "input_audio": 3, + "input_cached": 2, + "output": 15, + "output_audio": 5, + "total": 30, + } + + +def test_extract_usage_details_openai_with_completion_token_details_only(): + usage = { + "prompt_tokens": 10, + "completion_tokens": 20, + "total_tokens": 30, + "completion_token_details": {"audio_tokens": 5}, + } + + result = _extract_usage_details(usage) + + assert result == { + "input": 10, + "output": 15, + "output_audio": 5, + "total": 30, + } + + +def test_extract_usage_details_openai_with_prompt_token_details_only(): + usage = { + "prompt_tokens": 10, + "completion_tokens": 20, + "total_tokens": 30, + "prompt_token_details": {"cached_tokens": 3, "audio_tokens": 7}, + } + + result = _extract_usage_details(usage) + + assert result == { + "input": 0, + "input_cached": 3, + "input_audio": 7, + "output": 20, + "total": 30, + } + + +def test_extract_usage_details_pydantic_openai(): + usage = CompletionUsage(prompt_tokens=10, completion_tokens=20, total_tokens=30) + + result = _extract_usage_details(usage.dict()) + + assert result == {"input": 10, "output": 20, "total": 30} + + +def test_extract_usage_details_llm_usage(): + usage = LlmUsage(promptTokens=10, completionTokens=20, totalTokens=30) + + result = _extract_usage_details(usage.dict()) + + assert result == {"input": 10, "output": 20, "total": 30} + + +def test_extract_usage_details_llm_usage_with_cost(): + usage = LlmUsageWithCost( + promptTokens=10, + completionTokens=20, + totalTokens=30, + inputCost=0.0001, + outputCost=0.0002, + totalCost=0.0003, + ) + + result = _extract_usage_details(usage.dict()) + + assert result == {"input": 10, "output": 20, "total": 30} + + +def test_extract_usage_details_raw(): + usage = {"input": 100, "output": 200, "total": 300} + + result = _extract_usage_details(usage) + + assert result == usage + + +def test_extract_usage_details_raw_with_cached(): + usage = {"input": 100, "input_cached": 50, "output": 200, "total": 300} + + result = _extract_usage_details(usage) + + assert result == usage + + +def test_extract_usage_details_empty(): + result = _extract_usage_details({}) + + assert result == {} + + +def test_extract_usage_details_invalid_keys(): + usage = {"foo": 10, "bar": 20} + + result = _extract_usage_details(usage) + + assert result == usage From 04fc053d106bbffad83c2d7e58dfe2ca5b3d27d9 Mon Sep 17 00:00:00 2001 From: Hassieb Pakzad <68423100+hassiebp@users.noreply.github.com> Date: Wed, 4 Dec 2024 11:57:40 +0100 Subject: [PATCH 3/9] update openai integration --- langfuse/openai.py | 11 +++-- langfuse/utils/__init__.py | 89 +++++++++++++++++++++----------------- 2 files changed, 56 insertions(+), 44 deletions(-) diff --git a/langfuse/openai.py b/langfuse/openai.py index da449dbb2..4f841cdbd 100644 --- a/langfuse/openai.py +++ b/langfuse/openai.py @@ -645,7 +645,10 @@ def _wrap(open_ai_resource: OpenAiDefinition, initialize, wrapped, args, kwargs) else openai_response, ) generation.update( - model=model, 
output=completion, end_time=_get_timestamp(), usage=usage + model=model, + output=completion, + end_time=_get_timestamp(), + usage_details=usage, ) # Avoiding the trace-update if trace-id is provided by user. @@ -661,7 +664,7 @@ def _wrap(open_ai_resource: OpenAiDefinition, initialize, wrapped, args, kwargs) status_message=str(ex), level="ERROR", model=model, - usage={"input_cost": 0, "output_cost": 0, "total_cost": 0}, + cost_details={"input": 0, "output": 0, "total": 0}, ) raise ex @@ -701,7 +704,7 @@ async def _wrap_async( model=model, output=completion, end_time=_get_timestamp(), - usage=usage, + usage_details=usage, ) # Avoiding the trace-update if trace-id is provided by user. if not is_nested_trace: @@ -715,7 +718,7 @@ async def _wrap_async( status_message=str(ex), level="ERROR", model=model, - usage={"input_cost": 0, "output_cost": 0, "total_cost": 0}, + cost_details={"input": 0, "output": 0, "total": 0}, ) raise ex diff --git a/langfuse/utils/__init__.py b/langfuse/utils/__init__.py index ea1194d30..7b180ba3e 100644 --- a/langfuse/utils/__init__.py +++ b/langfuse/utils/__init__.py @@ -4,13 +4,21 @@ import typing from datetime import datetime, timezone -try: - import pydantic.v1 as pydantic # type: ignore -except ImportError: - import pydantic # type: ignore +import pydantic + from langfuse.model import ModelUsage, PromptClient +IS_PYDANTIC_V2 = pydantic.VERSION.startswith("2.") + +if IS_PYDANTIC_V2: + import pydantic.v1 as pydantic_v1 # noqa + import pydantic as pydantic_v2 # noqa +else: + import pydantic as pydantic_v1 # noqa + + pydantic_v2 = None # type: ignore + log = logging.getLogger("langfuse") @@ -46,12 +54,12 @@ def extract_by_priority( return None -def _convert_usage_input(usage: typing.Union[pydantic.BaseModel, ModelUsage]): +def _convert_usage_input(usage: typing.Union[pydantic_v1.BaseModel, ModelUsage]): """Convert any usage input to a usage object. 
Deprecated, only used for backwards compatibility with legacy 'usage' objects in generation create / update """ - if isinstance(usage, pydantic.BaseModel): + if isinstance(usage, pydantic_v1.BaseModel): usage = usage.dict() # sometimes we do not match the pydantic usage object @@ -109,9 +117,12 @@ def _convert_usage_input(usage: typing.Union[pydantic.BaseModel, ModelUsage]): def _extract_usage_details(usage_details: typing.Dict[str, typing.Any]): - if isinstance(usage_details, pydantic.BaseModel): + if isinstance(usage_details, pydantic_v1.BaseModel): usage_details = usage_details.dict() + if pydantic_v2 is not None and isinstance(usage_details, pydantic_v2.BaseModel): + usage_details = usage_details.model_dump() + if hasattr(usage_details, "__dict__"): usage_details = usage_details.__dict__ @@ -131,49 +142,47 @@ def _extract_usage_details(usage_details: typing.Dict[str, typing.Any]): or usage_details.get("totalTokens", None), } - prompt_token_details = usage_details.get("prompt_token_details", {}) - if isinstance(prompt_token_details, dict): - if "cached_tokens" in prompt_token_details: - openai_usage_details["input_cached"] = prompt_token_details[ - "cached_tokens" - ] - openai_usage_details["input"] = max( - openai_usage_details.get("input", 0) - - openai_usage_details["input_cached"], - 0, - ) - - if "audio_tokens" in prompt_token_details: - openai_usage_details["input_audio"] = prompt_token_details[ - "audio_tokens" - ] + # Handle input token details + prompt_tokens_details = usage_details.get("prompt_tokens_details", {}) + if pydantic_v2 is not None and isinstance( + prompt_tokens_details, pydantic_v2.BaseModel + ): + prompt_tokens_details = prompt_tokens_details.model_dump() + elif hasattr(prompt_tokens_details, "__dict__"): + prompt_tokens_details = prompt_tokens_details.__dict__ + + if isinstance(prompt_tokens_details, dict): + for key in prompt_tokens_details: + openai_usage_details[f"input_{key}"] = prompt_tokens_details[key] openai_usage_details["input"] = max( openai_usage_details.get("input", 0) - - openai_usage_details["input_audio"], + - openai_usage_details[f"input_{key}"], 0, ) - output_token_details = usage_details.get("completion_token_details", {}) - if isinstance(output_token_details, dict): - if "audio_tokens" in output_token_details: - openai_usage_details["output_audio"] = output_token_details[ - "audio_tokens" - ] + # Handle output token details + completion_tokens_details = usage_details.get("completion_tokens_details", {}) + if pydantic_v2 is not None and isinstance( + completion_tokens_details, pydantic_v2.BaseModel + ): + completion_tokens_details = completion_tokens_details.model_dump() + elif hasattr(completion_tokens_details, "__dict__"): + completion_tokens_details = completion_tokens_details.__dict__ + + if isinstance(completion_tokens_details, dict): + for key in completion_tokens_details: + openai_usage_details[f"output_{key}"] = completion_tokens_details[key] openai_usage_details["output"] = max( openai_usage_details.get("output", 0) - - openai_usage_details["output_audio"], + - openai_usage_details[f"output_{key}"], 0, ) - if "reasoning_tokens" in output_token_details: - openai_usage_details["output_reasoning"] = output_token_details[ - "reasoning_tokens" - ] - openai_usage_details["output"] = max( - openai_usage_details.get("output", 0) - - openai_usage_details["output_reasoning"], - 0, - ) + # Remove input and output if they are 0, i.e. 
all details add up to the total provided by OpenAI + if openai_usage_details["input"] == 0: + openai_usage_details.pop("input") + if openai_usage_details["output"] == 0: + openai_usage_details.pop("output") return openai_usage_details From 42b7fe370652f3185552041605e4b7b77b3b1779 Mon Sep 17 00:00:00 2001 From: Hassieb Pakzad <68423100+hassiebp@users.noreply.github.com> Date: Wed, 4 Dec 2024 18:07:27 +0100 Subject: [PATCH 4/9] backward compat --- langfuse/openai.py | 12 ++++++++++++ langfuse/utils/__init__.py | 6 ------ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/langfuse/openai.py b/langfuse/openai.py index 4f841cdbd..d674b8462 100644 --- a/langfuse/openai.py +++ b/langfuse/openai.py @@ -648,6 +648,7 @@ def _wrap(open_ai_resource: OpenAiDefinition, initialize, wrapped, args, kwargs) model=model, output=completion, end_time=_get_timestamp(), + usage=usage, # backward compat for all V2 self hosters usage_details=usage, ) @@ -664,6 +665,11 @@ def _wrap(open_ai_resource: OpenAiDefinition, initialize, wrapped, args, kwargs) status_message=str(ex), level="ERROR", model=model, + usage={ + "input_cost": 0, + "output_cost": 0, + "total_cost": 0, + }, # backward compat for all V2 self hosters cost_details={"input": 0, "output": 0, "total": 0}, ) raise ex @@ -704,6 +710,7 @@ async def _wrap_async( model=model, output=completion, end_time=_get_timestamp(), + usage=usage, # backward compat for all V2 self hosters usage_details=usage, ) # Avoiding the trace-update if trace-id is provided by user. @@ -718,6 +725,11 @@ async def _wrap_async( status_message=str(ex), level="ERROR", model=model, + usage={ + "input_cost": 0, + "output_cost": 0, + "total_cost": 0, + }, # Backward compat for all V2 self hosters cost_details={"input": 0, "output": 0, "total": 0}, ) raise ex diff --git a/langfuse/utils/__init__.py b/langfuse/utils/__init__.py index 7b180ba3e..66555966b 100644 --- a/langfuse/utils/__init__.py +++ b/langfuse/utils/__init__.py @@ -178,12 +178,6 @@ def _extract_usage_details(usage_details: typing.Dict[str, typing.Any]): 0, ) - # Remove input and output if they are 0, i.e. 
all details add up to the total provided by OpenAI - if openai_usage_details["input"] == 0: - openai_usage_details.pop("input") - if openai_usage_details["output"] == 0: - openai_usage_details.pop("output") - return openai_usage_details return usage_details From 808d05ad6ff0cd582188937b1a094873cbd8a374 Mon Sep 17 00:00:00 2001 From: Hassieb Pakzad <68423100+hassiebp@users.noreply.github.com> Date: Tue, 10 Dec 2024 13:49:21 +0100 Subject: [PATCH 5/9] update fern --- langfuse/api/__init__.py | 4 + langfuse/api/resources/__init__.py | 4 + .../comments/types/create_comment_request.py | 2 +- .../resources/commons/types/observation.py | 16 +- .../commons/types/observations_view.py | 6 +- langfuse/api/resources/commons/types/usage.py | 2 +- langfuse/api/resources/ingestion/__init__.py | 4 + .../api/resources/ingestion/types/__init__.py | 4 + .../ingestion/types/create_generation_body.py | 3 +- .../ingestion/types/open_ai_usage_schema.py | 46 +++++ .../ingestion/types/update_generation_body.py | 3 +- .../ingestion/types/usage_details.py | 7 + langfuse/api/resources/media/client.py | 6 +- .../media/types/media_content_type.py | 160 ++++++++++++++---- 14 files changed, 226 insertions(+), 41 deletions(-) create mode 100644 langfuse/api/resources/ingestion/types/open_ai_usage_schema.py create mode 100644 langfuse/api/resources/ingestion/types/usage_details.py diff --git a/langfuse/api/__init__.py b/langfuse/api/__init__.py index df6daeace..1bdfac254 100644 --- a/langfuse/api/__init__.py +++ b/langfuse/api/__init__.py @@ -87,6 +87,7 @@ ObservationsView, ObservationsViews, OpenAiUsage, + OpenAiUsageSchema, OptionalObservationBody, PaginatedDatasetItems, PaginatedDatasetRuns, @@ -133,6 +134,7 @@ UpdateSpanEvent, Usage, UsageByModel, + UsageDetails, comments, commons, dataset_items, @@ -240,6 +242,7 @@ "ObservationsView", "ObservationsViews", "OpenAiUsage", + "OpenAiUsageSchema", "OptionalObservationBody", "PaginatedDatasetItems", "PaginatedDatasetRuns", @@ -286,6 +289,7 @@ "UpdateSpanEvent", "Usage", "UsageByModel", + "UsageDetails", "comments", "commons", "dataset_items", diff --git a/langfuse/api/resources/__init__.py b/langfuse/api/resources/__init__.py index 4a6cc0319..f838c2f8c 100644 --- a/langfuse/api/resources/__init__.py +++ b/langfuse/api/resources/__init__.py @@ -92,6 +92,7 @@ ObservationBody, ObservationType, OpenAiUsage, + OpenAiUsageSchema, OptionalObservationBody, ScoreBody, ScoreEvent, @@ -105,6 +106,7 @@ UpdateObservationEvent, UpdateSpanBody, UpdateSpanEvent, + UsageDetails, ) from .media import ( GetMediaResponse, @@ -237,6 +239,7 @@ "ObservationsView", "ObservationsViews", "OpenAiUsage", + "OpenAiUsageSchema", "OptionalObservationBody", "PaginatedDatasetItems", "PaginatedDatasetRuns", @@ -283,6 +286,7 @@ "UpdateSpanEvent", "Usage", "UsageByModel", + "UsageDetails", "comments", "commons", "dataset_items", diff --git a/langfuse/api/resources/comments/types/create_comment_request.py b/langfuse/api/resources/comments/types/create_comment_request.py index 98e25e2e1..9ba6081ee 100644 --- a/langfuse/api/resources/comments/types/create_comment_request.py +++ b/langfuse/api/resources/comments/types/create_comment_request.py @@ -25,7 +25,7 @@ class CreateCommentRequest(pydantic_v1.BaseModel): content: str = pydantic_v1.Field() """ - The content of the comment. May include markdown. Currently limited to 500 characters. + The content of the comment. May include markdown. Currently limited to 3000 characters. 
""" author_user_id: typing.Optional[str] = pydantic_v1.Field( diff --git a/langfuse/api/resources/commons/types/observation.py b/langfuse/api/resources/commons/types/observation.py index 130fe732d..93fabb754 100644 --- a/langfuse/api/resources/commons/types/observation.py +++ b/langfuse/api/resources/commons/types/observation.py @@ -84,7 +84,7 @@ class Observation(pydantic_v1.BaseModel): usage: typing.Optional[Usage] = pydantic_v1.Field(default=None) """ - The usage data of the observation + (Deprecated. Use usageDetails and costDetails instead.) The usage data of the observation """ level: ObservationLevel = pydantic_v1.Field() @@ -111,6 +111,20 @@ class Observation(pydantic_v1.BaseModel): The prompt ID associated with the observation """ + usage_details: typing.Optional[typing.Dict[str, int]] = pydantic_v1.Field( + alias="usageDetails", default=None + ) + """ + The usage details of the observation. Key is the name of the usage metric, value is the number of units consumed. The total key is the sum of all (non-total) usage metrics or the total value ingested. + """ + + cost_details: typing.Optional[typing.Dict[str, float]] = pydantic_v1.Field( + alias="costDetails", default=None + ) + """ + The cost details of the observation. Key is the name of the cost metric, value is the cost in USD. The total key is the sum of all (non-total) cost metrics or the total value ingested. + """ + def json(self, **kwargs: typing.Any) -> str: kwargs_with_defaults: typing.Any = { "by_alias": True, diff --git a/langfuse/api/resources/commons/types/observations_view.py b/langfuse/api/resources/commons/types/observations_view.py index 3e15909ea..e011fa32b 100644 --- a/langfuse/api/resources/commons/types/observations_view.py +++ b/langfuse/api/resources/commons/types/observations_view.py @@ -53,21 +53,21 @@ class ObservationsView(Observation): alias="calculatedInputCost", default=None ) """ - The calculated cost of the input in USD + (Deprecated. Use usageDetails and costDetails instead.) The calculated cost of the input in USD """ calculated_output_cost: typing.Optional[float] = pydantic_v1.Field( alias="calculatedOutputCost", default=None ) """ - The calculated cost of the output in USD + (Deprecated. Use usageDetails and costDetails instead.) The calculated cost of the output in USD """ calculated_total_cost: typing.Optional[float] = pydantic_v1.Field( alias="calculatedTotalCost", default=None ) """ - The calculated total cost in USD + (Deprecated. Use usageDetails and costDetails instead.) The calculated total cost in USD """ latency: typing.Optional[float] = pydantic_v1.Field(default=None) diff --git a/langfuse/api/resources/commons/types/usage.py b/langfuse/api/resources/commons/types/usage.py index bc5041c5f..c38330494 100644 --- a/langfuse/api/resources/commons/types/usage.py +++ b/langfuse/api/resources/commons/types/usage.py @@ -10,7 +10,7 @@ class Usage(pydantic_v1.BaseModel): """ - Standard interface for usage and cost + (Deprecated. Use usageDetails and costDetails instead.) 
Standard interface for usage and cost """ input: typing.Optional[int] = pydantic_v1.Field(default=None) diff --git a/langfuse/api/resources/ingestion/__init__.py b/langfuse/api/resources/ingestion/__init__.py index dde470ccc..6bd1373be 100644 --- a/langfuse/api/resources/ingestion/__init__.py +++ b/langfuse/api/resources/ingestion/__init__.py @@ -27,6 +27,7 @@ ObservationBody, ObservationType, OpenAiUsage, + OpenAiUsageSchema, OptionalObservationBody, ScoreBody, ScoreEvent, @@ -40,6 +41,7 @@ UpdateObservationEvent, UpdateSpanBody, UpdateSpanEvent, + UsageDetails, ) __all__ = [ @@ -69,6 +71,7 @@ "ObservationBody", "ObservationType", "OpenAiUsage", + "OpenAiUsageSchema", "OptionalObservationBody", "ScoreBody", "ScoreEvent", @@ -82,4 +85,5 @@ "UpdateObservationEvent", "UpdateSpanBody", "UpdateSpanEvent", + "UsageDetails", ] diff --git a/langfuse/api/resources/ingestion/types/__init__.py b/langfuse/api/resources/ingestion/types/__init__.py index 0981aa841..95fa2559e 100644 --- a/langfuse/api/resources/ingestion/types/__init__.py +++ b/langfuse/api/resources/ingestion/types/__init__.py @@ -28,6 +28,7 @@ from .observation_body import ObservationBody from .observation_type import ObservationType from .open_ai_usage import OpenAiUsage +from .open_ai_usage_schema import OpenAiUsageSchema from .optional_observation_body import OptionalObservationBody from .score_body import ScoreBody from .score_event import ScoreEvent @@ -41,6 +42,7 @@ from .update_observation_event import UpdateObservationEvent from .update_span_body import UpdateSpanBody from .update_span_event import UpdateSpanEvent +from .usage_details import UsageDetails __all__ = [ "BaseEvent", @@ -69,6 +71,7 @@ "ObservationBody", "ObservationType", "OpenAiUsage", + "OpenAiUsageSchema", "OptionalObservationBody", "ScoreBody", "ScoreEvent", @@ -82,4 +85,5 @@ "UpdateObservationEvent", "UpdateSpanBody", "UpdateSpanEvent", + "UsageDetails", ] diff --git a/langfuse/api/resources/ingestion/types/create_generation_body.py b/langfuse/api/resources/ingestion/types/create_generation_body.py index 6bf5c3b28..428b58607 100644 --- a/langfuse/api/resources/ingestion/types/create_generation_body.py +++ b/langfuse/api/resources/ingestion/types/create_generation_body.py @@ -8,6 +8,7 @@ from ...commons.types.map_value import MapValue from .create_span_body import CreateSpanBody from .ingestion_usage import IngestionUsage +from .usage_details import UsageDetails class CreateGenerationBody(CreateSpanBody): @@ -19,7 +20,7 @@ class CreateGenerationBody(CreateSpanBody): alias="modelParameters", default=None ) usage: typing.Optional[IngestionUsage] = None - usage_details: typing.Optional[typing.Dict[str, int]] = pydantic_v1.Field( + usage_details: typing.Optional[UsageDetails] = pydantic_v1.Field( alias="usageDetails", default=None ) cost_details: typing.Optional[typing.Dict[str, float]] = pydantic_v1.Field( diff --git a/langfuse/api/resources/ingestion/types/open_ai_usage_schema.py b/langfuse/api/resources/ingestion/types/open_ai_usage_schema.py new file mode 100644 index 000000000..ecf755bb3 --- /dev/null +++ b/langfuse/api/resources/ingestion/types/open_ai_usage_schema.py @@ -0,0 +1,46 @@ +# This file was auto-generated by Fern from our API Definition. 
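+# Models the `usage` object returned by the OpenAI API. A sketch of the
+# payload (all values invented): {"prompt_tokens": 10, "completion_tokens": 20,
+# "total_tokens": 30, "prompt_tokens_details": {"cached_tokens": 2},
+# "completion_tokens_details": {"reasoning_tokens": 5}}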
+ +import datetime as dt +import typing + +from ....core.datetime_utils import serialize_datetime +from ....core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1 + + +class OpenAiUsageSchema(pydantic_v1.BaseModel): + prompt_tokens: int + completion_tokens: int + total_tokens: int + prompt_tokens_details: typing.Optional[typing.Dict[str, int]] = None + completion_tokens_details: typing.Optional[typing.Dict[str, int]] = None + + def json(self, **kwargs: typing.Any) -> str: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().json(**kwargs_with_defaults) + + def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]: + kwargs_with_defaults_exclude_unset: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + kwargs_with_defaults_exclude_none: typing.Any = { + "by_alias": True, + "exclude_none": True, + **kwargs, + } + + return deep_union_pydantic_dicts( + super().dict(**kwargs_with_defaults_exclude_unset), + super().dict(**kwargs_with_defaults_exclude_none), + ) + + class Config: + frozen = True + smart_union = True + extra = pydantic_v1.Extra.allow + json_encoders = {dt.datetime: serialize_datetime} diff --git a/langfuse/api/resources/ingestion/types/update_generation_body.py b/langfuse/api/resources/ingestion/types/update_generation_body.py index 4c85f748c..2058543af 100644 --- a/langfuse/api/resources/ingestion/types/update_generation_body.py +++ b/langfuse/api/resources/ingestion/types/update_generation_body.py @@ -8,6 +8,7 @@ from ...commons.types.map_value import MapValue from .ingestion_usage import IngestionUsage from .update_span_body import UpdateSpanBody +from .usage_details import UsageDetails class UpdateGenerationBody(UpdateSpanBody): @@ -22,7 +23,7 @@ class UpdateGenerationBody(UpdateSpanBody): prompt_name: typing.Optional[str] = pydantic_v1.Field( alias="promptName", default=None ) - usage_details: typing.Optional[typing.Dict[str, int]] = pydantic_v1.Field( + usage_details: typing.Optional[UsageDetails] = pydantic_v1.Field( alias="usageDetails", default=None ) cost_details: typing.Optional[typing.Dict[str, float]] = pydantic_v1.Field( diff --git a/langfuse/api/resources/ingestion/types/usage_details.py b/langfuse/api/resources/ingestion/types/usage_details.py new file mode 100644 index 000000000..89c0fc2e9 --- /dev/null +++ b/langfuse/api/resources/ingestion/types/usage_details.py @@ -0,0 +1,7 @@ +# This file was auto-generated by Fern from our API Definition. 
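+# UsageDetails accepts either a flat map of usage types to integer unit counts,
+# e.g. {"input": 500, "input_cached": 100, "output": 150} (illustrative values),
+# or a raw OpenAiUsageSchema payload as emitted by the OpenAI SDK.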
+ +import typing + +from .open_ai_usage_schema import OpenAiUsageSchema + +UsageDetails = typing.Union[typing.Dict[str, int], OpenAiUsageSchema] diff --git a/langfuse/api/resources/media/client.py b/langfuse/api/resources/media/client.py index a32916d5e..eb8eb16ac 100644 --- a/langfuse/api/resources/media/client.py +++ b/langfuse/api/resources/media/client.py @@ -197,7 +197,7 @@ def get_upload_url( Examples -------- - from langfuse.api import GetMediaUploadUrlRequest + from langfuse.api import GetMediaUploadUrlRequest, MediaContentType from langfuse.api.client import FernLangfuse client = FernLangfuse( @@ -212,6 +212,7 @@ def get_upload_url( request=GetMediaUploadUrlRequest( trace_id="string", observation_id="string", + content_type=MediaContentType.IMAGE_PNG, content_length=1, sha_256_hash="string", field="string", @@ -446,7 +447,7 @@ async def get_upload_url( -------- import asyncio - from langfuse.api import GetMediaUploadUrlRequest + from langfuse.api import GetMediaUploadUrlRequest, MediaContentType from langfuse.api.client import AsyncFernLangfuse client = AsyncFernLangfuse( @@ -464,6 +465,7 @@ async def main() -> None: request=GetMediaUploadUrlRequest( trace_id="string", observation_id="string", + content_type=MediaContentType.IMAGE_PNG, content_length=1, sha_256_hash="string", field="string", diff --git a/langfuse/api/resources/media/types/media_content_type.py b/langfuse/api/resources/media/types/media_content_type.py index 9b0cea41a..e8fdeefa2 100644 --- a/langfuse/api/resources/media/types/media_content_type.py +++ b/langfuse/api/resources/media/types/media_content_type.py @@ -1,35 +1,133 @@ # This file was auto-generated by Fern from our API Definition. +import enum import typing -MediaContentType = typing.Literal[ - "image/png", - "image/jpeg", - "image/jpg", - "image/webp", - "image/gif", - "image/svg+xml", - "image/tiff", - "image/bmp", - "audio/mpeg", - "audio/mp3", - "audio/wav", - "audio/ogg", - "audio/oga", - "audio/aac", - "audio/mp4", - "audio/flac", - "video/mp4", - "video/webm", - "text/plain", - "text/html", - "text/css", - "text/csv", - "application/pdf", - "application/msword", - "application/vnd.ms-excel", - "application/zip", - "application/json", - "application/xml", - "application/octet-stream", -] +T_Result = typing.TypeVar("T_Result") + + +class MediaContentType(str, enum.Enum): + """ + The MIME type of the media record + """ + + IMAGE_PNG = "image/png" + IMAGE_JPEG = "image/jpeg" + IMAGE_JPG = "image/jpg" + IMAGE_WEBP = "image/webp" + IMAGE_GIF = "image/gif" + IMAGE_SVG_XML = "image/svg+xml" + IMAGE_TIFF = "image/tiff" + IMAGE_BMP = "image/bmp" + AUDIO_MPEG = "audio/mpeg" + AUDIO_MP_3 = "audio/mp3" + AUDIO_WAV = "audio/wav" + AUDIO_OGG = "audio/ogg" + AUDIO_OGA = "audio/oga" + AUDIO_AAC = "audio/aac" + AUDIO_MP_4 = "audio/mp4" + AUDIO_FLAC = "audio/flac" + VIDEO_MP_4 = "video/mp4" + VIDEO_WEBM = "video/webm" + TEXT_PLAIN = "text/plain" + TEXT_HTML = "text/html" + TEXT_CSS = "text/css" + TEXT_CSV = "text/csv" + APPLICATION_PDF = "application/pdf" + APPLICATION_MSWORD = "application/msword" + APPLICATION_MS_EXCEL = "application/vnd.ms-excel" + APPLICATION_ZIP = "application/zip" + APPLICATION_JSON = "application/json" + APPLICATION_XML = "application/xml" + APPLICATION_OCTET_STREAM = "application/octet-stream" + + def visit( + self, + image_png: typing.Callable[[], T_Result], + image_jpeg: typing.Callable[[], T_Result], + image_jpg: typing.Callable[[], T_Result], + image_webp: typing.Callable[[], T_Result], + image_gif: typing.Callable[[], T_Result], + 
image_svg_xml: typing.Callable[[], T_Result], + image_tiff: typing.Callable[[], T_Result], + image_bmp: typing.Callable[[], T_Result], + audio_mpeg: typing.Callable[[], T_Result], + audio_mp_3: typing.Callable[[], T_Result], + audio_wav: typing.Callable[[], T_Result], + audio_ogg: typing.Callable[[], T_Result], + audio_oga: typing.Callable[[], T_Result], + audio_aac: typing.Callable[[], T_Result], + audio_mp_4: typing.Callable[[], T_Result], + audio_flac: typing.Callable[[], T_Result], + video_mp_4: typing.Callable[[], T_Result], + video_webm: typing.Callable[[], T_Result], + text_plain: typing.Callable[[], T_Result], + text_html: typing.Callable[[], T_Result], + text_css: typing.Callable[[], T_Result], + text_csv: typing.Callable[[], T_Result], + application_pdf: typing.Callable[[], T_Result], + application_msword: typing.Callable[[], T_Result], + application_ms_excel: typing.Callable[[], T_Result], + application_zip: typing.Callable[[], T_Result], + application_json: typing.Callable[[], T_Result], + application_xml: typing.Callable[[], T_Result], + application_octet_stream: typing.Callable[[], T_Result], + ) -> T_Result: + if self is MediaContentType.IMAGE_PNG: + return image_png() + if self is MediaContentType.IMAGE_JPEG: + return image_jpeg() + if self is MediaContentType.IMAGE_JPG: + return image_jpg() + if self is MediaContentType.IMAGE_WEBP: + return image_webp() + if self is MediaContentType.IMAGE_GIF: + return image_gif() + if self is MediaContentType.IMAGE_SVG_XML: + return image_svg_xml() + if self is MediaContentType.IMAGE_TIFF: + return image_tiff() + if self is MediaContentType.IMAGE_BMP: + return image_bmp() + if self is MediaContentType.AUDIO_MPEG: + return audio_mpeg() + if self is MediaContentType.AUDIO_MP_3: + return audio_mp_3() + if self is MediaContentType.AUDIO_WAV: + return audio_wav() + if self is MediaContentType.AUDIO_OGG: + return audio_ogg() + if self is MediaContentType.AUDIO_OGA: + return audio_oga() + if self is MediaContentType.AUDIO_AAC: + return audio_aac() + if self is MediaContentType.AUDIO_MP_4: + return audio_mp_4() + if self is MediaContentType.AUDIO_FLAC: + return audio_flac() + if self is MediaContentType.VIDEO_MP_4: + return video_mp_4() + if self is MediaContentType.VIDEO_WEBM: + return video_webm() + if self is MediaContentType.TEXT_PLAIN: + return text_plain() + if self is MediaContentType.TEXT_HTML: + return text_html() + if self is MediaContentType.TEXT_CSS: + return text_css() + if self is MediaContentType.TEXT_CSV: + return text_csv() + if self is MediaContentType.APPLICATION_PDF: + return application_pdf() + if self is MediaContentType.APPLICATION_MSWORD: + return application_msword() + if self is MediaContentType.APPLICATION_MS_EXCEL: + return application_ms_excel() + if self is MediaContentType.APPLICATION_ZIP: + return application_zip() + if self is MediaContentType.APPLICATION_JSON: + return application_json() + if self is MediaContentType.APPLICATION_XML: + return application_xml() + if self is MediaContentType.APPLICATION_OCTET_STREAM: + return application_octet_stream() From 71a2b378a38a18e53fc3dc4535c576abde1ef5b1 Mon Sep 17 00:00:00 2001 From: Hassieb Pakzad <68423100+hassiebp@users.noreply.github.com> Date: Tue, 10 Dec 2024 14:41:11 +0100 Subject: [PATCH 6/9] add langchain support --- langfuse/callback/langchain.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/langfuse/callback/langchain.py b/langfuse/callback/langchain.py index 7a4f90942..611a30759 100644 --- a/langfuse/callback/langchain.py +++ 
b/langfuse/callback/langchain.py @@ -845,6 +845,7 @@ def on_llm_end( self.runs[run_id] = self.runs[run_id].end( output=extracted_response, usage=llm_usage, + usage_details=llm_usage, version=self.version, input=kwargs.get("inputs"), model=model, @@ -1028,12 +1029,14 @@ def _parse_usage_model(usage: typing.Union[pydantic.BaseModel, dict]): # https://pypi.org/project/langchain-anthropic/ (works also for Bedrock-Anthropic) ("input_tokens", "input"), ("output_tokens", "output"), + ("total_tokens", "total"), # https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/get-token-count ("prompt_token_count", "input"), ("candidates_token_count", "output"), # Bedrock: https://docs.aws.amazon.com/bedrock/latest/userguide/monitoring-cw.html#runtime-cloudwatch-metrics ("inputTokenCount", "input"), ("outputTokenCount", "output"), + ("totalTokenCount", "total"), # langchain-ibm https://pypi.org/project/langchain-ibm/ ("input_token_count", "input"), ("generated_token_count", "output"), @@ -1051,6 +1054,19 @@ def _parse_usage_model(usage: typing.Union[pydantic.BaseModel, dict]): usage_model[langfuse_key] = final_count # Translate key and keep the value + if isinstance(usage_model, dict): + if "input_token_details" in usage_model: + input_token_details = usage_model.pop("input_token_details", {}) + + for key, value in input_token_details.items(): + usage_model[f"input_{key}"] = value + + if "output_token_details" in usage_model: + output_token_details = usage_model.pop("output_token_details", {}) + + for key, value in output_token_details.items(): + usage_model[f"output_{key}"] = value + return usage_model if usage_model else None From 652fd2e444ccecbe330aebbed69c0d8d2f2fc8c6 Mon Sep 17 00:00:00 2001 From: Hassieb Pakzad <68423100+hassiebp@users.noreply.github.com> Date: Tue, 10 Dec 2024 14:52:36 +0100 Subject: [PATCH 7/9] add llama support --- langfuse/llama_index/_event_handler.py | 48 +++++++++++++++++--------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/langfuse/llama_index/_event_handler.py b/langfuse/llama_index/_event_handler.py index 8a9644053..c299643e3 100644 --- a/langfuse/llama_index/_event_handler.py +++ b/langfuse/llama_index/_event_handler.py @@ -6,7 +6,6 @@ StateType, ) from langfuse.utils import _get_timestamp -from langfuse.model import ModelUsage from ._context import InstrumentorContext from uuid import uuid4 as create_uuid @@ -119,12 +118,12 @@ def update_generation_from_end_event( } self._get_generation_client(event.span_id).update( - usage=usage, end_time=_get_timestamp() + usage=usage, usage_details=usage, end_time=_get_timestamp() ) def _parse_token_usage( self, response: Union[ChatResponse, CompletionResponse] - ) -> Optional[ModelUsage]: + ) -> Optional[dict]: if ( (raw := getattr(response, "raw", None)) and hasattr(raw, "get") @@ -154,15 +153,15 @@ def _get_generation_client(self, id: str) -> StatefulGenerationClient: def _parse_usage_from_mapping( usage: Union[object, Mapping[str, Any]], -) -> ModelUsage: +): if isinstance(usage, Mapping): return _get_token_counts_from_mapping(usage) return _parse_usage_from_object(usage) -def _parse_usage_from_object(usage: object) -> ModelUsage: - model_usage: ModelUsage = { +def _parse_usage_from_object(usage: object): + model_usage = { "unit": None, "input": None, "output": None, @@ -179,21 +178,26 @@ def _parse_usage_from_object(usage: object) -> ModelUsage: if (total_tokens := getattr(usage, "total_tokens", None)) is not None: model_usage["total"] = total_tokens + if ( + prompt_tokens_details := getattr(usage, 
"prompt_tokens_details", None) + ) is not None and isinstance(prompt_tokens_details, dict): + for key, value in prompt_tokens_details.items(): + model_usage[f"input_{key}"] = value + + if ( + completion_tokens_details := getattr(usage, "completion_tokens_details", None) + ) is not None and isinstance(completion_tokens_details, dict): + for key, value in completion_tokens_details.items(): + model_usage[f"output_{key}"] = value + return model_usage def _get_token_counts_from_mapping( usage_mapping: Mapping[str, Any], -) -> ModelUsage: - model_usage: ModelUsage = { - "unit": None, - "input": None, - "output": None, - "total": None, - "input_cost": None, - "output_cost": None, - "total_cost": None, - } +): + model_usage = {} + if (prompt_tokens := usage_mapping.get("prompt_tokens")) is not None: model_usage["input"] = prompt_tokens if (completion_tokens := usage_mapping.get("completion_tokens")) is not None: @@ -201,4 +205,16 @@ def _get_token_counts_from_mapping( if (total_tokens := usage_mapping.get("total_tokens")) is not None: model_usage["total"] = total_tokens + if ( + prompt_tokens_details := usage_mapping.get("prompt_tokens_details") + ) is not None and isinstance(prompt_tokens_details, dict): + for key, value in prompt_tokens_details.items(): + model_usage[f"input_{key}"] = value + + if ( + completion_tokens_details := usage_mapping.get("completion_tokens_details") + ) is not None and isinstance(completion_tokens_details, dict): + for key, value in completion_tokens_details.items(): + model_usage[f"output_{key}"] = value + return model_usage From 20e86f9fa88a7ec2bda60278bbbd46a39bb3fadb Mon Sep 17 00:00:00 2001 From: Hassieb Pakzad <68423100+hassiebp@users.noreply.github.com> Date: Tue, 10 Dec 2024 15:20:47 +0100 Subject: [PATCH 8/9] add decorator support --- langfuse/decorators/langfuse_decorator.py | 11 ++++++++++- langfuse/types/__init__.py | 4 +++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/langfuse/decorators/langfuse_decorator.py b/langfuse/decorators/langfuse_decorator.py index fb4cd34cf..d5164e7c4 100644 --- a/langfuse/decorators/langfuse_decorator.py +++ b/langfuse/decorators/langfuse_decorator.py @@ -27,6 +27,7 @@ from typing_extensions import ParamSpec +from langfuse.api import UsageDetails from langfuse.client import ( Langfuse, StatefulSpanClient, @@ -71,6 +72,8 @@ "model": None, "model_parameters": None, "usage": None, + "usage_details": None, + "cost_details": None, "prompt": None, "public": None, }, @@ -823,6 +826,8 @@ def update_current_observation( model: Optional[str] = None, model_parameters: Optional[Dict[str, MapValue]] = None, usage: Optional[Union[BaseModel, ModelUsage]] = None, + usage_details: Optional[UsageDetails] = None, + cost_details: Optional[Dict[str, float]] = None, prompt: Optional[PromptClient] = None, public: Optional[bool] = None, ): @@ -857,7 +862,9 @@ def update_current_observation( Generation-specific params: - `completion_start_time` (Optional[datetime]): The time at which the completion started (streaming). Set it to get latency analytics broken down into time until completion started and completion duration. - `model_parameters` (Optional[Dict[str, MapValue]]): The parameters of the model used for the generation; can be any key-value pairs. 
- - `usage` (Optional[Union[BaseModel, ModelUsage]]): The usage object supports the OpenAi structure with {promptTokens, completionTokens, totalTokens} and a more generic version {input, output, total, unit, inputCost, outputCost, totalCost} where unit can be of value "TOKENS", "CHARACTERS", "MILLISECONDS", "SECONDS", or "IMAGES". Refer to the docs on how to automatically infer token usage and costs in Langfuse. + - `usage` (Optional[Union[BaseModel, ModelUsage]]): (Deprecated. Use `usage_details` and `cost_details` instead.) The usage object supports the OpenAi structure with {promptTokens, completionTokens, totalTokens} and a more generic version {input, output, total, unit, inputCost, outputCost, totalCost} where unit can be of value "TOKENS", "CHARACTERS", "MILLISECONDS", "SECONDS", or "IMAGES". Refer to the docs on how to automatically infer token usage and costs in Langfuse. + - `usage_details` (Optional[Dict[str, int]]): The usage details of the observation. Reflects the number of units consumed per usage type. All keys must sum up to the total key value. The total key holds the total number of units consumed. + - `cost_details` (Optional[Dict[str, float]]): The cost details of the observation. Reflects the USD cost of the observation per cost type. All keys must sum up to the total key value. The total key holds the total cost of the observation. - `prompt`(Optional[PromptClient]): The prompt object used for the generation. Returns: @@ -899,6 +906,8 @@ def update_current_observation( "model": model, "model_parameters": model_parameters, "usage": usage, + "usage_details": usage_details, + "cost_details": cost_details, "prompt": prompt, "public": public, }.items() diff --git a/langfuse/types/__init__.py b/langfuse/types/__init__.py index 1cef199f7..888966259 100644 --- a/langfuse/types/__init__.py +++ b/langfuse/types/__init__.py @@ -4,7 +4,7 @@ from typing import Any, Dict, List, Literal, Optional, Protocol, TypedDict, Union from pydantic import BaseModel -from langfuse.api import MediaContentType +from langfuse.api import MediaContentType, UsageDetails from langfuse.model import MapValue, ModelUsage, PromptClient SpanLevel = Literal["DEBUG", "DEFAULT", "WARNING", "ERROR"] @@ -34,6 +34,8 @@ class ObservationParams(TraceMetadata, TypedDict): model: Optional[str] model_parameters: Optional[Dict[str, MapValue]] usage: Optional[Union[BaseModel, ModelUsage]] + usage_details: Optional[UsageDetails] + cost_details: Optional[Dict[str, float]] prompt: Optional[PromptClient] From 54fac8e7e0a01b7adb3c4b8b9ea61461cbb8c2e9 Mon Sep 17 00:00:00 2001 From: Hassieb Pakzad <68423100+hassiebp@users.noreply.github.com> Date: Fri, 13 Dec 2024 09:41:15 +0100 Subject: [PATCH 9/9] remove openai parsing logic --- langfuse/client.py | 12 +-- langfuse/utils/__init__.py | 92 ++------------------ tests/test_extract_usage_details.py | 129 ---------------------------- 3 files changed, 11 insertions(+), 222 deletions(-) delete mode 100644 tests/test_extract_usage_details.py diff --git a/langfuse/client.py b/langfuse/client.py index 0ec1314fd..1c8b42de1 100644 --- a/langfuse/client.py +++ b/langfuse/client.py @@ -82,7 +82,6 @@ _convert_usage_input, _create_prompt_context, _get_timestamp, - _extract_usage_details, ) from .version import __version__ as version @@ -1914,8 +1913,7 @@ def generation( "model": model, "model_parameters": model_parameters, "usage": _convert_usage_input(usage) if usage is not None else None, - "usage_details": usage_details - and _extract_usage_details(usage_details), + "usage_details": 
usage_details, "cost_details": cost_details, "trace": {"release": self.release}, **_create_prompt_context(prompt), @@ -2175,8 +2173,7 @@ def generation( "input": input, "output": output, "usage": _convert_usage_input(usage) if usage is not None else None, - "usage_details": usage_details - and _extract_usage_details(usage_details), + "usage_details": usage_details, "cost_details": cost_details, **_create_prompt_context(prompt), **kwargs, @@ -2607,8 +2604,7 @@ def update( "input": input, "output": output, "usage": _convert_usage_input(usage) if usage is not None else None, - "usage_details": usage_details - and _extract_usage_details(usage_details), + "usage_details": usage_details, "cost_details": cost_details, **_create_prompt_context(prompt), **kwargs, @@ -2717,7 +2713,7 @@ def end( input=input, output=output, usage=usage, - usage_details=usage_details and _extract_usage_details(usage_details), + usage_details=usage_details, cost_details=cost_details, prompt=prompt, **kwargs, diff --git a/langfuse/utils/__init__.py b/langfuse/utils/__init__.py index 66555966b..7a97d589f 100644 --- a/langfuse/utils/__init__.py +++ b/langfuse/utils/__init__.py @@ -4,21 +4,13 @@ import typing from datetime import datetime, timezone -import pydantic - +try: + import pydantic.v1 as pydantic # type: ignore +except ImportError: + import pydantic # type: ignore from langfuse.model import ModelUsage, PromptClient -IS_PYDANTIC_V2 = pydantic.VERSION.startswith("2.") - -if IS_PYDANTIC_V2: - import pydantic.v1 as pydantic_v1 # noqa - import pydantic as pydantic_v2 # noqa -else: - import pydantic as pydantic_v1 # noqa - - pydantic_v2 = None # type: ignore - log = logging.getLogger("langfuse") @@ -54,12 +46,9 @@ def extract_by_priority( return None -def _convert_usage_input(usage: typing.Union[pydantic_v1.BaseModel, ModelUsage]): - """Convert any usage input to a usage object. 
- - Deprecated, only used for backwards compatibility with legacy 'usage' objects in generation create / update - """ - if isinstance(usage, pydantic_v1.BaseModel): +def _convert_usage_input(usage: typing.Union[pydantic.BaseModel, ModelUsage]): + """Converts any usage input to a usage object""" + if isinstance(usage, pydantic.BaseModel): usage = usage.dict() # sometimes we do not match the pydantic usage object @@ -114,70 +103,3 @@ def _convert_usage_input(usage: typing.Union[pydantic_v1.BaseModel, ModelUsage]) raise ValueError( "Usage object must have either {input, output, total, unit} or {promptTokens, completionTokens, totalTokens}" ) - - -def _extract_usage_details(usage_details: typing.Dict[str, typing.Any]): - if isinstance(usage_details, pydantic_v1.BaseModel): - usage_details = usage_details.dict() - - if pydantic_v2 is not None and isinstance(usage_details, pydantic_v2.BaseModel): - usage_details = usage_details.model_dump() - - if hasattr(usage_details, "__dict__"): - usage_details = usage_details.__dict__ - - # Handle openai usage details - if all( - k in usage_details - for k in ("prompt_tokens", "completion_tokens", "total_tokens") - ) or all( - k in usage_details for k in ("promptTokens", "completionTokens", "totalTokens") - ): - openai_usage_details = { - "input": usage_details.get("prompt_tokens", None) - or usage_details.get("promptTokens", None), - "output": usage_details.get("completion_tokens", None) - or usage_details.get("completionTokens", None), - "total": usage_details.get("total_tokens", None) - or usage_details.get("totalTokens", None), - } - - # Handle input token details - prompt_tokens_details = usage_details.get("prompt_tokens_details", {}) - if pydantic_v2 is not None and isinstance( - prompt_tokens_details, pydantic_v2.BaseModel - ): - prompt_tokens_details = prompt_tokens_details.model_dump() - elif hasattr(prompt_tokens_details, "__dict__"): - prompt_tokens_details = prompt_tokens_details.__dict__ - - if isinstance(prompt_tokens_details, dict): - for key in prompt_tokens_details: - openai_usage_details[f"input_{key}"] = prompt_tokens_details[key] - openai_usage_details["input"] = max( - openai_usage_details.get("input", 0) - - openai_usage_details[f"input_{key}"], - 0, - ) - - # Handle output token details - completion_tokens_details = usage_details.get("completion_tokens_details", {}) - if pydantic_v2 is not None and isinstance( - completion_tokens_details, pydantic_v2.BaseModel - ): - completion_tokens_details = completion_tokens_details.model_dump() - elif hasattr(completion_tokens_details, "__dict__"): - completion_tokens_details = completion_tokens_details.__dict__ - - if isinstance(completion_tokens_details, dict): - for key in completion_tokens_details: - openai_usage_details[f"output_{key}"] = completion_tokens_details[key] - openai_usage_details["output"] = max( - openai_usage_details.get("output", 0) - - openai_usage_details[f"output_{key}"], - 0, - ) - - return openai_usage_details - - return usage_details diff --git a/tests/test_extract_usage_details.py b/tests/test_extract_usage_details.py deleted file mode 100644 index 9fc8cd7c2..000000000 --- a/tests/test_extract_usage_details.py +++ /dev/null @@ -1,129 +0,0 @@ -from langfuse.client import _extract_usage_details -from tests.utils import CompletionUsage, LlmUsage, LlmUsageWithCost - - -def test_extract_usage_details_openai_style(): - usage = {"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30} - - result = _extract_usage_details(usage) - - assert result == {"input": 10, 
"output": 20, "total": 30} - - -def test_extract_usage_details_openai_with_token_details(): - usage = { - "prompt_tokens": 10, - "completion_tokens": 20, - "total_tokens": 30, - "completion_token_details": {"audio_tokens": 5}, - "prompt_token_details": {"audio_tokens": 3, "cached_tokens": 2}, - } - - result = _extract_usage_details(usage) - - assert result == { - "input": 5, - "input_audio": 3, - "input_cached": 2, - "output": 15, - "output_audio": 5, - "total": 30, - } - - -def test_extract_usage_details_openai_with_completion_token_details_only(): - usage = { - "prompt_tokens": 10, - "completion_tokens": 20, - "total_tokens": 30, - "completion_token_details": {"audio_tokens": 5}, - } - - result = _extract_usage_details(usage) - - assert result == { - "input": 10, - "output": 15, - "output_audio": 5, - "total": 30, - } - - -def test_extract_usage_details_openai_with_prompt_token_details_only(): - usage = { - "prompt_tokens": 10, - "completion_tokens": 20, - "total_tokens": 30, - "prompt_token_details": {"cached_tokens": 3, "audio_tokens": 7}, - } - - result = _extract_usage_details(usage) - - assert result == { - "input": 0, - "input_cached": 3, - "input_audio": 7, - "output": 20, - "total": 30, - } - - -def test_extract_usage_details_pydantic_openai(): - usage = CompletionUsage(prompt_tokens=10, completion_tokens=20, total_tokens=30) - - result = _extract_usage_details(usage.dict()) - - assert result == {"input": 10, "output": 20, "total": 30} - - -def test_extract_usage_details_llm_usage(): - usage = LlmUsage(promptTokens=10, completionTokens=20, totalTokens=30) - - result = _extract_usage_details(usage.dict()) - - assert result == {"input": 10, "output": 20, "total": 30} - - -def test_extract_usage_details_llm_usage_with_cost(): - usage = LlmUsageWithCost( - promptTokens=10, - completionTokens=20, - totalTokens=30, - inputCost=0.0001, - outputCost=0.0002, - totalCost=0.0003, - ) - - result = _extract_usage_details(usage.dict()) - - assert result == {"input": 10, "output": 20, "total": 30} - - -def test_extract_usage_details_raw(): - usage = {"input": 100, "output": 200, "total": 300} - - result = _extract_usage_details(usage) - - assert result == usage - - -def test_extract_usage_details_raw_with_cached(): - usage = {"input": 100, "input_cached": 50, "output": 200, "total": 300} - - result = _extract_usage_details(usage) - - assert result == usage - - -def test_extract_usage_details_empty(): - result = _extract_usage_details({}) - - assert result == {} - - -def test_extract_usage_details_invalid_keys(): - usage = {"foo": 10, "bar": 20} - - result = _extract_usage_details(usage) - - assert result == usage