From 331cc4392d39bd8726ad9de299cb14ab560c1c9f Mon Sep 17 00:00:00 2001
From: Hassieb Pakzad <68423100+hassiebp@users.noreply.github.com>
Date: Mon, 24 Feb 2025 14:29:36 +0100
Subject: [PATCH 1/4] fix(langchain): cached token usage

---
 langfuse/callback/langchain.py |  6 +++++
 tests/test_langchain.py        | 46 ++++++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+)

diff --git a/langfuse/callback/langchain.py b/langfuse/callback/langchain.py
index 8749ef914..0b9a8ab75 100644
--- a/langfuse/callback/langchain.py
+++ b/langfuse/callback/langchain.py
@@ -1096,12 +1096,18 @@ def _parse_usage_model(usage: typing.Union[pydantic.BaseModel, dict]):
 
         for key, value in input_token_details.items():
             usage_model[f"input_{key}"] = value
 
+            if "input" in usage_model:
+                usage_model["input"] -= value
+
     if "output_token_details" in usage_model:
         output_token_details = usage_model.pop("output_token_details", {})
 
         for key, value in output_token_details.items():
             usage_model[f"output_{key}"] = value
 
+            if "output" in usage_model:
+                usage_model["output"] -= value
+
     return usage_model if usage_model else None
 
diff --git a/tests/test_langchain.py b/tests/test_langchain.py
index 86e49b970..da31edabc 100644
--- a/tests/test_langchain.py
+++ b/tests/test_langchain.py
@@ -2318,3 +2318,49 @@ def call_model(state: MessagesState):
     assert observation.level == "DEFAULT"
 
     assert hidden_count > 0
+
+
+def test_cached_token_usage():
+    prompt = ChatPromptTemplate.from_messages(
+        [
+            (
+                "system",
+                (
+                    "This is a test prompt to reproduce the issue. "
+                    "The prompt needs 1024 tokens to enable cache." * 100
+                ),
+            ),
+            ("user", "Reply to this message {test_param}."),
+        ]
+    )
+    chat = ChatOpenAI(model="gpt-4o-mini")
+    chain = prompt | chat
+    handler = CallbackHandler()
+    config = {"callbacks": [handler]} if handler else {}
+
+    chain.invoke({"test_param": "in a funny way"}, config)
+
+    # invoke again to force cached token usage
+    chain.invoke({"test_param": "in a funny way"}, config)
+
+    handler.flush()
+
+    trace = get_api().trace.get(handler.get_trace_id())
+
+    generation = next((o for o in trace.observations if o.type == "GENERATION"))
+
+    assert generation.usage_details["input_cache_read"] > 0
+    assert (
+        generation.usage_details["input"]
+        + generation.usage_details["input_cache_read"]
+        + generation.usage_details["output"]
+        == generation.usage_details["total"]
+    )
+
+    assert generation.cost_details["input_cache_read"] > 0
+    assert (
+        generation.cost_details["input"]
+        + generation.cost_details["input_cache_read"]
+        + generation.cost_details["output"]
+        == generation.cost_details["total"]
+    )

From 9e934a153b811d81aa80146270f6d1fdb0c18cfb Mon Sep 17 00:00:00 2001
From: Hassieb Pakzad <68423100+hassiebp@users.noreply.github.com>
Date: Mon, 24 Feb 2025 14:40:26 +0100
Subject: [PATCH 2/4] Update langfuse/callback/langchain.py

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
---
 langfuse/callback/langchain.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/langfuse/callback/langchain.py b/langfuse/callback/langchain.py
index 0b9a8ab75..081c558b2 100644
--- a/langfuse/callback/langchain.py
+++ b/langfuse/callback/langchain.py
@@ -1097,7 +1097,7 @@ def _parse_usage_model(usage: typing.Union[pydantic.BaseModel, dict]):
             usage_model[f"input_{key}"] = value
 
             if "input" in usage_model:
-                usage_model["input"] -= value
+                usage_model["input"] = max(0, usage_model["input"] - value)
 
     if "output_token_details" in usage_model:
         output_token_details = usage_model.pop("output_token_details", {})

From 131f87fe53ac998062539233b1cb8eb02017d738 Mon Sep 17 00:00:00 2001
From: Hassieb Pakzad <68423100+hassiebp@users.noreply.github.com>
Date: Mon, 24 Feb 2025 14:40:33 +0100
Subject: [PATCH 3/4] Update langfuse/callback/langchain.py

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
---
 langfuse/callback/langchain.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/langfuse/callback/langchain.py b/langfuse/callback/langchain.py
index 081c558b2..c697c718d 100644
--- a/langfuse/callback/langchain.py
+++ b/langfuse/callback/langchain.py
@@ -1106,7 +1106,7 @@ def _parse_usage_model(usage: typing.Union[pydantic.BaseModel, dict]):
             usage_model[f"output_{key}"] = value
 
             if "output" in usage_model:
-                usage_model["output"] -= value
+                usage_model["output"] = max(0, usage_model["output"] - value)
 
     return usage_model if usage_model else None
 

From 1a908864b6692764a4d1f6fc2ede92533190273a Mon Sep 17 00:00:00 2001
From: Hassieb Pakzad <68423100+hassiebp@users.noreply.github.com>
Date: Mon, 24 Feb 2025 14:40:50 +0100
Subject: [PATCH 4/4] Update tests/test_langchain.py

Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com>
---
 tests/test_langchain.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_langchain.py b/tests/test_langchain.py
index da31edabc..90cdec1f2 100644
--- a/tests/test_langchain.py
+++ b/tests/test_langchain.py
@@ -2336,7 +2336,7 @@ def test_cached_token_usage():
     chat = ChatOpenAI(model="gpt-4o-mini")
     chain = prompt | chat
     handler = CallbackHandler()
-    config = {"callbacks": [handler]} if handler else {}
+    config = {"callbacks": [handler]}
 
     chain.invoke({"test_param": "in a funny way"}, config)
 