From 331cc4392d39bd8726ad9de299cb14ab560c1c9f Mon Sep 17 00:00:00 2001
From: Hassieb Pakzad <68423100+hassiebp@users.noreply.github.com>
Date: Mon, 24 Feb 2025 14:29:36 +0100
Subject: [PATCH 1/4] fix(langchain): cached token usage

---
 langfuse/callback/langchain.py |  6 +++++
 tests/test_langchain.py        | 46 ++++++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+)

diff --git a/langfuse/callback/langchain.py b/langfuse/callback/langchain.py
index 8749ef914..0b9a8ab75 100644
--- a/langfuse/callback/langchain.py
+++ b/langfuse/callback/langchain.py
@@ -1096,12 +1096,18 @@ def _parse_usage_model(usage: typing.Union[pydantic.BaseModel, dict]):
 
         for key, value in input_token_details.items():
             usage_model[f"input_{key}"] = value
 
+            if "input" in usage_model:
+                usage_model["input"] -= value
+
     if "output_token_details" in usage_model:
         output_token_details = usage_model.pop("output_token_details", {})
 
         for key, value in output_token_details.items():
             usage_model[f"output_{key}"] = value
 
+            if "output" in usage_model:
+                usage_model["output"] -= value
+
     return usage_model if usage_model else None
 
diff --git a/tests/test_langchain.py b/tests/test_langchain.py
index 86e49b970..da31edabc 100644
--- a/tests/test_langchain.py
+++ b/tests/test_langchain.py
@@ -2318,3 +2318,49 @@ def call_model(state: MessagesState):
     assert observation.level == "DEFAULT"
 
     assert hidden_count > 0
+
+
+def test_cached_token_usage():
+    prompt = ChatPromptTemplate.from_messages(
+        [
+            (
+                "system",
+                (
+                    "This is a test prompt to reproduce the issue. "
+                    "The prompt needs 1024 tokens to enable cache." * 100
+                ),
+            ),
+            ("user", "Reply to this message {test_param}."),
+        ]
+    )
+    chat = ChatOpenAI(model="gpt-4o-mini")
+    chain = prompt | chat
+    handler = CallbackHandler()
+    config = {"callbacks": [handler]} if handler else {}
+
+    chain.invoke({"test_param": "in a funny way"}, config)
+
+    # invoke again to force cached token usage
+    chain.invoke({"test_param": "in a funny way"}, config)
+
+    handler.flush()
+
+    trace = get_api().trace.get(handler.get_trace_id())
+
+    generation = next((o for o in trace.observations if o.type == "GENERATION"))
+
+    assert generation.usage_details["input_cache_read"] > 0
+    assert (
+        generation.usage_details["input"]
+        + generation.usage_details["input_cache_read"]
+        + generation.usage_details["output"]
+        == generation.usage_details["total"]
+    )
+
+    assert generation.cost_details["input_cache_read"] > 0
+    assert (
+        generation.cost_details["input"]
+        + generation.cost_details["input_cache_read"]
+        + generation.cost_details["output"]
+        == generation.cost_details["total"]
+    )

From 9e934a153b811d81aa80146270f6d1fdb0c18cfb Mon Sep 17 00:00:00 2001
From: Hassieb Pakzad <68423100+hassiebp@users.noreply.github.com>
Date: Mon, 24 Feb 2025 14:40:26 +0100
Subject: [PATCH 2/4] Update langfuse/callback/langchain.py

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
---
 langfuse/callback/langchain.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/langfuse/callback/langchain.py b/langfuse/callback/langchain.py
index 0b9a8ab75..081c558b2 100644
--- a/langfuse/callback/langchain.py
+++ b/langfuse/callback/langchain.py
@@ -1097,7 +1097,7 @@ def _parse_usage_model(usage: typing.Union[pydantic.BaseModel, dict]):
             usage_model[f"input_{key}"] = value
 
             if "input" in usage_model:
-                usage_model["input"] -= value
+                usage_model["input"] = max(0, usage_model["input"] - value)
 
     if "output_token_details" in usage_model:
         output_token_details = usage_model.pop("output_token_details", {})

From 131f87fe53ac998062539233b1cb8eb02017d738 Mon Sep 17 00:00:00 2001
From: Hassieb Pakzad <68423100+hassiebp@users.noreply.github.com>
Date: Mon, 24 Feb 2025 14:40:33 +0100
Subject: [PATCH 3/4] Update langfuse/callback/langchain.py

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
---
 langfuse/callback/langchain.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/langfuse/callback/langchain.py b/langfuse/callback/langchain.py
index 081c558b2..c697c718d 100644
--- a/langfuse/callback/langchain.py
+++ b/langfuse/callback/langchain.py
@@ -1106,7 +1106,7 @@ def _parse_usage_model(usage: typing.Union[pydantic.BaseModel, dict]):
             usage_model[f"output_{key}"] = value
 
             if "output" in usage_model:
-                usage_model["output"] -= value
+                usage_model["output"] = max(0, usage_model["output"] - value)
 
     return usage_model if usage_model else None
 

From 1a908864b6692764a4d1f6fc2ede92533190273a Mon Sep 17 00:00:00 2001
From: Hassieb Pakzad <68423100+hassiebp@users.noreply.github.com>
Date: Mon, 24 Feb 2025 14:40:50 +0100
Subject: [PATCH 4/4] Update tests/test_langchain.py

Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com>
---
 tests/test_langchain.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_langchain.py b/tests/test_langchain.py
index da31edabc..90cdec1f2 100644
--- a/tests/test_langchain.py
+++ b/tests/test_langchain.py
@@ -2336,7 +2336,7 @@ def test_cached_token_usage():
     chat = ChatOpenAI(model="gpt-4o-mini")
     chain = prompt | chat
     handler = CallbackHandler()
-    config = {"callbacks": [handler]} if handler else {}
+    config = {"callbacks": [handler]}
 
     chain.invoke({"test_param": "in a funny way"}, config)
 