From 3775292b188056d890dc7465aa12eb6a36679470 Mon Sep 17 00:00:00 2001
From: Rene Fabricius
Date: Thu, 24 Apr 2025 12:57:49 +0200
Subject: [PATCH 1/2] Don't parse empty usage_metadata

---
 langfuse/callback/langchain.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/langfuse/callback/langchain.py b/langfuse/callback/langchain.py
index 674ab0b21..7a1e983ba 100644
--- a/langfuse/callback/langchain.py
+++ b/langfuse/callback/langchain.py
@@ -1131,7 +1131,7 @@ def _parse_usage(response: LLMResult):
     for generation in response.generations:
         for generation_chunk in generation:
             if generation_chunk.generation_info and (
-                "usage_metadata" in generation_chunk.generation_info
+                generation_chunk.generation_info.get("usage_metadata", None)
             ):
                 llm_usage = _parse_usage_model(
                     generation_chunk.generation_info["usage_metadata"]

From 10ceef6f7261071d3ebd20f8290b25967248bcc7 Mon Sep 17 00:00:00 2001
From: Rene Fabricius
Date: Fri, 25 Apr 2025 11:53:20 +0200
Subject: [PATCH 2/2] Parse vertexai usage details

---
 langfuse/callback/langchain.py | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/langfuse/callback/langchain.py b/langfuse/callback/langchain.py
index 7a1e983ba..86862b789 100644
--- a/langfuse/callback/langchain.py
+++ b/langfuse/callback/langchain.py
@@ -1066,6 +1066,7 @@ def _parse_usage_model(usage: typing.Union[pydantic.BaseModel, dict]):
         # https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/get-token-count
         ("prompt_token_count", "input"),
         ("candidates_token_count", "output"),
+        ("total_token_count", "total"),
         # Bedrock: https://docs.aws.amazon.com/bedrock/latest/userguide/monitoring-cw.html#runtime-cloudwatch-metrics
         ("inputTokenCount", "input"),
         ("outputTokenCount", "output"),
@@ -1114,6 +1115,38 @@ def _parse_usage_model(usage: typing.Union[pydantic.BaseModel, dict]):
             if "output" in usage_model:
                 usage_model["output"] = max(0, usage_model["output"] - value)
 
+    if "prompt_tokens_details" in usage_model:
+        prompt_tokens_details = usage_model.pop("prompt_tokens_details", [])
+
+        for entry in prompt_tokens_details:
+            if (
+                isinstance(entry, dict)
+                and "modality" in entry
+                and "token_count" in entry
+            ):
+                value = entry["token_count"]
+                usage_model[f"input_{entry['modality']}"] = value
+
+                if "input" in usage_model:
+                    usage_model["input"] = max(0, usage_model["input"] - value)
+
+    if "candidates_tokens_details" in usage_model:
+        candidates_tokens_details = usage_model.pop("candidates_tokens_details", [])
+
+        for entry in candidates_tokens_details:
+            if (
+                isinstance(entry, dict)
+                and "modality" in entry
+                and "token_count" in entry
+            ):
+                value = entry["token_count"]
+                usage_model[f"output_{entry['modality']}"] = value
+
+                if "output" in usage_model:
+                    usage_model["output"] = max(0, usage_model["output"] - value)
+
+    _ = usage_model.pop("cache_tokens_details", [])
+
     return usage_model if usage_model else None
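
Note (not part of the patches): a minimal sketch of how the branches added in PATCH 2/2 are expected to behave on a VertexAI-style usage payload. The `split_modalities` helper and the example payload below are illustrative assumptions, not the langfuse implementation or a documented VertexAI response; they only mirror the keys the patch handles (`prompt_tokens_details`, `candidates_tokens_details`, `cache_tokens_details`).

```python
# Illustrative stand-in for the added _parse_usage_model branches; the payload
# shape is assumed from the keys the patch handles.

def split_modalities(usage_model: dict) -> dict:
    for details_key, prefix in [
        ("prompt_tokens_details", "input"),
        ("candidates_tokens_details", "output"),
    ]:
        # An entry like {"modality": "TEXT", "token_count": 100} becomes an
        # "input_TEXT"/"output_TEXT" key; its count is subtracted from the
        # aggregate "input"/"output" so tokens are not double-counted.
        for entry in usage_model.pop(details_key, []):
            if isinstance(entry, dict) and "modality" in entry and "token_count" in entry:
                value = entry["token_count"]
                usage_model[f"{prefix}_{entry['modality']}"] = value
                if prefix in usage_model:
                    usage_model[prefix] = max(0, usage_model[prefix] - value)
    # Cached-token details are discarded, mirroring the patch.
    usage_model.pop("cache_tokens_details", [])
    return usage_model


# Assumed example: aggregates already mapped from prompt_token_count /
# candidates_token_count / total_token_count by the existing key table.
example = {
    "input": 120,
    "output": 45,
    "total": 165,
    "prompt_tokens_details": [
        {"modality": "TEXT", "token_count": 100},
        {"modality": "IMAGE", "token_count": 20},
    ],
    "candidates_tokens_details": [{"modality": "TEXT", "token_count": 45}],
}
print(split_modalities(example))
# {'input': 0, 'output': 0, 'total': 165,
#  'input_TEXT': 100, 'input_IMAGE': 20, 'output_TEXT': 45}
```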