langfuse · t-miyak · Apr 24, 2025 · Apr 24, 2025 · Apr 24, 2025 · Apr 24, 2025
diff --git a/.env.template b/.env.template
@@ -7,4 +7,5 @@ LANGFUSE_SECRET_KEY=
 OPENAI_API_KEY=
 SERPAPI_API_KEY=
 ANTHROPIC_API_KEY=
-HUGGINGFACEHUB_API_TOKEN=
+HUGGINGFACEHUB_API_TOKEN=
+GOOGLE_APPLICATION_CREDENTIALS=
diff --git a/langfuse/callback/langchain.py b/langfuse/callback/langchain.py
@@ -1066,6 +1066,7 @@ def _parse_usage_model(usage: typing.Union[pydantic.BaseModel, dict]):
         # https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/get-token-count
         ("prompt_token_count", "input"),
         ("candidates_token_count", "output"),
+        ("total_token_count", "total"),
         # Bedrock: https://docs.aws.amazon.com/bedrock/latest/userguide/monitoring-cw.html#runtime-cloudwatch-metrics
         ("inputTokenCount", "input"),
         ("outputTokenCount", "output"),
@@ -1114,6 +1115,25 @@ def _parse_usage_model(usage: typing.Union[pydantic.BaseModel, dict]):
                 if "output" in usage_model:
                     usage_model["output"] = max(0, usage_model["output"] - value)
 
+        # For Vertex AI, the usage metadata also carries non-integer (list-valued) token-detail fields that are not needed for usage tracking, so remove them.
+        # ref. https://cloud.google.com/vertex-ai/generative-ai/docs/reference/rpc/google.cloud.aiplatform.v1#google.cloud.aiplatform.v1.GenerateContentResponse.UsageMetadata
+        if all(
+            gemini_key in usage
+            for gemini_key in [
+                "prompt_token_count",
+                "candidates_token_count",
+                "total_token_count",
+            ]
+        ):
+            for key in [
+                "prompt_tokens_details",
+                "candidates_tokens_details",
+                "cache_tokens_details",
+            ]:
+                if key in usage_model:
+                    if isinstance(usage_model[key], list):
+                        del usage_model[key]
+
     return usage_model if usage_model else None
 
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -15,7 +15,7 @@ backoff = ">=1.10.0"
 openai = { version = ">=0.27.8", optional = true }
 wrapt = "^1.14"
 langchain = { version = ">=0.0.309", optional = true }
-llama-index = {version = ">=0.10.12, <2.0.0", optional = true}
+llama-index = { version = ">=0.10.12, <2.0.0", optional = true }
 packaging = ">=23.2,<25.0"
 idna = "^3.7"
 anyio = "^4.4.0"
@@ -73,4 +73,4 @@ log_cli = true
 [tool.poetry_bumpversion.file."langfuse/version.py"]
 
 [tool.poetry.scripts]
-release = "scripts.release:main"
+release = "scripts.release:main"
diff --git a/tests/test_langchain.py b/tests/test_langchain.py
@@ -429,11 +429,11 @@ def test_mistral():
 
 @pytest.mark.skip(reason="missing api key")
 def test_vertx():
-    from langchain.llms import VertexAI
+    from langchain_google_vertexai import VertexAI
 
     callback = CallbackHandler(debug=False)
 
-    llm = VertexAI(callbacks=[callback])
+    llm = VertexAI(model="gemini-2.0-flash-lite-001", callbacks=[callback])
     llm.predict("say a brief hello", callbacks=[callback])
 
     callback.flush()
@@ -443,10 +443,10 @@ def test_vertx():
     trace = get_api().trace.get(trace_id)
 
     assert trace.id == trace_id
-    assert len(trace.observations) == 2
+    assert len(trace.observations) == 1
 
     generation = list(filter(lambda o: o.type == "GENERATION", trace.observations))[0]
-    assert generation.model == "text-bison"
+    assert generation.model == "gemini-2.0-flash-lite-001"
 
 
 @pytest.mark.skip(reason="rate limits")