diff --git a/.env.template b/.env.template index 44e1fa81f..4a6fb27c0 100644 --- a/.env.template +++ b/.env.template @@ -7,4 +7,5 @@ LANGFUSE_SECRET_KEY= OPENAI_API_KEY= SERPAPI_API_KEY= ANTHROPIC_API_KEY= -HUGGINGFACEHUB_API_TOKEN= \ No newline at end of file +HUGGINGFACEHUB_API_TOKEN= +GOOGLE_APPLICATION_CREDENTIALS= diff --git a/langfuse/callback/langchain.py b/langfuse/callback/langchain.py index 674ab0b21..92b529cc9 100644 --- a/langfuse/callback/langchain.py +++ b/langfuse/callback/langchain.py @@ -1066,6 +1066,7 @@ def _parse_usage_model(usage: typing.Union[pydantic.BaseModel, dict]): # https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/get-token-count ("prompt_token_count", "input"), ("candidates_token_count", "output"), + ("total_token_count", "total"), # Bedrock: https://docs.aws.amazon.com/bedrock/latest/userguide/monitoring-cw.html#runtime-cloudwatch-metrics ("inputTokenCount", "input"), ("outputTokenCount", "output"), @@ -1114,6 +1115,25 @@ def _parse_usage_model(usage: typing.Union[pydantic.BaseModel, dict]): if "output" in usage_model: usage_model["output"] = max(0, usage_model["output"] - value) + # For VertexAI, the usage model contains non-integer (list-valued) detail fields that are not needed for usage tracking, so remove them. + # ref.
https://cloud.google.com/vertex-ai/generative-ai/docs/reference/rpc/google.cloud.aiplatform.v1#google.cloud.aiplatform.v1.GenerateContentResponse.UsageMetadata + if all( + gemini_key in usage + for gemini_key in [ + "prompt_token_count", + "candidates_token_count", + "total_token_count", + ] + ): + for key in [ + "prompt_tokens_details", + "candidates_tokens_details", + "cache_tokens_details", + ]: + if key in usage_model: + if isinstance(usage_model[key], list): + del usage_model[key] + return usage_model if usage_model else None diff --git a/pyproject.toml b/pyproject.toml index 96da7a2df..190f3efe0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,7 @@ backoff = ">=1.10.0" openai = { version = ">=0.27.8", optional = true } wrapt = "^1.14" langchain = { version = ">=0.0.309", optional = true } -llama-index = {version = ">=0.10.12, <2.0.0", optional = true} +llama-index = { version = ">=0.10.12, <2.0.0", optional = true } packaging = ">=23.2,<25.0" idna = "^3.7" anyio = "^4.4.0" @@ -73,4 +73,4 @@ log_cli = true [tool.poetry_bumpversion.file."langfuse/version.py"] [tool.poetry.scripts] -release = "scripts.release:main" \ No newline at end of file +release = "scripts.release:main" diff --git a/tests/test_langchain.py b/tests/test_langchain.py index 56a56f69d..88d24cb7b 100644 --- a/tests/test_langchain.py +++ b/tests/test_langchain.py @@ -429,11 +429,11 @@ def test_mistral(): @pytest.mark.skip(reason="missing api key") def test_vertx(): - from langchain.llms import VertexAI + from langchain_google_vertexai import VertexAI callback = CallbackHandler(debug=False) - llm = VertexAI(callbacks=[callback]) + llm = VertexAI(model="gemini-2.0-flash-lite-001", callbacks=[callback]) llm.predict("say a brief hello", callbacks=[callback]) callback.flush() @@ -443,10 +443,10 @@ def test_vertx(): trace = get_api().trace.get(trace_id) assert trace.id == trace_id - assert len(trace.observations) == 2 + assert len(trace.observations) == 1 generation = list(filter(lambda 
o: o.type == "GENERATION", trace.observations))[0] - assert generation.model == "text-bison" + assert generation.model == "gemini-2.0-flash-lite-001" @pytest.mark.skip(reason="rate limits")