diff --git a/.cursor/rules/langfuse.mdc b/.cursor/rules/langfuse.mdc
index 14c4c34..86ce188 100644
--- a/.cursor/rules/langfuse.mdc
+++ b/.cursor/rules/langfuse.mdc
@@ -3,17 +3,28 @@ description:
 globs: *.py
 alwaysApply: false
 ---
-LangFuse is an LLM observability tool, to see how LLMs are behaving in our application. It uses callbacks to reord LLM inference. You should always use it when using LLM applications, to track if the LLM is behaving as we want it to.
+Langfuse is an LLM observability tool used to monitor LLM behavior in our application. It uses decorators and callbacks to record LLM inference. Always use it in LLM-related modules to ensure traceability.
 
-Below is the most typical usage.
+### Langfuse v3 Migration Guide
+As of Langfuse SDK v3.x, the `langfuse.decorators` module structure has changed.
+
+**Typical Usage:**
 ```python
-from langfuse.decorators import observe, langfuse_context
+from langfuse import observe, get_client
 
-@observe
+@observe()
 def function_name(...):
+    # To rename the span/observation or update metadata:
+    get_client().update_current_span(name=f"descriptive_name_{id}")
-    # If we don't want the name of the span to be "function_name", and want to give a more descriptive name like with email, we should use the following
-    langfuse_context.update_current_observation(name=f"{email}")
+    # To get trace or observation IDs:
+    trace_id = get_client().get_current_trace_id()
+    observation_id = get_client().get_current_observation_id()
 ```
+**Key Changes from v2:**
+1. Import `observe` and `get_client` directly from `langfuse`.
+2. Replace `langfuse_context` with `get_client()`.
+3. Use `update_current_span()` instead of `update_current_observation()`.
+
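Note: below is a minimal, self-contained sketch of the v3 pattern the rule above describes, not code from this repo. `summarize`, the span name, and the metadata keys are invented for illustration, and it assumes Langfuse credentials are already configured via the standard `LANGFUSE_*` environment variables.

```python
from langfuse import get_client, observe


@observe()  # v3 style: the decorator is called with parentheses
def summarize(text: str) -> str:
    client = get_client()  # replaces the v2 `langfuse_context` singleton
    # Rename the auto-created span and attach metadata in a single call.
    client.update_current_span(
        name="summarize_text",  # hypothetical span name
        metadata={"input_chars": len(text)},
    )
    # Trace/observation IDs are available while the decorated function runs.
    trace_id = client.get_current_trace_id()
    observation_id = client.get_current_observation_id()
    print(f"trace={trace_id} observation={observation_id}")
    return text[:100]  # stand-in for a real LLM call


if __name__ == "__main__":
    summarize("Langfuse v3 smoke test")
```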
diff --git a/common/global_config.yaml b/common/global_config.yaml
index 5c61d32..478a8d1 100644
--- a/common/global_config.yaml
+++ b/common/global_config.yaml
@@ -1,5 +1,6 @@
-model_name: gemini/gemini-3-flash
+model_name: gemini/gemini-3-flash-preview
 dot_global_config_health_check: true
+DEV_ENV: dev
 
 example_parent:
   example_child: "example_value"
@@ -8,8 +9,8 @@ example_parent:
 # LLMs
 ########################################################
 default_llm:
-  default_model: gemini/gemini-3-flash
-  fallback_model: gemini/gemini-2.5-flash
+  default_model: gemini/gemini-3-flash-preview
+  fallback_model: gemini/gemini-2.5-flash-preview
   default_temperature: 0.5
   default_max_tokens: 100000
 
diff --git a/utils/llm/dspy_inference.py b/utils/llm/dspy_inference.py
index 8f5c6d8..962535b 100644
--- a/utils/llm/dspy_inference.py
+++ b/utils/llm/dspy_inference.py
@@ -2,7 +2,7 @@
 from typing import Any
 
 import dspy
-from langfuse.decorators import observe  # type: ignore
+from langfuse import observe
 from litellm.exceptions import RateLimitError, ServiceUnavailableError
 from loguru import logger as log
 from tenacity import (
diff --git a/utils/llm/dspy_langfuse.py b/utils/llm/dspy_langfuse.py
index d2bc70f..daf6685 100644
--- a/utils/llm/dspy_langfuse.py
+++ b/utils/llm/dspy_langfuse.py
@@ -4,8 +4,7 @@
 from dspy.adapters import Image as dspy_Image
 from dspy.signatures import Signature as dspy_Signature
 from dspy.utils.callback import BaseCallback
-from langfuse.client import Langfuse, StatefulGenerationClient  # type: ignore
-from langfuse.decorators import langfuse_context  # type: ignore
+from langfuse import Langfuse, LangfuseGeneration, get_client
 from litellm.cost_calculator import completion_cost
 from loguru import logger as log
 from pydantic import BaseModel, Field, ValidationError
@@ -52,7 +51,7 @@ def __init__(self, signature: type[dspy_Signature]) -> None:
         )
         self.current_prompt = contextvars.ContextVar[str]("current_prompt")
         self.current_completion = contextvars.ContextVar[str]("current_completion")
-        self.current_span = contextvars.ContextVar[StatefulGenerationClient | None](
+        self.current_span = contextvars.ContextVar[LangfuseGeneration | None](
             "current_span"
         )
         self.model_name_at_span_creation = contextvars.ContextVar[str | None](
@@ -91,8 +90,8 @@ def on_module_end(  # noqa
         exception: Exception | None = None,  # noqa
     ) -> None:
         metadata = {
-            "existing_trace_id": langfuse_context.get_current_trace_id(),
-            "parent_observation_id": langfuse_context.get_current_observation_id(),
+            "existing_trace_id": get_client().get_current_trace_id(),
+            "parent_observation_id": get_client().get_current_observation_id(),
         }
         outputs_extracted = {}  # Default to empty dict
         if outputs is not None:
@@ -102,7 +101,7 @@
                 outputs_extracted = {"value": outputs}
         except Exception as e:
             outputs_extracted = {"error_extracting_module_output": str(e)}
-        langfuse_context.update_current_observation(
+        get_client().update_current_span(
             input=self.input_field_values.get(None) or {},
             output=outputs_extracted,
             metadata=metadata,
@@ -134,9 +133,9 @@ def on_lm_start(  # noqa
         self.current_system_prompt.set(system_prompt)
         self.current_prompt.set(user_input)
         self.model_name_at_span_creation.set(model_name)
-        trace_id = langfuse_context.get_current_trace_id()
-        parent_observation_id = langfuse_context.get_current_observation_id()
-        span_obj: StatefulGenerationClient | None = None
+        trace_id = get_client().get_current_trace_id()
+        parent_observation_id = get_client().get_current_observation_id()
+        span_obj: LangfuseGeneration | None = None
         if trace_id:
             span_obj = self.langfuse.generation(
                 input=user_input,
@@ -392,8 +391,8 @@ def on_tool_start(  # noqa
 
         log.debug(f"Tool call started: {tool_name} with args: {tool_args}")
 
-        trace_id = langfuse_context.get_current_trace_id()
-        parent_observation_id = langfuse_context.get_current_observation_id()
+        trace_id = get_client().get_current_trace_id()
+        parent_observation_id = get_client().get_current_observation_id()
 
         if trace_id:
             # Create a span for the tool call
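Note: `dspy_langfuse.py` threads the active generation through `contextvars` so that concurrent inferences each see their own span. Below is a stripped-down sketch of that pattern, independent of the Langfuse SDK: `FakeSpan`, `CallbackSketch`, and the hook bodies are hypothetical stand-ins for `LangfuseGeneration` and the real callback methods.

```python
import contextvars


class FakeSpan:
    """Hypothetical stand-in for a LangfuseGeneration object."""

    def __init__(self, prompt: str) -> None:
        self.prompt = prompt

    def end(self, output: str) -> None:
        print(f"span closed: {self.prompt!r} -> {output!r}")


class CallbackSketch:
    def __init__(self) -> None:
        # Each asyncio task (or thread context) sees its own copy of this
        # variable, so overlapping inferences never clobber each other's span.
        self.current_span = contextvars.ContextVar[FakeSpan | None](
            "current_span", default=None
        )

    def on_lm_start(self, prompt: str) -> None:
        self.current_span.set(FakeSpan(prompt))

    def on_lm_end(self, output: str) -> None:
        span = self.current_span.get()
        if span is not None:
            span.end(output)
        self.current_span.set(None)  # clear state for the next call


cb = CallbackSketch()
cb.on_lm_start("What is 2 + 2?")
cb.on_lm_end("4")
```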