# Patch summary (free text ahead of the first "diff --git" header):
#
# * langfuse/_client/client.py: the two `create_score` overloads and the
#   implementation gain `timestamp: Optional[datetime] = None`; the
#   "score-create" event now uses `timestamp or _get_timestamp()` for its
#   "timestamp" field, and the docstring documents the new argument.
# * langfuse/_client/span.py: `score` and `score_trace` (overloads and
#   implementations) gain the same parameter, document it, and pass
#   `timestamp=timestamp` in the call that closes each method.
# * tests/test_core_sdk.py: imports `timedelta` and adds
#   `test_create_score_with_custom_timestamp`, which creates a score stamped
#   one hour in the past and asserts the timestamp read back from the API is
#   within one second of it.
#
# NOTE(review): no hunk adds a `datetime` import to client.py or span.py --
# confirm both modules already import it, since the new annotations need it.
# NOTE(review): line breaks and indentation below were reconstructed from a
# whitespace-collapsed copy of this patch (blank context lines were placed
# using the hunk line counts); verify against the original commit before
# applying.
diff --git a/langfuse/_client/client.py b/langfuse/_client/client.py
index 46586f3b0..a3f653ada 100644
--- a/langfuse/_client/client.py
+++ b/langfuse/_client/client.py
@@ -1972,6 +1972,7 @@ def create_score(
         comment: Optional[str] = None,
         config_id: Optional[str] = None,
         metadata: Optional[Any] = None,
+        timestamp: Optional[datetime] = None,
     ) -> None: ...
 
     @overload
@@ -1989,6 +1990,7 @@ def create_score(
         comment: Optional[str] = None,
         config_id: Optional[str] = None,
         metadata: Optional[Any] = None,
+        timestamp: Optional[datetime] = None,
     ) -> None: ...
 
     def create_score(
@@ -2005,6 +2007,7 @@ def create_score(
         comment: Optional[str] = None,
         config_id: Optional[str] = None,
         metadata: Optional[Any] = None,
+        timestamp: Optional[datetime] = None,
     ) -> None:
         """Create a score for a specific trace or observation.
 
@@ -2023,6 +2026,7 @@ def create_score(
             comment: Optional comment or explanation for the score
             config_id: Optional ID of a score config defined in Langfuse
             metadata: Optional metadata to be attached to the score
+            timestamp: Optional timestamp for the score (defaults to current UTC time)
 
         Example:
             ```python
@@ -2069,7 +2073,7 @@ def create_score(
             event = {
                 "id": self.create_trace_id(),
                 "type": "score-create",
-                "timestamp": _get_timestamp(),
+                "timestamp": timestamp or _get_timestamp(),
                 "body": new_body,
             }
 
diff --git a/langfuse/_client/span.py b/langfuse/_client/span.py
index 72ebb6bee..2f0000e41 100644
--- a/langfuse/_client/span.py
+++ b/langfuse/_client/span.py
@@ -276,6 +276,7 @@ def score(
         data_type: Optional[Literal["NUMERIC", "BOOLEAN"]] = None,
         comment: Optional[str] = None,
         config_id: Optional[str] = None,
+        timestamp: Optional[datetime] = None,
     ) -> None: ...
 
     @overload
@@ -288,6 +289,7 @@ def score(
         data_type: Optional[Literal["CATEGORICAL"]] = "CATEGORICAL",
         comment: Optional[str] = None,
         config_id: Optional[str] = None,
+        timestamp: Optional[datetime] = None,
     ) -> None: ...
 
     def score(
@@ -299,6 +301,7 @@ def score(
         data_type: Optional[ScoreDataType] = None,
         comment: Optional[str] = None,
         config_id: Optional[str] = None,
+        timestamp: Optional[datetime] = None,
     ) -> None:
         """Create a score for this specific span.
 
@@ -312,6 +315,7 @@ def score(
             data_type: Type of score (NUMERIC, BOOLEAN, or CATEGORICAL)
             comment: Optional comment or explanation for the score
             config_id: Optional ID of a score config defined in Langfuse
+            timestamp: Optional timestamp for the score (defaults to current UTC time)
 
         Example:
             ```python
@@ -337,6 +341,7 @@ def score(
             data_type=cast(Literal["CATEGORICAL"], data_type),
             comment=comment,
             config_id=config_id,
+            timestamp=timestamp,
         )
 
     @overload
@@ -349,6 +354,7 @@ def score_trace(
         data_type: Optional[Literal["NUMERIC", "BOOLEAN"]] = None,
         comment: Optional[str] = None,
         config_id: Optional[str] = None,
+        timestamp: Optional[datetime] = None,
     ) -> None: ...
 
     @overload
@@ -361,6 +367,7 @@ def score_trace(
         data_type: Optional[Literal["CATEGORICAL"]] = "CATEGORICAL",
         comment: Optional[str] = None,
         config_id: Optional[str] = None,
+        timestamp: Optional[datetime] = None,
     ) -> None: ...
 
     def score_trace(
@@ -372,6 +379,7 @@ def score_trace(
         data_type: Optional[ScoreDataType] = None,
         comment: Optional[str] = None,
         config_id: Optional[str] = None,
+        timestamp: Optional[datetime] = None,
     ) -> None:
         """Create a score for the entire trace that this span belongs to.
 
@@ -386,6 +394,7 @@ def score_trace(
             data_type: Type of score (NUMERIC, BOOLEAN, or CATEGORICAL)
             comment: Optional comment or explanation for the score
             config_id: Optional ID of a score config defined in Langfuse
+            timestamp: Optional timestamp for the score (defaults to current UTC time)
 
         Example:
             ```python
@@ -410,6 +419,7 @@ def score_trace(
             data_type=cast(Literal["CATEGORICAL"], data_type),
             comment=comment,
             config_id=config_id,
+            timestamp=timestamp,
         )
 
     def _set_processed_span_attributes(
diff --git a/tests/test_core_sdk.py b/tests/test_core_sdk.py
index 39c8de92d..b2661eef5 100644
--- a/tests/test_core_sdk.py
+++ b/tests/test_core_sdk.py
@@ -1,7 +1,7 @@
 import os
 import time
 from asyncio import gather
-from datetime import datetime, timezone
+from datetime import datetime, timedelta, timezone
 from time import sleep
 
 import pytest
@@ -257,6 +257,60 @@ def test_create_categorical_score():
     assert trace["scores"][0]["stringValue"] == "high score"
 
 
+def test_create_score_with_custom_timestamp():
+    langfuse = Langfuse()
+    api_wrapper = LangfuseAPI()
+
+    # Create a span and set trace properties
+    with langfuse.start_as_current_span(name="test-span") as span:
+        span.update_trace(
+            name="test-custom-timestamp",
+            user_id="test",
+            metadata="test",
+        )
+        # Get trace ID for later use
+        trace_id = span.trace_id
+
+    # Ensure data is sent
+    langfuse.flush()
+    sleep(2)
+
+    custom_timestamp = datetime.now(timezone.utc) - timedelta(hours=1)
+    score_id = create_uuid()
+    langfuse.create_score(
+        score_id=score_id,
+        trace_id=trace_id,
+        name="custom-timestamp-score",
+        value=0.85,
+        data_type="NUMERIC",
+        timestamp=custom_timestamp,
+    )
+
+    # Ensure data is sent
+    langfuse.flush()
+    sleep(2)
+
+    # Retrieve and verify
+    trace = api_wrapper.get_trace(trace_id)
+
+    assert trace["scores"][0]["id"] == score_id
+    assert trace["scores"][0]["dataType"] == "NUMERIC"
+    assert trace["scores"][0]["value"] == 0.85
+
+    # Verify timestamp is close to our custom timestamp
+    # Parse the timestamp from the API response
+    response_timestamp = datetime.fromisoformat(
+        trace["scores"][0]["timestamp"].replace("Z", "+00:00")
+    )
+
+    # Check that the timestamps are within 1 second of each other
+    # (allowing for some processing time and rounding)
+    time_diff = abs((response_timestamp - custom_timestamp).total_seconds())
+    assert time_diff < 1, (
+        f"Timestamp difference too large: {time_diff}s. Expected < 1s. Custom: {custom_timestamp}, Response: {response_timestamp}"
+    )
+
+
 def test_create_trace():
     langfuse = Langfuse()
     trace_name = create_uuid()