diff --git a/langfuse/client.py b/langfuse/client.py
index 30ef50ef9..0de2bdcb8 100644
--- a/langfuse/client.py
+++ b/langfuse/client.py
@@ -22,12 +22,17 @@
 import urllib.parse
 import warnings
 from dataclasses import dataclass
 
+from langfuse.api.core.request_options import RequestOptions
 from langfuse.api.resources.commons.types.dataset_run_with_items import (
     DatasetRunWithItems,
 )
 from langfuse.api.resources.commons.types.observations_view import ObservationsView
+from langfuse.api.resources.commons.types.score_source import ScoreSource
 from langfuse.api.resources.commons.types.session import Session
 from langfuse.api.resources.commons.types.trace_with_details import TraceWithDetails
+from langfuse.api.resources.dataset_items.types.paginated_dataset_items import PaginatedDatasetItems
+from langfuse.api.resources.datasets.types.paginated_datasets import PaginatedDatasets
+from langfuse.api.resources.prompts.types.prompt_meta import PromptMeta
 from langfuse.api.resources.datasets.types.paginated_dataset_runs import (
@@ -126,6 +128,29 @@ class FetchSessionsResponse:
     data: typing.List[Session]
     meta: MetaResponse
+
+
+@dataclass
+class FetchScoreResponse:
+    """Response object for fetch_score method."""
+
+    data: ScoreBody
+
+
+@dataclass
+class FetchScoresResponse:
+    """Response object for fetch_scores method."""
+
+    data: typing.List[ScoreBody]
+    meta: MetaResponse
+
+
+@dataclass
+class FetchPromptsResponse:
+    """Response object for fetch_prompts method."""
+
+    data: typing.List[PromptMeta]
+    meta: MetaResponse
 
 
 class Langfuse(object):
     """Langfuse Python client.
@@ -368,6 +391,32 @@ def get_dataset(
         except Exception as e:
             self.log.exception(e)
             raise e
+
+    def get_datasets(
+        self,
+        *,
+        page: Optional[int] = None,
+        limit: Optional[int] = None,
+        request_options: Optional[RequestOptions] = None,
+    ) -> PaginatedDatasets:
+        """Get all datasets.
+
+        Args:
+            page (Optional[int]): Page number of the datasets to return, starts at 1. Defaults to None.
+            limit (Optional[int]): Maximum number of datasets to return. Defaults to 50.
+            request_options (Optional[RequestOptions]): Additional request options. Defaults to None.
+
+        Returns:
+            PaginatedDatasets: The datasets on `data` and pagination metadata on `meta`.
+        """
+        try:
+            self.log.debug("Getting datasets")
+            return self.client.datasets.list(
+                page=page, limit=limit, request_options=request_options
+            )
+        except Exception as e:
+            self.log.exception(e)
+            raise e
 
     def get_dataset_item(self, id: str) -> "DatasetItemClient":
         """Get the dataset item with the given id."""
@@ -539,6 +582,42 @@ def create_dataset_item(
         except Exception as e:
             self.log.exception(e)
             raise e
+
+    def get_dataset_items(
+        self,
+        dataset_name: typing.Optional[str] = None,
+        source_trace_id: typing.Optional[str] = None,
+        source_observation_id: typing.Optional[str] = None,
+        page: typing.Optional[int] = None,
+        limit: typing.Optional[int] = None,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> PaginatedDatasetItems:
+        """Get all dataset items.
+
+        Args:
+            dataset_name (Optional[str]): Name of the dataset to filter by. Defaults to None.
+            source_trace_id (Optional[str]): Source trace id to filter by. Defaults to None.
+            source_observation_id (Optional[str]): Source observation id to filter by. Defaults to None.
+            page (Optional[int]): Page number of the dataset items to return, starts at 1. Defaults to None.
+            limit (Optional[int]): Maximum number of dataset items to return. Defaults to 50.
+            request_options (Optional[RequestOptions]): Additional request options. Defaults to None.
+
+        Returns:
+            PaginatedDatasetItems: The dataset items on `data` and pagination metadata on `meta`.
+ """ + try: + self.log.debug("Getting dataset items") + return self.client.dataset_items.list( + dataset_name=dataset_name, source_trace_id=source_trace_id, source_observation_id=source_observation_id, page=page, limit=limit, request_options=request_options + ) + except Exception as e: + self.log.exception(e) + raise e + def fetch_trace( self, @@ -932,6 +1007,137 @@ def fetch_sessions( except Exception as e: self.log.exception(e) raise e + + def fetch_score( + self, + id: str, + ) -> FetchScoreResponse: + """Get a score in the current project with the given identifier. + + Args: + id: The identifier of the score to fetch. + + Returns: + FetchScoreResponse: The score with the given id on `data`. + + Raises: + Exception: If the score with the given id could not be found within the authenticated project or if an error occurred during the request. + """ + try: + self.log.debug(f"Getting score {id}") + res = self.client.score.get_by_id(id) + return FetchScoreResponse(data=res) + except Exception as e: + self.log.exception(e) + raise e + + def fetch_scores( + self, + *, + page: typing.Optional[int] = None, + limit: typing.Optional[int] = None, + user_id: typing.Optional[str] = None, + name: typing.Optional[str] = None, + from_timestamp: typing.Optional[dt.datetime] = None, + to_timestamp: typing.Optional[dt.datetime] = None, + source: typing.Optional[ScoreSource] = None, + operator: typing.Optional[str] = None, + value: typing.Optional[float] = None, + score_ids: typing.Optional[str] = None, + config_id: typing.Optional[str] = None, + data_type: typing.Optional[ScoreDataType] = None, + request_options: typing.Optional[RequestOptions] = None, + + ) -> FetchScoresResponse: + """Get a list of scores in the current project. + + Args: + page (Optional[int]): Page number of the scores to return. Defaults to None. + limit (Optional[int]): Maximum number of scores to return. Defaults to None. + user_id (Optional[str]): User identifier. Defaults to None. + name (Optional[str]): Name of the scores to return. Defaults to None. + from_timestamp (Optional[dt.datetime]): Retrieve only scores with a timestamp on or after this datetime. Defaults to None. + to_timestamp (Optional[dt.datetime]): Retrieve only scores with a timestamp before this datetime. Defaults to None. + source (Optional[ScoreSource]): Source of the scores. Defaults to None. + operator (Optional[str]): Operator of the scores. Defaults to None. + value (Optional[float]): Value of the scores. Defaults to None. + score_ids (Optional[str]): Score identifier. Defaults to None. + config_id (Optional[str]): Configuration identifier. Defaults to None. + data_type (Optional[ScoreDataType]): Data type of the scores. Defaults to None. + request_options (Optional[RequestOptions]): Type of the score. Defaults to None. + + Returns: + FetchScoresResponse, list of scores on `data` and metadata on `meta`. + + Raises: + Exception: If an error occurred during the request. + """ + try: + self.log.debug( + f"Getting scores... 
+                f"Getting scores... {page}, {limit}, {user_id}, {name}, {from_timestamp}, {to_timestamp}, {source}, {operator}, {value}, {score_ids}, {config_id}, {data_type}, {request_options}"
+            )
+            res = self.client.score.get(
+                page=page,
+                limit=limit,
+                user_id=user_id,
+                name=name,
+                from_timestamp=from_timestamp,
+                to_timestamp=to_timestamp,
+                source=source,
+                operator=operator,
+                value=value,
+                score_ids=score_ids,
+                config_id=config_id,
+                data_type=data_type,
+                request_options=request_options,
+            )
+            return FetchScoresResponse(data=res.data, meta=res.meta)
+        except Exception as e:
+            self.log.exception(e)
+            raise e
+
+    def fetch_prompts(
+        self,
+        *,
+        page: typing.Optional[int] = None,
+        limit: typing.Optional[int] = None,
+        name: typing.Optional[str] = None,
+        label: typing.Optional[str] = None,
+        tag: typing.Optional[str] = None,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> FetchPromptsResponse:
+        """Get a list of prompts in the current project matching the given parameters.
+
+        Args:
+            page (Optional[int]): Page number of the prompts to return. Defaults to None.
+            limit (Optional[int]): Maximum number of prompts to return. Defaults to None.
+            name (Optional[str]): Retrieve only prompts with this name. Defaults to None.
+            label (Optional[str]): Retrieve only prompts with this label. Defaults to None.
+            tag (Optional[str]): Retrieve only prompts with this tag. Defaults to None.
+            request_options (Optional[RequestOptions]): Additional request options. Defaults to None.
+
+        Returns:
+            FetchPromptsResponse, list of prompts on `data` and metadata on `meta`.
+
+        Raises:
+            Exception: If an error occurred during the request.
+        """
+        try:
+            self.log.debug(
+                f"Getting prompts... {page}, {limit}, {name}, {label}, {tag}, {request_options}"
+            )
+            res = self.client.prompts.list(
+                page=page,
+                limit=limit,
+                name=name,
+                label=label,
+                tag=tag,
+                request_options=request_options,
+            )
+            return FetchPromptsResponse(data=res.data, meta=res.meta)
+        except Exception as e:
+            self.log.exception(e)
+            raise e
 
     @overload
     def get_prompt(
diff --git a/tests/test_core_sdk.py b/tests/test_core_sdk.py
index ec0f69e2f..e59fe7dcc 100644
--- a/tests/test_core_sdk.py
+++ b/tests/test_core_sdk.py
@@ -6,6 +6,9 @@
 from langfuse.client import (
     FetchObservationResponse,
     FetchObservationsResponse,
+    FetchPromptsResponse,
+    FetchScoreResponse,
+    FetchScoresResponse,
     FetchSessionsResponse,
     FetchTraceResponse,
     FetchTracesResponse,
@@ -1465,6 +1468,197 @@ def test_fetch_sessions():
     assert response.data[0].id in [session1, session2, session3]
 
 
+def test_fetch_scores():
+    langfuse = Langfuse()
+
+    # Create a trace with multiple scores
+    name = create_uuid()
+    trace = langfuse.trace(name=name)
+    trace.score(name="harmfulness", value=0.5)
+    trace.score(name="quality", value=1)
+    trace.score(name="relevance", value=0.8)
+    langfuse.flush()
+
+    # Fetch scores
+    response = langfuse.fetch_scores()
+
+    # Assert the structure of the response; scores are returned most recent first
+    assert isinstance(response, FetchScoresResponse)
+    assert hasattr(response, "data")
+    assert hasattr(response, "meta")
+    assert isinstance(response.data, list)
+    assert response.data[2].name == "harmfulness"
+    assert response.data[2].value == 0.5
+    assert response.data[1].name == "quality"
+    assert response.data[1].value == 1
+    assert response.data[0].name == "relevance"
+    assert response.data[0].value == 0.8
+
+    # Fetch only one score
+    response = langfuse.fetch_scores(limit=1, page=2)
+    assert len(response.data) == 1
+
+
+def test_fetch_score_by_id():
+    langfuse = Langfuse()
+
+    # Create a trace with two scores
+    name = create_uuid()
+    trace = langfuse.trace(name=name)
+    trace.score(name="harmfulness", value=0.55)
+    trace.score(name="quality", value=0.99)
+    langfuse.flush()
+
+    scores = langfuse.fetch_scores()
+    score_1 = scores.data[1]
+    score_2 = scores.data[0]
+
+    # Fetch each score by its id
+    res_1 = langfuse.fetch_score(id=score_1.id)
+    res_2 = langfuse.fetch_score(id=score_2.id)
+
+    # Assert the structure of the response
+    assert isinstance(res_1, FetchScoreResponse)
+    assert hasattr(res_1, "data")
+
+    # Check that the correct score is returned
+    assert res_1.data.name == "harmfulness"
+    assert res_1.data.value == 0.55
+    assert res_2.data.name == "quality"
+    assert res_2.data.value == 0.99
+
+
+def test_fetch_prompts():
+    langfuse = Langfuse()
+
+    # Create multiple versions of a prompt
+    langfuse.create_prompt(
+        name="simple-prompt",
+        prompt="What is the weather like today?",
+        config={
+            "model": "gpt-3.5-turbo-1106",
+            "temperature": 0,
+        },
+        labels=["production"],
+    )
+
+    langfuse.create_prompt(
+        name="simple-prompt",
+        prompt="What is the weather like today?",
+        config={
+            "model": "gpt-3.5-turbo-1106",
+            "temperature": 0.7,
+        },
+        labels=["staging"],
+    )
+
+    langfuse.create_prompt(
+        name="simple-prompt",
+        prompt="What is the weather like today?",
+        config={
+            "model": "gpt-4o-mini",
+            "temperature": 0.5,
+        },
+        labels=["development"],
+    )
+
+    langfuse.flush()
+
+    # Fetch prompts
+    response = langfuse.fetch_prompts(name="simple-prompt")
+
+    # Assert the structure of the response
+    assert isinstance(response, FetchPromptsResponse)
+    assert hasattr(response, "data")
+    assert hasattr(response, "meta")
+    assert isinstance(response.data, list)
+
+    # Check that all versions and labels are present
+    assert response.data[0].name == "simple-prompt"
+    assert set(response.data[0].labels) == {
+        "latest",
+        "production",
+        "staging",
+        "development",
+    }
+    assert response.data[0].versions == [1, 2, 3]
+    assert len(response.data[0].versions) == 3
+
+
+def test_get_dataset_items():
+    langfuse = Langfuse()
+    name = create_uuid()
+    langfuse.create_dataset(name=name)
+
+    input = {"input": "Hello World"}
+    for _ in range(99):
+        langfuse.create_dataset_item(dataset_name=name, input=input)
+
+    # Fetch the first page of dataset items with the default limit of 50
+    dataset_items = langfuse.get_dataset_items(name)
+    assert len(dataset_items.data) == 50
+    assert dataset_items.meta.total_items == 99
+    assert dataset_items.meta.total_pages == 2
+    assert dataset_items.meta.page == 1
+    assert dataset_items.meta.limit == 50
+
+    # Fetch dataset items with explicit pagination
+    dataset_items_2 = langfuse.get_dataset_items(dataset_name=name, page=1, limit=49)
+    assert len(dataset_items_2.data) == 49
+    assert dataset_items_2.meta.total_items == 99
+    assert dataset_items_2.meta.total_pages == 3
+    assert dataset_items_2.meta.page == 1
+    assert dataset_items_2.meta.limit == 49
+
+    dataset_items_3 = langfuse.get_dataset_items(name, page=2, limit=50)
+    assert len(dataset_items_3.data) == 49
+    assert dataset_items_3.meta.total_items == 99
+    assert dataset_items_3.meta.total_pages == 2
+    assert dataset_items_3.meta.page == 2
+    assert dataset_items_3.meta.limit == 50
+
+
+def test_get_datasets():
+    langfuse = Langfuse()
+    name = create_uuid()
+    langfuse.create_dataset(name=name)
+
+    # Fetch datasets, considering that datasets from previous tests might still exist
+    datasets = langfuse.get_datasets()
+    initial_count = len(datasets.data)
+    assert datasets.meta.total_items == initial_count
+    assert datasets.meta.total_pages == (initial_count + 49) // 50  # ceiling division
+    assert datasets.meta.page == 1
+    assert datasets.meta.limit == 50
+
+    name_2 = create_uuid()
+    langfuse.create_dataset(name=name_2)
+
+    datasets_2 = langfuse.get_datasets()
+    assert len(datasets_2.data) == initial_count + 1
+    assert datasets_2.meta.total_items == initial_count + 1
+    assert datasets_2.meta.total_pages == (initial_count + 1 + 49) // 50  # ceiling division
+    assert datasets_2.meta.page == 1
+    assert datasets_2.meta.limit == 50
+
+    name_3 = create_uuid()
+    langfuse.create_dataset(name=name_3)
+
+    datasets_3 = langfuse.get_datasets(page=1, limit=2)
+    assert len(datasets_3.data) == 2
+    assert datasets_3.meta.total_items == initial_count + 2
+    assert datasets_3.meta.total_pages == (initial_count + 2 + 1) // 2  # ceiling division
+    assert datasets_3.meta.page == 1
+    assert datasets_3.meta.limit == 2
+
+    datasets_4 = langfuse.get_datasets(page=2, limit=2)
+    assert len(datasets_4.data) == min(2, initial_count + 2 - 2)
+    assert datasets_4.meta.total_items == initial_count + 2
+    assert datasets_4.meta.total_pages == (initial_count + 2 + 1) // 2  # ceiling division
+    assert datasets_4.meta.page == 2
+    assert datasets_4.meta.limit == 2
+
+
 def test_create_trace_sampling_zero():
     langfuse = Langfuse(debug=True, sample_rate=0)
     api_wrapper = LangfuseAPI()