From d58decfed8c4e1aee8b716cb8f8ed8fd7b3287ac Mon Sep 17 00:00:00 2001 From: Hassieb Pakzad <68423100+hassiebp@users.noreply.github.com> Date: Mon, 1 Dec 2025 11:45:18 +0100 Subject: [PATCH 1/2] fix(datasets): add schema arguments to create_dataset --- langfuse/_client/client.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/langfuse/_client/client.py b/langfuse/_client/client.py index 4c76f11e2..36da0e4bf 100644 --- a/langfuse/_client/client.py +++ b/langfuse/_client/client.py @@ -3250,6 +3250,8 @@ def create_dataset( name: str, description: Optional[str] = None, metadata: Optional[Any] = None, + input_schema: Optional[Any] = None, + expected_output_schema: Optional[Any] = None, ) -> Dataset: """Create a dataset with the given name on Langfuse. @@ -3257,13 +3259,19 @@ def create_dataset( name: Name of the dataset to create. description: Description of the dataset. Defaults to None. metadata: Additional metadata. Defaults to None. + input_schema: JSON Schema for validating dataset item inputs. When set, all new items will be validated against this schema. + expected_output_schema: JSON Schema for validating dataset item expected outputs. When set, all new items will be validated against this schema. Returns: Dataset: The created dataset as returned by the Langfuse API. """ try: body = CreateDatasetRequest( - name=name, description=description, metadata=metadata + name=name, + description=description, + metadata=metadata, + inputSchema=input_schema, + expectedOutputSchema=expected_output_schema, ) langfuse_logger.debug(f"Creating datasets {body}") From 030b82b520be1fce98776fc9d78a1e33b7963373 Mon Sep 17 00:00:00 2001 From: Hassieb Pakzad <68423100+hassiebp@users.noreply.github.com> Date: Mon, 1 Dec 2025 13:44:56 +0100 Subject: [PATCH 2/2] fix tests --- tests/test_datasets.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_datasets.py b/tests/test_datasets.py index c64a4adc1..c3ad7a318 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -146,7 +146,7 @@ def test_dataset_run_with_metadata_and_description(): dataset_name = create_uuid() langfuse.create_dataset(name=dataset_name) - input = json.dumps({"input": "Hello World"}) + input = {"input": "Hello World"} langfuse.create_dataset_item(dataset_name=dataset_name, input=input) dataset = langfuse.get_dataset(dataset_name) @@ -187,7 +187,7 @@ def test_get_dataset_runs(): dataset_name = create_uuid() langfuse.create_dataset(name=dataset_name) - input = json.dumps({"input": "Hello World"}) + input = {"input": "Hello World"} langfuse.create_dataset_item(dataset_name=dataset_name, input=input) dataset = langfuse.get_dataset(dataset_name)