diff --git a/langfuse/_client/client.py b/langfuse/_client/client.py index 4c76f11e2..36da0e4bf 100644 --- a/langfuse/_client/client.py +++ b/langfuse/_client/client.py @@ -3250,6 +3250,8 @@ def create_dataset( name: str, description: Optional[str] = None, metadata: Optional[Any] = None, + input_schema: Optional[Any] = None, + expected_output_schema: Optional[Any] = None, ) -> Dataset: """Create a dataset with the given name on Langfuse. @@ -3257,13 +3259,19 @@ def create_dataset( name: Name of the dataset to create. description: Description of the dataset. Defaults to None. metadata: Additional metadata. Defaults to None. + input_schema: JSON Schema for validating dataset item inputs. When set, all new items will be validated against this schema. + expected_output_schema: JSON Schema for validating dataset item expected outputs. When set, all new items will be validated against this schema. Returns: Dataset: The created dataset as returned by the Langfuse API. """ try: body = CreateDatasetRequest( - name=name, description=description, metadata=metadata + name=name, + description=description, + metadata=metadata, + inputSchema=input_schema, + expectedOutputSchema=expected_output_schema, ) langfuse_logger.debug(f"Creating datasets {body}") diff --git a/tests/test_datasets.py b/tests/test_datasets.py index c64a4adc1..c3ad7a318 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -146,7 +146,7 @@ def test_dataset_run_with_metadata_and_description(): dataset_name = create_uuid() langfuse.create_dataset(name=dataset_name) - input = json.dumps({"input": "Hello World"}) + input = {"input": "Hello World"} langfuse.create_dataset_item(dataset_name=dataset_name, input=input) dataset = langfuse.get_dataset(dataset_name) @@ -187,7 +187,7 @@ def test_get_dataset_runs(): dataset_name = create_uuid() langfuse.create_dataset(name=dataset_name) - input = json.dumps({"input": "Hello World"}) + input = {"input": "Hello World"} langfuse.create_dataset_item(dataset_name=dataset_name, input=input) dataset = langfuse.get_dataset(dataset_name)