diff --git a/README.md b/README.md index e918418d..45ef4f37 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,7 @@ python-benedict is a dict subclass with **keylist/keypath/keyattr** support, **I - [Disable keypath functionality](#disable-keypath-functionality) - [List index support](#list-index-support) - [I/O](#io) + - [Data validation using Pydantic models](#data-validation-using-pydantic-models) - [API](#api) - [Utility methods](#utility-methods) - [I/O methods](#io-methods) @@ -64,6 +65,7 @@ Here the hierarchy of possible installation targets available when running `pip - `[yaml]` - `[parse]` - `[s3]` + - `[validate]` ## Usage @@ -126,7 +128,8 @@ or using the `getter/setter` property. d.keyattr_dynamic = True ``` -> **Warning** - even if this feature is very useful, it has some obvious limitations: it works only for string keys that are *unprotected* (not starting with an `_`) and that don't clash with the currently supported methods names. +> [!WARNING] +> Even if this feature is very useful, it has some obvious limitations: it works only for string keys that are *unprotected* (not starting with an `_`) and that don't clash with the currently supported methods names. ### Keylist Wherever a **key** is used, it is possible to use also a **list of keys**. @@ -278,6 +281,31 @@ Here are the details of the supported formats, operations and extra options docs | `xml` | :white_check_mark: | :white_check_mark: | [xmltodict](https://github.com/martinblech/xmltodict) | | `yaml` | :white_check_mark: | :white_check_mark: | [PyYAML](https://pyyaml.org/wiki/PyYAMLDocumentation) | +### Data validation using Pydantic models + +> [!IMPORTANT] +> This feature **requires** the `validate` extra to be installed: `pip install "python-benedict[validate]"` + +You can validate data in different ways: + +1. Using the `validate` method directly +```python +d = benedict(my_data) +d.validate(schema=MySchema) +``` + +2. 
Using the `schema` parameter during initialization +```python +d = benedict(my_data, schema=MySchema) +``` + +3. Using the `schema` parameter with any `from_{format}` method +```python +d = benedict.from_json(my_data, schema=MySchema) +``` + +If validation fails, a `ValidationError` will be raised with details about what went wrong. + ### API - **Utility methods** @@ -332,6 +360,7 @@ Here are the details of the supported formats, operations and extra options docs - [`to_toml`](#to_toml) - [`to_xml`](#to_xml) - [`to_yaml`](#to_yaml) + - [`validate`](#validate) - **Parse methods** @@ -814,6 +843,14 @@ s = d.to_xml(**kwargs) s = d.to_yaml(**kwargs) ``` +#### `validate` + +```python +# Validate the dict and update it using a Pydantic schema. +# A ValidationError is raised in case of failure. +d.validate(schema=MySchema) +``` + ### Parse methods These methods are wrappers of the `get` method, they parse data trying to return it in the expected type. diff --git a/benedict/dicts/io/io_dict.py b/benedict/dicts/io/io_dict.py index c70a5d53..fdd722ec 100644 --- a/benedict/dicts/io/io_dict.py +++ b/benedict/dicts/io/io_dict.py @@ -9,7 +9,8 @@ from benedict.dicts.base import BaseDict from benedict.dicts.io import io_util from benedict.exceptions import ExtrasRequireModuleNotFoundError -from benedict.utils import type_util +from benedict.utils import pydantic_util, type_util +from benedict.utils.pydantic_util import PydanticModel _K = TypeVar("_K", default=str) _V = TypeVar("_V", default=Any) @@ -29,7 +30,11 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: d = IODict._decode_init(arg, **kwargs) super().__init__(d) return + + schema = kwargs.pop("schema", None) super().__init__(*args, **kwargs) + if schema: + self.validate(schema=schema) @staticmethod def _decode_init(s: str | Path, **kwargs: Any) -> dict[str, Any]: @@ -37,10 +42,12 @@ def _decode_init(s: str | Path, **kwargs: Any) -> dict[str, Any]: default_format = autodetected_format or "json" format = 
kwargs.pop("format", default_format).lower() # decode data-string and initialize with dict data. - return IODict._decode(s, format, **kwargs) + data = IODict._decode(s, format, **kwargs) + return data @staticmethod def _decode(s: str | Path, format: str, **kwargs: Any) -> dict[str, Any]: + schema = kwargs.pop("schema", None) data = None try: data = io_util.decode(s, format, **kwargs) @@ -54,12 +61,15 @@ def _decode(s: str | Path, format: str, **kwargs: Any) -> dict[str, Any]: ) from None # if possible return data as dict, otherwise raise exception if type_util.is_dict(data): - return data + pass elif type_util.is_list(data): # force list to dict - return {"values": data} + data = {"values": data} else: raise ValueError(f"Invalid data type: {type(data)}, expected dict or list.") + if schema: + data = pydantic_util.validate_data(data, schema=schema) + return data @staticmethod def _encode(d: Any, format: str, **kwargs: Any) -> Any: @@ -353,3 +363,14 @@ def to_yaml(self, **kwargs: Any) -> str: A ValueError is raised in case of failure. """ return cast("str", self._encode(self.dict(), "yaml", **kwargs)) + + def validate(self, *, schema: PydanticModel) -> None: + """ + Validate the dict and update it using a Pydantic schema. 
+ + Args: + schema: Pydantic model class for validation + """ + data = pydantic_util.validate_data(self, schema=schema) + self.clear() + self.update(data) diff --git a/benedict/extras.py b/benedict/extras.py index df087aca..daa8b6e0 100644 --- a/benedict/extras.py +++ b/benedict/extras.py @@ -5,6 +5,7 @@ "require_parse", "require_s3", "require_toml", + "require_validate", "require_xls", "require_xml", "require_yaml", @@ -32,6 +33,10 @@ def require_toml(*, installed: bool) -> None: _require_optional_dependencies(target="toml", installed=installed) +def require_validate(*, installed: bool) -> None: + _require_optional_dependencies(target="validate", installed=installed) + + def require_xls(*, installed: bool) -> None: _require_optional_dependencies(target="xls", installed=installed) diff --git a/benedict/utils/pydantic_util.py b/benedict/utils/pydantic_util.py new file mode 100644 index 00000000..19c50861 --- /dev/null +++ b/benedict/utils/pydantic_util.py @@ -0,0 +1,36 @@ +from typing import Any + +from benedict.extras import require_validate + +try: + from pydantic import BaseModel + + pydantic_installed = True +except ModuleNotFoundError: + pydantic_installed = False + BaseModel = None + +PydanticModel = type["BaseModel"] + + +def _is_pydantic_model(obj: Any) -> bool: + """ + Check if an object is a Pydantic model class. + """ + return pydantic_installed and isinstance(obj, type) and issubclass(obj, BaseModel) + + +def validate_data(data: Any, *, schema: PydanticModel | None = None) -> Any: + """ + Validate data against a Pydantic schema if provided. + """ + if schema is None: + return data + + require_validate(installed=pydantic_installed) + + if not _is_pydantic_model(schema): + raise ValueError("Invalid schema. 
Schema must be a Pydantic model class.") + + validated = schema.model_validate(data) + return validated.model_dump() diff --git a/pyproject.toml b/pyproject.toml index e89618fb..992b50ab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -118,7 +118,7 @@ Twitter = "https://twitter.com/fabiocaccamo" [project.optional-dependencies] all = [ - "python-benedict[io,parse,s3]", + "python-benedict[io,parse,s3,validate]", ] html = [ "beautifulsoup4 >= 4.12.0, < 5.0.0", @@ -139,6 +139,9 @@ s3 = [ toml = [ "toml >= 0.10.2, < 1.0.0", ] +validate = [ + "pydantic >= 1.10.0", +] xls = [ "openpyxl >= 3.0.0, < 4.0.0", "xlrd >= 2.0.0, < 3.0.0", diff --git a/requirements.txt b/requirements.txt index bef236b0..802ebe8a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,6 +4,7 @@ ftfy == 6.3.1 mailchecker == 6.0.18 openpyxl == 3.1.5 phonenumbers == 9.0.15 +pydantic == 2.11.3 python-dateutil == 2.9.0.post0 python-fsutil == 0.15.0 python-slugify == 8.0.4 diff --git a/tests/dicts/io/test_io_dict_schema.py b/tests/dicts/io/test_io_dict_schema.py new file mode 100644 index 00000000..524b0de6 --- /dev/null +++ b/tests/dicts/io/test_io_dict_schema.py @@ -0,0 +1,77 @@ +import json +import unittest + +from pydantic import BaseModel, ValidationError + +from benedict import benedict + + +class TestIODictSchema(unittest.TestCase): + def setUp(self): + class User(BaseModel): + name: str + age: int + email: str + + class UserList(BaseModel): + users: list[User] + + class UserOptional(BaseModel): + name: str + age: int | None = None + email: str | None = None + + self.User = User + self.UserList = UserList + self.UserOptional = UserOptional + self.valid_data = { + "name": "John", + "age": 30, + "email": "john@example.com", + } + self.invalid_data = { + "name": "John", + "age": "not_an_int", + "email": "john@example.com", + } + self.minimal_data = {"name": "John"} + + def test_constructor_with_schema(self): + d = benedict(self.valid_data, schema=self.User) + self.assertEqual(d["name"], 
"John") + self.assertEqual(d["age"], 30) + self.assertEqual(d["email"], "john@example.com") + + with self.assertRaises(ValidationError): + benedict(self.invalid_data, schema=self.User) + + def test_constructor_with_schema_and_optional_fields(self): + d = benedict(self.minimal_data, schema=self.UserOptional) + self.assertEqual(d["name"], "John") + self.assertIsNone(d.get("age")) + self.assertIsNone(d.get("email")) + + def test_constructor_with_invalid_schema(self): + class InvalidSchema: + pass + + with self.assertRaises(ValueError): + benedict(self.valid_data, schema=InvalidSchema) + + with self.assertRaises(ValueError): + benedict(self.valid_data, schema="not_a_schema") + + def test_from_json_with_schema_and_valid_data(self): + json_data = json.dumps(self.valid_data) + d = benedict.from_json(json_data, schema=self.User) + self.assertEqual(d["name"], "John") + self.assertEqual(d["age"], 30) + + def test_from_json_with_schema_and_invalid_data(self): + json_data = json.dumps(self.invalid_data) + with self.assertRaises(ValidationError): + benedict.from_json(json_data, schema=self.User) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/dicts/io/test_io_dict_validate.py b/tests/dicts/io/test_io_dict_validate.py new file mode 100644 index 00000000..75f5bfa1 --- /dev/null +++ b/tests/dicts/io/test_io_dict_validate.py @@ -0,0 +1,82 @@ +import unittest + +from pydantic import BaseModel, ValidationError + +from benedict.dicts.io import IODict + + +class TestIODictValidate(unittest.TestCase): + def setUp(self): + class User(BaseModel): + name: str + age: int + email: str + + class UserOptional(BaseModel): + name: str + age: int | None = None + email: str | None = None + + self.User = User + self.UserOptional = UserOptional + self.valid_data = { + "name": "John", + "age": 30, + "email": "john@example.com", + } + self.invalid_data = { + "name": "John", + "age": "not_an_int", + "email": "john@example.com", + } + self.minimal_data = {"name": "John"} + 
self.data_with_extra_fields = { + "name": "John", + "age": 30, + "email": "john@example.com", + "role": "admin", + "active": True, + } + + def test_validate_valid_data(self): + d = IODict(self.valid_data) + d.validate(schema=self.User) + self.assertEqual(d["name"], "John") + self.assertEqual(d["age"], 30) + self.assertEqual(d["email"], "john@example.com") + + def test_validate_invalid_data(self): + d = IODict(self.invalid_data) + with self.assertRaises(ValidationError): + d.validate(schema=self.User) + + def test_validate_optional_fields(self): + d = IODict(self.minimal_data) + d.validate(schema=self.UserOptional) + self.assertEqual(d["name"], "John") + self.assertIsNone(d.get("age")) + self.assertIsNone(d.get("email")) + + def test_validate_removes_extra_fields(self): + d = IODict(self.data_with_extra_fields) + d.validate(schema=self.User) + # required fields are preserved + self.assertEqual(d["name"], "John") + self.assertEqual(d["age"], 30) + self.assertEqual(d["email"], "john@example.com") + # extra fields are removed + self.assertNotIn("role", d) + self.assertNotIn("active", d) + # only the schema fields exist + self.assertEqual(set(d.keys()), {"name", "age", "email"}) + + def test_validate_invalid_schema(self): + class InvalidSchema: + pass + + d = IODict(self.valid_data) + with self.assertRaises(ValueError): + d.validate(schema=InvalidSchema) + + with self.assertRaises(ValueError): + d.validate(schema="not_a_schema") diff --git a/tests/utils/test_pydantic_util.py b/tests/utils/test_pydantic_util.py new file mode 100644 index 00000000..634632c1 --- /dev/null +++ b/tests/utils/test_pydantic_util.py @@ -0,0 +1,65 @@ +import unittest +from unittest.mock import patch + +from pydantic import BaseModel, ValidationError + +from benedict.exceptions import ExtrasRequireModuleNotFoundError +from benedict.utils import pydantic_util + + +class User(BaseModel): + name: str + age: int + + +class TestPydanticUtil(unittest.TestCase): + def setUp(self): + self.valid_data = 
{"name": "John", "age": 30} + self.invalid_data = {"name": "John", "age": "not_an_int"} + + def test_validate_data_with_valid_data(self): + # validation with valid data succeeds + validated = pydantic_util.validate_data(self.valid_data, schema=User) + self.assertEqual(validated["name"], "John") + self.assertEqual(validated["age"], 30) + + def test_validate_data_with_invalid_data(self): + # validation with invalid data raises ValidationError + with self.assertRaises(ValidationError): + pydantic_util.validate_data(self.invalid_data, schema=User) + + def test_validate_data_with_no_schema(self): + # validation without schema returns original data + data = {"any": "data"} + validated = pydantic_util.validate_data(data, schema=None) + self.assertEqual(validated, data) + + def test_validate_data_with_invalid_schema(self): + # validation with non-pydantic schema raises ValueError + class NotAModel: + pass + + with self.assertRaises(ValueError) as cm: + pydantic_util.validate_data(self.valid_data, schema=NotAModel) + self.assertEqual( + str(cm.exception), "Invalid schema. Schema must be a Pydantic model class." + ) + + # validation with invalid schema type raises ValueError + with self.assertRaises(ValueError) as cm: + pydantic_util.validate_data(self.valid_data, schema="not_a_schema") + self.assertEqual( + str(cm.exception), "Invalid schema. Schema must be a Pydantic model class." + ) + + @patch("benedict.utils.pydantic_util.pydantic_installed", False) + @patch("benedict.utils.pydantic_util.BaseModel", None) + def test_validate_data_when_pydantic_not_installed(self): + # validation without schema still works when pydantic is not installed + data = {"any": "data"} + validated = pydantic_util.validate_data(data, schema=None) + self.assertEqual(validated, data) + + # validation with schema raises ExtrasRequireModuleNotFoundError + with self.assertRaises(ExtrasRequireModuleNotFoundError): + pydantic_util.validate_data(self.valid_data, schema=User)