From 9a473f9752b4174d7baa484eb00ed61adfc3a0e7 Mon Sep 17 00:00:00 2001 From: janmatzek Date: Tue, 30 Sep 2025 15:38:23 +0200 Subject: [PATCH 1/2] feat(gooddata-pipelines): forbid extra fields in provisioning input models --- .../user_data_filters/models/udf_models.py | 8 +- .../entities/users/models/permissions.py | 4 +- .../entities/users/models/user_groups.py | 10 +- .../entities/users/models/users.py | 4 +- .../entities/workspaces/models.py | 2 +- .../tests/provisioning/test_provisioning.py | 105 ++++++++++++++++++ 6 files changed, 126 insertions(+), 7 deletions(-) diff --git a/gooddata-pipelines/gooddata_pipelines/provisioning/entities/user_data_filters/models/udf_models.py b/gooddata-pipelines/gooddata_pipelines/provisioning/entities/user_data_filters/models/udf_models.py index 70b6b709d..3716d3479 100644 --- a/gooddata-pipelines/gooddata_pipelines/provisioning/entities/user_data_filters/models/udf_models.py +++ b/gooddata-pipelines/gooddata_pipelines/provisioning/entities/user_data_filters/models/udf_models.py @@ -7,6 +7,8 @@ from dataclasses import dataclass, field +from pydantic import BaseModel, ConfigDict + @dataclass class UserDataFilterGroup: @@ -20,13 +22,13 @@ class WorkspaceUserDataFilters: user_data_filters: list["UserDataFilterGroup"] = field(default_factory=list) -@dataclass -class UserDataFilterFullLoad: +class UserDataFilterFullLoad(BaseModel): + model_config = ConfigDict(extra="forbid") + workspace_id: str udf_id: str udf_value: str -@dataclass class UserDataFilterIncrementalLoad(UserDataFilterFullLoad): is_active: bool diff --git a/gooddata-pipelines/gooddata_pipelines/provisioning/entities/users/models/permissions.py b/gooddata-pipelines/gooddata_pipelines/provisioning/entities/users/models/permissions.py index dcd42bd2d..06ecc61d6 100644 --- a/gooddata-pipelines/gooddata_pipelines/provisioning/entities/users/models/permissions.py +++ b/gooddata-pipelines/gooddata_pipelines/provisioning/entities/users/models/permissions.py @@ -9,7 +9,7 @@ CatalogDeclarativeSingleWorkspacePermission, CatalogDeclarativeWorkspacePermissions, ) -from pydantic import BaseModel +from pydantic import BaseModel, ConfigDict from gooddata_pipelines.provisioning.utils.exceptions import BaseUserException @@ -23,6 +23,8 @@ class EntityType(str, Enum): class BasePermission(BaseModel): + model_config = ConfigDict(extra="forbid") + permission: str workspace_id: str entity_id: str diff --git a/gooddata-pipelines/gooddata_pipelines/provisioning/entities/users/models/user_groups.py b/gooddata-pipelines/gooddata_pipelines/provisioning/entities/users/models/user_groups.py index 792431673..66d57af24 100644 --- a/gooddata-pipelines/gooddata_pipelines/provisioning/entities/users/models/user_groups.py +++ b/gooddata-pipelines/gooddata_pipelines/provisioning/entities/users/models/user_groups.py @@ -1,9 +1,17 @@ # (C) 2025 GoodData Corporation -from pydantic import BaseModel, Field, ValidationInfo, field_validator +from pydantic import ( + BaseModel, + ConfigDict, + Field, + ValidationInfo, + field_validator, +) class UserGroupBase(BaseModel): + model_config = ConfigDict(extra="forbid") + user_group_id: str user_group_name: str parent_user_groups: list[str] = Field(default_factory=list) diff --git a/gooddata-pipelines/gooddata_pipelines/provisioning/entities/users/models/users.py b/gooddata-pipelines/gooddata_pipelines/provisioning/entities/users/models/users.py index 6a5918b09..0d7414de8 100644 --- a/gooddata-pipelines/gooddata_pipelines/provisioning/entities/users/models/users.py +++ b/gooddata-pipelines/gooddata_pipelines/provisioning/entities/users/models/users.py @@ -3,7 +3,7 @@ from typing import Any from gooddata_sdk.catalog.user.entity_model.user import CatalogUser -from pydantic import BaseModel, Field +from pydantic import BaseModel, ConfigDict, Field class UserProfile(BaseModel): @@ -18,6 +18,8 @@ class UserProfile(BaseModel): class BaseUser(BaseModel): """Base class containing shared user fields and functionality.""" + model_config = ConfigDict(extra="forbid") + user_id: str firstname: str | None lastname: str | None diff --git a/gooddata-pipelines/gooddata_pipelines/provisioning/entities/workspaces/models.py b/gooddata-pipelines/gooddata_pipelines/provisioning/entities/workspaces/models.py index 6c7deb52c..32b6e5d05 100644 --- a/gooddata-pipelines/gooddata_pipelines/provisioning/entities/workspaces/models.py +++ b/gooddata-pipelines/gooddata_pipelines/provisioning/entities/workspaces/models.py @@ -22,7 +22,7 @@ class WorkspaceDataMaps: class WorkspaceBase(BaseModel): - model_config = ConfigDict(coerce_numbers_to_str=True) + model_config = ConfigDict(coerce_numbers_to_str=True, extra="forbid") parent_id: str workspace_id: str diff --git a/gooddata-pipelines/tests/provisioning/test_provisioning.py b/gooddata-pipelines/tests/provisioning/test_provisioning.py index 2c838327b..31f7a58fc 100644 --- a/gooddata-pipelines/tests/provisioning/test_provisioning.py +++ b/gooddata-pipelines/tests/provisioning/test_provisioning.py @@ -3,11 +3,13 @@ from pathlib import Path import pytest +from pydantic import BaseModel, ValidationError from gooddata_pipelines import ( EntityType, PermissionFullLoad, PermissionIncrementalLoad, + UserDataFilterFullLoad, UserFullLoad, UserGroupFullLoad, UserGroupIncrementalLoad, @@ -119,3 +121,106 @@ def test_create_from_profile() -> None: ) assert provisioner._api._domain == "http://localhost:3000" assert provisioner._api._token == os.environ.pop("MOCK_TOKEN") + + +MODELS_AND_VALID_DATA = [ + ( + WorkspaceFullLoad, + { + "parent_id": "parent_id", + "workspace_id": "workspace_id", + "workspace_name": "workspace_name", + }, + ), + ( + WorkspaceIncrementalLoad, + { + "parent_id": "parent_id", + "workspace_id": "workspace_id", + "workspace_name": "workspace_name", + "is_active": True, + }, + ), + ( + UserFullLoad, + { + "user_id": "user_id", + "firstname": "firstname", + "lastname": "lastname", + "email": "email", + "auth_id": "auth_id", + "user_groups": ["user_group_id"], + }, + ), + ( + UserIncrementalLoad, + { + "user_id": "user_id", + "firstname": "firstname", + "lastname": "lastname", + "email": "email", + "auth_id": "auth_id", + "user_groups": ["user_group_id"], + "is_active": True, + }, + ), + ( + UserGroupFullLoad, + { + "user_group_id": "user_group_id", + "user_group_name": "user_group_name", + }, + ), + ( + UserGroupIncrementalLoad, + { + "user_group_id": "user_group_id", + "user_group_name": "user_group_name", + "is_active": True, + }, + ), + ( + PermissionFullLoad, + { + "permission": "permission", + "workspace_id": "workspace_id", + "entity_id": "entity_id", + "entity_type": EntityType.user, + }, + ), + ( + PermissionIncrementalLoad, + { + "permission": "permission", + "workspace_id": "workspace_id", + "entity_id": "entity_id", + "entity_type": EntityType.user, + "is_active": True, + }, + ), + ( + UserDataFilterFullLoad, + { + "workspace_id": "workspace_id", + "udf_id": "udf_id", + "udf_value": "udf_value", + }, + ), +] + + +@pytest.mark.parametrize(["provisioning_model", "data"], MODELS_AND_VALID_DATA) +def test_validate_valid_data(provisioning_model: BaseModel, data: dict) -> None: + """Test validating valid data.""" + provisioning_model.model_validate(data) + + +@pytest.mark.parametrize( + ["provisioning_model", "data"], + MODELS_AND_VALID_DATA, +) +def test_raise_extra_fields(provisioning_model: BaseModel, data: dict) -> None: + """Test raising an error when extra fields are provided.""" + data["extra_field"] = "extra_field" + with pytest.raises(ValidationError): + provisioning_model.model_validate(data) From 6af79117f120f1e7e6bd1d1a81405b4e4d1ba2a1 Mon Sep 17 00:00:00 2001 From: janmatzek Date: Thu, 2 Oct 2025 10:49:37 +0200 Subject: [PATCH 2/2] chore(gooddata-pipelines): replace dataclasses with attrs --- gooddata-pipelines/TODO.md | 5 --- .../backup_input_processor.py | 5 +-- .../backup_and_restore/backup_manager.py | 4 +- .../backup_and_restore/constants.py | 8 ++-- .../user_data_filters/models/udf_models.py | 12 ++---- .../user_data_filters/user_data_filters.py | 4 +- .../provisioning/utils/utils.py | 39 ++++++++++++------- .../provisioning/entities/users/test_users.py | 4 +- 8 files changed, 40 insertions(+), 41 deletions(-) diff --git a/gooddata-pipelines/TODO.md b/gooddata-pipelines/TODO.md index e6c5e9a64..71788d382 100644 --- a/gooddata-pipelines/TODO.md +++ b/gooddata-pipelines/TODO.md @@ -10,15 +10,10 @@ A list of outstanding tasks, features, or technical debt to be addressed in this - [ ] Integrate with GoodDataApiClient - [ ] Consider replacing the SdkMethods wrapper with direct calls to the SDK methods -- [ ] Consider using orjson library instead of json to load test data - [ ] Cleanup custom exceptions - [ ] Improve test coverage. Write missing unit tests for legacy code (e.g., user data filters) ## Documentation - [ ] Improve package README -- [ ] Workspace provisioning -- [ ] User provisioning -- [ ] User group provisioning -- [ ] Permission provisioning - [ ] User data filter provisioning diff --git a/gooddata-pipelines/gooddata_pipelines/backup_and_restore/backup_input_processor.py b/gooddata-pipelines/gooddata_pipelines/backup_and_restore/backup_input_processor.py index 38a799f01..a5117d634 100644 --- a/gooddata-pipelines/gooddata_pipelines/backup_and_restore/backup_input_processor.py +++ b/gooddata-pipelines/gooddata_pipelines/backup_and_restore/backup_input_processor.py @@ -1,7 +1,6 @@ # (C) 2025 GoodData Corporation -from dataclasses import dataclass - +import attrs import requests from gooddata_pipelines.api import GoodDataApi @@ -46,7 +45,7 @@ def set_endpoints(self) -> None: ) self.all_workspaces_endpoint = f"{self.base_workspace_endpoint}?page=0&size={self.page_size}&sort=name,asc&metaInclude=page" - @dataclass + @attrs.define class _ProcessDataOutput: workspace_ids: list[str] sub_parents: list[str] | None = None diff --git a/gooddata-pipelines/gooddata_pipelines/backup_and_restore/backup_manager.py b/gooddata-pipelines/gooddata_pipelines/backup_and_restore/backup_manager.py index 3a042b9f0..8d6ea4007 100644 --- a/gooddata-pipelines/gooddata_pipelines/backup_and_restore/backup_manager.py +++ b/gooddata-pipelines/gooddata_pipelines/backup_and_restore/backup_manager.py @@ -6,10 +6,10 @@ import tempfile import time import traceback -from dataclasses import dataclass from pathlib import Path from typing import Any, Type +import attrs import requests import yaml from gooddata_sdk.utils import PROFILES_FILE_PATH, profile_content @@ -40,7 +40,7 @@ from gooddata_pipelines.utils.rate_limiter import RateLimiter -@dataclass +@attrs.define class BackupBatch: list_of_ids: list[str] diff --git a/gooddata-pipelines/gooddata_pipelines/backup_and_restore/constants.py b/gooddata-pipelines/gooddata_pipelines/backup_and_restore/constants.py index 5ce180b89..900dff511 100644 --- a/gooddata-pipelines/gooddata_pipelines/backup_and_restore/constants.py +++ b/gooddata-pipelines/gooddata_pipelines/backup_and_restore/constants.py @@ -1,11 +1,11 @@ # (C) 2025 GoodData Corporation import datetime -from dataclasses import dataclass +import attrs from gooddata_sdk._version import __version__ as sdk_version -@dataclass(frozen=True) +@attrs.frozen class DirNames: """ Folder names used in the SDK backup process: @@ -21,14 +21,14 @@ class DirNames: UDF = "user_data_filters" -@dataclass(frozen=True) +@attrs.frozen class ApiDefaults: DEFAULT_PAGE_SIZE = 100 DEFAULT_BATCH_SIZE = 100 DEFAULT_API_CALLS_PER_SECOND = 1.0 -@dataclass(frozen=True) +@attrs.frozen class BackupSettings(ApiDefaults): MAX_RETRIES = 3 RETRY_DELAY = 5 # seconds diff --git a/gooddata-pipelines/gooddata_pipelines/provisioning/entities/user_data_filters/models/udf_models.py b/gooddata-pipelines/gooddata_pipelines/provisioning/entities/user_data_filters/models/udf_models.py index 3716d3479..a733be5cf 100644 --- a/gooddata-pipelines/gooddata_pipelines/provisioning/entities/user_data_filters/models/udf_models.py +++ b/gooddata-pipelines/gooddata_pipelines/provisioning/entities/user_data_filters/models/udf_models.py @@ -2,24 +2,20 @@ """This module defines data models for user data filters in a GoodData workspace.""" -# TODO: consider using attrs instead of dataclasses for these models. Dataclasses -# have different functionality per Python version (not package version). - -from dataclasses import dataclass, field - +import attrs from pydantic import BaseModel, ConfigDict -@dataclass +@attrs.define class UserDataFilterGroup: udf_id: str udf_values: list[str] -@dataclass +@attrs.define class WorkspaceUserDataFilters: workspace_id: str - user_data_filters: list["UserDataFilterGroup"] = field(default_factory=list) + user_data_filters: list["UserDataFilterGroup"] = attrs.field(factory=list) class UserDataFilterFullLoad(BaseModel): diff --git a/gooddata-pipelines/gooddata_pipelines/provisioning/entities/user_data_filters/user_data_filters.py b/gooddata-pipelines/gooddata_pipelines/provisioning/entities/user_data_filters/user_data_filters.py index d7e356dc5..8aef3189b 100644 --- a/gooddata-pipelines/gooddata_pipelines/provisioning/entities/user_data_filters/user_data_filters.py +++ b/gooddata-pipelines/gooddata_pipelines/provisioning/entities/user_data_filters/user_data_filters.py @@ -50,6 +50,8 @@ class UserDataFilterProvisioner( ldm_column_name: str = "" maql_column_name: str = "" + FULL_LOAD_TYPE = UserDataFilterFullLoad + def set_ldm_column_name(self, ldm_column_name: str) -> None: """Set the LDM column name for user data filters. @@ -214,8 +216,6 @@ def _provision_full_load(self) -> None: ) self._create_user_data_filters(grouped_db_user_data_filters) - self.logger.info("User data filters provisioning completed") - def _provision_incremental_load(self) -> None: """Provision user data filters in GoodData workspaces.""" raise NotImplementedError("Not implemented yet.") diff --git a/gooddata-pipelines/gooddata_pipelines/provisioning/utils/utils.py b/gooddata-pipelines/gooddata_pipelines/provisioning/utils/utils.py index 4cc44be58..c8bf2af44 100644 --- a/gooddata-pipelines/gooddata_pipelines/provisioning/utils/utils.py +++ b/gooddata-pipelines/gooddata_pipelines/provisioning/utils/utils.py @@ -2,6 +2,8 @@ """Module for utilities used in GoodData Pipelines provisioning.""" +from typing import Any, cast + import attrs from requests import Response @@ -11,9 +13,8 @@ class AttributesMixin: Mixin class to provide a method for getting attributes of an object which may or may not exist. """ - @staticmethod def get_attrs( - *objects: object, overrides: dict[str, str] | None = None + self, *objects: object, overrides: dict[str, str] | None = None ) -> dict[str, str]: """ Returns a dictionary of attributes from the given objects. @@ -27,11 +28,11 @@ def get_attrs( """ # TODO: This might not work great with nested objects, values which are lists of objects etc. # If we care about parsing the logs back from the string, we should consider some other approach - attrs: dict[str, str] = {} + attributes: dict[str, str] = {} for context_object in objects: if isinstance(context_object, Response): # for request.Response objects, keys need to be renamed to match the log schema - attrs.update( + attributes.update( { "http_status": str(context_object.status_code), "http_method": getattr( @@ -42,23 +43,31 @@ def get_attrs( ), } ) + elif attrs.has(type(context_object)): + for key, value in attrs.asdict( + cast(attrs.AttrsInstance, context_object) + ).items(): + self._add_to_dict(attributes, key, value) else: # Generic handling for other objects for key, value in context_object.__dict__.items(): - if value is None: - continue - - if isinstance(value, list): - attrs[key] = ", ".join( - str(list_item) for list_item in value - ) - else: - attrs[key] = str(value) + self._add_to_dict(attributes, key, value) if overrides: - attrs.update(overrides) + attributes.update(overrides) + + return attributes + + def _add_to_dict( + self, attributes: dict[str, str], key: str, value: Any + ) -> None: + if value is None: + return - return attrs + if isinstance(value, list): + attributes[key] = ", ".join(str(list_item) for list_item in value) + else: + attributes[key] = str(value) @attrs.define diff --git a/gooddata-pipelines/tests/provisioning/entities/users/test_users.py b/gooddata-pipelines/tests/provisioning/entities/users/test_users.py index 45a6c7707..6cf423af9 100644 --- a/gooddata-pipelines/tests/provisioning/entities/users/test_users.py +++ b/gooddata-pipelines/tests/provisioning/entities/users/test_users.py @@ -1,7 +1,7 @@ # (C) 2025 GoodData Corporation -from dataclasses import dataclass from typing import Literal, Optional +import attrs import orjson import pytest from gooddata_api_client.exceptions import NotFoundException # type: ignore @@ -29,7 +29,7 @@ TEST_DATA_SUBDIR = f"{TEST_DATA_DIR}/provisioning/entities/users" -@dataclass +@attrs.define class MockUser: id: str firstname: Optional[str]