Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions gooddata-pipelines/TODO.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,10 @@ A list of outstanding tasks, features, or technical debt to be addressed in this

- [ ] Integrate with GoodDataApiClient
- [ ] Consider replacing the SdkMethods wrapper with direct calls to the SDK methods
- [ ] Consider using orjson library instead of json to load test data
- [ ] Cleanup custom exceptions
- [ ] Improve test coverage. Write missing unit tests for legacy code (e.g., user data filters)

## Documentation

- [ ] Improve package README
- [ ] Workspace provisioning
- [ ] User provisioning
- [ ] User group provisioning
- [ ] Permission provisioning
- [ ] User data filter provisioning
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# (C) 2025 GoodData Corporation

from dataclasses import dataclass

import attrs
import requests

from gooddata_pipelines.api import GoodDataApi
Expand Down Expand Up @@ -46,7 +45,7 @@ def set_endpoints(self) -> None:
)
self.all_workspaces_endpoint = f"{self.base_workspace_endpoint}?page=0&size={self.page_size}&sort=name,asc&metaInclude=page"

@dataclass
@attrs.define
class _ProcessDataOutput:
workspace_ids: list[str]
sub_parents: list[str] | None = None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
import tempfile
import time
import traceback
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Type

import attrs
import requests
import yaml
from gooddata_sdk.utils import PROFILES_FILE_PATH, profile_content
Expand Down Expand Up @@ -40,7 +40,7 @@
from gooddata_pipelines.utils.rate_limiter import RateLimiter


@dataclass
@attrs.define
class BackupBatch:
list_of_ids: list[str]

Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# (C) 2025 GoodData Corporation
import datetime
from dataclasses import dataclass

import attrs
from gooddata_sdk._version import __version__ as sdk_version


@dataclass(frozen=True)
@attrs.frozen
class DirNames:
"""
Folder names used in the SDK backup process:
Expand All @@ -21,14 +21,14 @@ class DirNames:
UDF = "user_data_filters"


@dataclass(frozen=True)
@attrs.frozen
class ApiDefaults:
    """Default tuning constants for GoodData API access.

    NOTE(review): these assignments carry no type annotations, so ``attrs``
    collects no instance fields from them — they remain plain class-level
    constants. Confirm that is intended (it matches the previous
    ``@dataclass(frozen=True)`` behavior for unannotated names).
    """

    # Page size used when listing entities; presumably feeds the
    # `?page=0&size=...` query parameter — TODO confirm against callers.
    DEFAULT_PAGE_SIZE = 100
    # Number of entities handled per processing batch.
    DEFAULT_BATCH_SIZE = 100
    # Upper bound on request rate, in calls per second.
    DEFAULT_API_CALLS_PER_SECOND = 1.0


@dataclass(frozen=True)
@attrs.frozen
class BackupSettings(ApiDefaults):
MAX_RETRIES = 3
RETRY_DELAY = 5 # seconds
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,31 +2,29 @@

"""This module defines data models for user data filters in a GoodData workspace."""

# TODO: consider using attrs instead of dataclasses for these models. Dataclasses
# have different functionality per Python version (not package version).
import attrs
from pydantic import BaseModel, ConfigDict

from dataclasses import dataclass, field


@dataclass
@attrs.define
class UserDataFilterGroup:
    """Data model pairing a user data filter ID with its list of values."""

    # Identifier of the user data filter (UDF).
    udf_id: str
    # Values associated with the filter.
    udf_values: list[str]


@dataclass
@attrs.define
class WorkspaceUserDataFilters:
workspace_id: str
user_data_filters: list["UserDataFilterGroup"] = field(default_factory=list)
user_data_filters: list["UserDataFilterGroup"] = attrs.field(factory=list)


class UserDataFilterFullLoad(BaseModel):
model_config = ConfigDict(extra="forbid")

@dataclass
class UserDataFilterFullLoad:
workspace_id: str
udf_id: str
udf_value: str


@dataclass
class UserDataFilterIncrementalLoad(UserDataFilterFullLoad):
is_active: bool
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ class UserDataFilterProvisioner(
ldm_column_name: str = ""
maql_column_name: str = ""

FULL_LOAD_TYPE = UserDataFilterFullLoad

def set_ldm_column_name(self, ldm_column_name: str) -> None:
"""Set the LDM column name for user data filters.

Expand Down Expand Up @@ -214,8 +216,6 @@ def _provision_full_load(self) -> None:
)
self._create_user_data_filters(grouped_db_user_data_filters)

self.logger.info("User data filters provisioning completed")

def _provision_incremental_load(self) -> None:
"""Provision user data filters in GoodData workspaces."""
raise NotImplementedError("Not implemented yet.")
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
CatalogDeclarativeSingleWorkspacePermission,
CatalogDeclarativeWorkspacePermissions,
)
from pydantic import BaseModel
from pydantic import BaseModel, ConfigDict

from gooddata_pipelines.provisioning.utils.exceptions import BaseUserException

Expand All @@ -23,6 +23,8 @@ class EntityType(str, Enum):


class BasePermission(BaseModel):
model_config = ConfigDict(extra="forbid")

permission: str
workspace_id: str
entity_id: str
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,17 @@
# (C) 2025 GoodData Corporation

from pydantic import BaseModel, Field, ValidationInfo, field_validator
from pydantic import (
BaseModel,
ConfigDict,
Field,
ValidationInfo,
field_validator,
)


class UserGroupBase(BaseModel):
model_config = ConfigDict(extra="forbid")

user_group_id: str
user_group_name: str
parent_user_groups: list[str] = Field(default_factory=list)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from typing import Any

from gooddata_sdk.catalog.user.entity_model.user import CatalogUser
from pydantic import BaseModel, Field
from pydantic import BaseModel, ConfigDict, Field


class UserProfile(BaseModel):
Expand All @@ -18,6 +18,8 @@ class UserProfile(BaseModel):
class BaseUser(BaseModel):
"""Base class containing shared user fields and functionality."""

model_config = ConfigDict(extra="forbid")

user_id: str
firstname: str | None
lastname: str | None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class WorkspaceDataMaps:


class WorkspaceBase(BaseModel):
model_config = ConfigDict(coerce_numbers_to_str=True)
model_config = ConfigDict(coerce_numbers_to_str=True, extra="forbid")

parent_id: str
workspace_id: str
Expand Down
39 changes: 24 additions & 15 deletions gooddata-pipelines/gooddata_pipelines/provisioning/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

"""Module for utilities used in GoodData Pipelines provisioning."""

from typing import Any, cast

import attrs
from requests import Response

Expand All @@ -11,9 +13,8 @@ class AttributesMixin:
Mixin class to provide a method for getting attributes of an object which may or may not exist.
"""

@staticmethod
def get_attrs(
*objects: object, overrides: dict[str, str] | None = None
self, *objects: object, overrides: dict[str, str] | None = None
) -> dict[str, str]:
"""
Returns a dictionary of attributes from the given objects.
Expand All @@ -27,11 +28,11 @@ def get_attrs(
"""
# TODO: This might not work great with nested objects, values which are lists of objects etc.
# If we care about parsing the logs back from the string, we should consider some other approach
attrs: dict[str, str] = {}
attributes: dict[str, str] = {}
for context_object in objects:
if isinstance(context_object, Response):
# for request.Response objects, keys need to be renamed to match the log schema
attrs.update(
attributes.update(
{
"http_status": str(context_object.status_code),
"http_method": getattr(
Expand All @@ -42,23 +43,31 @@ def get_attrs(
),
}
)
elif attrs.has(type(context_object)):
for key, value in attrs.asdict(
cast(attrs.AttrsInstance, context_object)
).items():
self._add_to_dict(attributes, key, value)
else:
# Generic handling for other objects
for key, value in context_object.__dict__.items():
if value is None:
continue

if isinstance(value, list):
attrs[key] = ", ".join(
str(list_item) for list_item in value
)
else:
attrs[key] = str(value)
self._add_to_dict(attributes, key, value)

if overrides:
attrs.update(overrides)
attributes.update(overrides)

return attributes

def _add_to_dict(
self, attributes: dict[str, str], key: str, value: Any
) -> None:
if value is None:
return

return attrs
if isinstance(value, list):
attributes[key] = ", ".join(str(list_item) for list_item in value)
else:
attributes[key] = str(value)


@attrs.define
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# (C) 2025 GoodData Corporation
from dataclasses import dataclass
from typing import Literal, Optional

import attrs
import orjson
import pytest
from gooddata_api_client.exceptions import NotFoundException # type: ignore
Expand Down Expand Up @@ -29,7 +29,7 @@
TEST_DATA_SUBDIR = f"{TEST_DATA_DIR}/provisioning/entities/users"


@dataclass
@attrs.define
class MockUser:
id: str
firstname: Optional[str]
Expand Down
Loading
Loading