diff --git a/gooddata-pipelines/README.md b/gooddata-pipelines/README.md
index ce853649c..6d9d777f5 100644
--- a/gooddata-pipelines/README.md
+++ b/gooddata-pipelines/README.md
@@ -1,8 +1,8 @@
 # GoodData Pipelines
 
-A high level library for automating the lifecycle of GoodData Cloud (GDC).
+A high-level library for automating the lifecycle of GoodData Cloud (GDC).
 
-You can use the package to manage following resoursec in GDC:
+You can use the package to manage the following resources in GDC:
 
 1. Provisioning (create, update, delete)
     - User profiles
@@ -14,7 +14,7 @@ You can use the package to manage following resoursec in GDC:
 1. _[PLANNED]:_ Custom fields management
     - extend the Logical Data Model of a child workspace
 
-In case you are not interested in incorporating a library in your own program, but would like to use a ready-made script, consider having a look at [GoodData Productivity Tools](https://github.com/gooddata/gooddata-productivity-tools).
+If you are not interested in incorporating a library into your own program but would like to use a ready-made script, consider having a look at [GoodData Productivity Tools](https://github.com/gooddata/gooddata-productivity-tools).
 
 ## Provisioning
 
@@ -31,23 +31,20 @@ import os
 from csv import DictReader
 from pathlib import Path
 
-# Import the Entity Provisioner class and corresponing model from gooddata_pipelines library
+# Import the Entity Provisioner class and corresponding model from the gooddata_pipelines library
 from gooddata_pipelines import UserFullLoad, UserProvisioner
+from gooddata_pipelines.logger.logger import LogObserver
 
-# Optional: you can set up logging and subscribe it to the Provisioner
-from utils.logger import setup_logging
-
-setup_logging()
+# Optionally, subscribe a standard Python logger to the LogObserver
+import logging
 logger = logging.getLogger(__name__)
+LogObserver().subscribe(logger)
 
 # Create the Provisioner instance - you can also create the instance from a GDC yaml profile
 provisioner = UserProvisioner(
     host=os.environ["GDC_HOSTNAME"], token=os.environ["GDC_AUTH_TOKEN"]
 )
 
-# Optional: subscribe to logs
-provisioner.logger.subscribe(logger)
-
 # Load your data from your data source
 source_data_path: Path = Path("path/to/some.csv")
 source_data_reader = DictReader(source_data_path.read_text().splitlines())
@@ -60,4 +57,4 @@ full_load_data: list[UserFullLoad] = UserFullLoad.from_list_of_dicts(
 provisioner.full_load(full_load_data)
 ```
 
-Ready made scripts covering the basic use cases can be found here in the [GoodData Productivity Tools](https://github.com/gooddata/gooddata-productivity-tools) repository
+Ready-made scripts covering the basic use cases can be found in the [GoodData Productivity Tools](https://github.com/gooddata/gooddata-productivity-tools) repository.
diff --git a/gooddata-pipelines/gooddata_pipelines/backup_and_restore/models/storage.py b/gooddata-pipelines/gooddata_pipelines/backup_and_restore/models/storage.py
index 64a6337a6..8ec1add43 100644
--- a/gooddata-pipelines/gooddata_pipelines/backup_and_restore/models/storage.py
+++ b/gooddata-pipelines/gooddata_pipelines/backup_and_restore/models/storage.py
@@ -1,7 +1,7 @@
 # (C) 2025 GoodData Corporation
 
 from enum import Enum
-from typing import Annotated, TypeAlias
+from typing import Annotated, Optional, TypeAlias
 
 import yaml
 from pydantic import BaseModel, Field
@@ -22,6 +22,9 @@ class S3StorageConfig(BaseModel):
     backup_path: str
     bucket: str
     profile: str = "default"
+    aws_access_key_id: Optional[str] = None
+    aws_secret_access_key: Optional[str] = None
+    aws_default_region: Optional[str] = None
 
 
 class LocalStorageConfig(BaseModel):
diff --git a/gooddata-pipelines/gooddata_pipelines/backup_and_restore/storage/s3_storage.py b/gooddata-pipelines/gooddata_pipelines/backup_and_restore/storage/s3_storage.py
index 051243e9d..2a30f40cf 100644
--- a/gooddata-pipelines/gooddata_pipelines/backup_and_restore/storage/s3_storage.py
+++ b/gooddata-pipelines/gooddata_pipelines/backup_and_restore/storage/s3_storage.py
@@ -21,8 +21,7 @@ def __init__(self, conf: BackupRestoreConfig):
             raise ValueError("S3 storage config is required")
 
         self._config = conf.storage
-        self._profile = self._config.profile
-        self._session = self._create_boto_session(self._profile)
+        self._session = self._create_boto_session(self._config)
         self._resource = self._session.resource("s3")
         self._bucket = self._resource.Bucket(self._config.bucket)  # type: ignore [missing library stubs]
         suffix = "/" if not self._config.backup_path.endswith("/") else ""
@@ -30,9 +29,25 @@ def __init__(self, conf: BackupRestoreConfig):
 
         self._verify_connection()
 
-    def _create_boto_session(self, profile: str) -> boto3.Session:
+    def _create_boto_session(self, config: S3StorageConfig) -> boto3.Session:
+        if config.aws_access_key_id and config.aws_secret_access_key:
+            if not config.aws_default_region:
+                self.logger.warning(
+                    "No AWS region specified. Defaulting to us-east-1."
+                )
+            try:
+                return boto3.Session(
+                    aws_access_key_id=config.aws_access_key_id,
+                    aws_secret_access_key=config.aws_secret_access_key,
+                    region_name=config.aws_default_region,
+                )
+            except Exception:
+                self.logger.warning(
+                    "Failed to create boto3 session with supplied credentials. Falling back to profile..."
+                )
+
         try:
-            return boto3.Session(profile_name=profile)
+            return boto3.Session(profile_name=config.profile)
         except Exception:
             self.logger.warning(
                 'AWS profile "[default]" not found. Trying other fallback methods...'
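A minimal sketch of how the optional S3 credential fields added in this diff might be populated. It assumes `S3StorageConfig` can be imported from the module path of the file modified above, shows only the fields visible in this diff (any other fields the model may require are omitted), and uses illustrative bucket, path, and environment variable names.

```python
import os

# Assumed import path, mirroring the file modified in this diff.
from gooddata_pipelines.backup_and_restore.models.storage import S3StorageConfig

# Explicit credentials, if provided, are preferred by _create_boto_session;
# leaving them unset (None) keeps the profile-based behaviour
# (the "default" profile unless `profile` is overridden).
storage_config = S3StorageConfig(
    backup_path="backups/",        # illustrative value
    bucket="my-backup-bucket",     # illustrative value
    aws_access_key_id=os.environ.get("AWS_ACCESS_KEY_ID"),
    aws_secret_access_key=os.environ.get("AWS_SECRET_ACCESS_KEY"),
    aws_default_region=os.environ.get("AWS_DEFAULT_REGION"),
)
```

The resulting precedence in `_create_boto_session` is: the explicit key pair first, then the named AWS profile, then boto3's remaining fallback methods.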