gooddata-pipelines/README.md (21 changes: 9 additions & 12 deletions)
@@ -1,8 +1,8 @@
# GoodData Pipelines

-A high level library for automating the lifecycle of GoodData Cloud (GDC).
+A high-level library for automating the lifecycle of GoodData Cloud (GDC).

-You can use the package to manage following resoursec in GDC:
+You can use the package to manage following resources in GDC:

1. Provisioning (create, update, delete)
   - User profiles
@@ -14,7 +14,7 @@ You can use the package to manage following resoursec in GDC:
1. _[PLANNED]:_ Custom fields management
   - extend the Logical Data Model of a child workspace

-In case you are not interested in incorporating a library in your own program, but would like to use a ready-made script, consider having a look at [GoodData Productivity Tools](https://github.com/gooddata/gooddata-productivity-tools).
+In case you are not interested in incorporating a library in your own program but would like to use a ready-made script, consider having a look at [GoodData Productivity Tools](https://github.com/gooddata/gooddata-productivity-tools).

## Provisioning

@@ -31,23 +31,20 @@ import os
from csv import DictReader
from pathlib import Path

-# Import the Entity Provisioner class and corresponing model from gooddata_pipelines library
+# Import the Entity Provisioner class and corresponding model from gooddata_pipelines library
from gooddata_pipelines import UserFullLoad, UserProvisioner
from gooddata_pipelines.logger.logger import LogObserver

-# Optional: you can set up logging and subscribe it to the Provisioner
-from utils.logger import setup_logging
-
-setup_logging()
+# Optionally, subscribe a standard Python logger to the LogObserver
+import logging
+logger = logging.getLogger(__name__)
+LogObserver().subscribe(logger)

# Create the Provisioner instance - you can also create the instance from a GDC yaml profile
provisioner = UserProvisioner(
    host=os.environ["GDC_HOSTNAME"], token=os.environ["GDC_AUTH_TOKEN"]
)

-# Optional: subscribe to logs
-provisioner.logger.subscribe(logger)

# Load your data from your data source
source_data_path: Path = Path("path/to/some.csv")
source_data_reader = DictReader(source_data_path.read_text().splitlines())
@@ -60,4 +57,4 @@ full_load_data: list[UserFullLoad] = UserFullLoad.from_list_of_dicts(
provisioner.full_load(full_load_data)
```

-Ready made scripts covering the basic use cases can be found here in the [GoodData Productivity Tools](https://github.com/gooddata/gooddata-productivity-tools) repository
+Ready-made scripts covering the basic use cases can be found here in the [GoodData Productivity Tools](https://github.com/gooddata/gooddata-productivity-tools) repository
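
One note on the logging change above: the new snippet subscribes a bare `logging.getLogger(__name__)`, which has no handler of its own, so INFO-level records will be dropped until logging is configured somewhere in the application. A minimal setup (an assumption on my part, not shown in the README) would be:

```python
import logging

# basicConfig attaches a stream handler to the root logger and sets its level;
# the module logger subscribed to the LogObserver inherits both.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
```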
@@ -1,7 +1,7 @@
# (C) 2025 GoodData Corporation

from enum import Enum
-from typing import Annotated, TypeAlias
+from typing import Annotated, TypeAlias, Optional

import yaml
from pydantic import BaseModel, Field
@@ -22,6 +22,9 @@ class S3StorageConfig(BaseModel):
    backup_path: str
    bucket: str
    profile: str = "default"
+    aws_access_key_id: Optional[str] = None
+    aws_secret_access_key: Optional[str] = None
+    aws_default_region: Optional[str] = None


class LocalStorageConfig(BaseModel):
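For context, the three new optional fields let a caller pass explicit AWS credentials instead of relying on a named profile. A minimal sketch of constructing the extended config (the model is re-declared here so the snippet runs standalone, since its import path is not shown in this diff; the bucket and region values are made up):

```python
import os
from typing import Optional

from pydantic import BaseModel


# Re-declared from the diff above so the sketch is self-contained.
class S3StorageConfig(BaseModel):
    backup_path: str
    bucket: str
    profile: str = "default"
    aws_access_key_id: Optional[str] = None
    aws_secret_access_key: Optional[str] = None
    aws_default_region: Optional[str] = None


# Explicit credentials (new in this PR); the region is optional but avoids the
# "Defaulting to us-east-1" warning emitted during session creation.
explicit = S3StorageConfig(
    backup_path="backups/",
    bucket="my-backup-bucket",
    aws_access_key_id=os.environ.get("AWS_ACCESS_KEY_ID"),
    aws_secret_access_key=os.environ.get("AWS_SECRET_ACCESS_KEY"),
    aws_default_region="eu-west-1",
)

# Leaving the credentials unset keeps the pre-PR behaviour: the session is
# created from the named AWS profile instead.
from_profile = S3StorageConfig(backup_path="backups/", bucket="my-backup-bucket")
```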
@@ -21,18 +21,33 @@ def __init__(self, conf: BackupRestoreConfig):
            raise ValueError("S3 storage config is required")

        self._config = conf.storage
-        self._profile = self._config.profile
-        self._session = self._create_boto_session(self._profile)
+        self._session = self._create_boto_session(self._config)
        self._resource = self._session.resource("s3")
        self._bucket = self._resource.Bucket(self._config.bucket) # type: ignore [missing library stubs]
        suffix = "/" if not self._config.backup_path.endswith("/") else ""
        self._backup_path = self._config.backup_path + suffix

        self._verify_connection()

-    def _create_boto_session(self, profile: str) -> boto3.Session:
+    def _create_boto_session(self, config: S3StorageConfig) -> boto3.Session:
+        if config.aws_access_key_id and config.aws_secret_access_key:
+            if not config.aws_default_region:
+                self.logger.warning(
+                    "No AWS region specified. Defaulting to us-east-1."
+                )
+            try:
+                return boto3.Session(
+                    aws_access_key_id=config.aws_access_key_id,
+                    aws_secret_access_key=config.aws_secret_access_key,
+                    region_name=config.aws_default_region,
+                )
+            except Exception:
+                self.logger.warning(
+                    "Failed to create boto3 session with supplied credentials. Falling back to profile..."
+                )
+
        try:
-            return boto3.Session(profile_name=profile)
+            return boto3.Session(profile_name=config.profile)
Reviewer comment (Contributor):

> Profile can be None right now. What happens if everything is set to None?

(A sketch after this hunk looks at what boto3 does in that case.)

        except Exception:
            self.logger.warning(
                'AWS profile "[default]" not found. Trying other fallback methods...
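
On the reviewer's question above: boto3 applies `profile_name` only when it is not None, so `boto3.Session(profile_name=None)` is equivalent to `boto3.Session()` and falls back to the default credential chain (environment variables, shared config files, then container or instance-role credentials). A standalone sketch, not part of the PR:

```python
import boto3

# profile_name=None is ignored by boto3, so this is the same as boto3.Session().
session = boto3.Session(profile_name=None)

# The session is still created when everything is None; credential resolution
# simply falls through the default chain. If nothing is resolvable this prints
# None, and the failure would only surface later, e.g. in the diff's
# _verify_connection() call.
print(session.get_credentials())
```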