Commit 00b0a78

feat(workspace-backup): Backup to Azure blob storage

1 parent 1ee2b68 commit 00b0a78

File tree

7 files changed: +476 -10 lines changed


.gitignore

Lines changed: 10 additions & 0 deletions

@@ -12,6 +12,16 @@ packages/gooddata-sdk/tests/catalog/translate
 .vscode
 .ruff_cache
 
+# Python build artifacts
+.tox
+*.egg-info
+dist/
+build/
+__pycache__/
+*.pyc
+*.pyo
+*.pyd
+
 docs/node_modules
 docs/public
 docs/resources/_gen

packages/gooddata-pipelines/README.md

Lines changed: 194 additions & 6 deletions

@@ -11,7 +11,7 @@ You can use the package to manage following resources in GDC:
    - User Data Filters
    - Child workspaces (incl. Workspace Data Filter settings)
 1. Backup and restore of workspaces
-   - Create and backup snapshots of workspace metadata.
+   - Create and backup snapshots of workspace metadata to local storage, AWS S3, or Azure Blob Storage
 1. LDM Extension
    - extend the Logical Data Model of a child workspace with custom datasets and fields
 
@@ -34,7 +34,7 @@ import logging
 from csv import DictReader
 from pathlib import Path
 
-# Import the Entity Provisioner class and corresponding model from gooddata_pipelines library
+# Import the Entity Provisioner class and corresponding model from the gooddata_pipelines library
 from gooddata_pipelines import UserFullLoad, UserProvisioner
 
 # Create the Provisioner instance - you can also create the instance from a GDC yaml profile
@@ -62,12 +62,200 @@ provisioner.full_load(full_load_data)
 
 ```
 
+Ready-made scripts covering the basic use cases can be found in the [GoodData Productivity Tools](https://github.com/gooddata/gooddata-productivity-tools) repository.
+
+## Backup and Restore of Workspaces
+
+The backup and restore module allows you to create snapshots of GoodData Cloud workspaces and restore them later. This is useful for:
+
+- Creating backups before major changes
+- Migrating workspaces between environments
+- Disaster recovery scenarios
+- Copying workspace configurations
+
+### Backup
+
+The module supports three backup modes:
+
+1. **List of workspaces** - Backup specific workspaces by providing a list of workspace IDs
+2. **Workspace hierarchies** - Backup a workspace and all its direct and indirect children
+3. **Entire organization** - Backup all workspaces in the organization
+
+Each backup includes:
+- Workspace declarative model (logical data model, analytics model, permissions)
+- User data filters
+- Filter views
+- Automations
+
+#### Storage Options
+
+Backups can be stored in:
+- **Local storage** - Save backups to a local directory
+- **S3 storage** - Upload backups to an AWS S3 bucket
+- **Azure Blob Storage** - Upload backups to Azure Blob Storage
+
+#### Basic Usage
+
+```python
+import os
+from pathlib import Path
+
+from gooddata_pipelines import BackupManager
+from gooddata_pipelines.backup_and_restore.models.storage import (
+    BackupRestoreConfig,
+    LocalStorageConfig,
+    StorageType,
+)
+from gooddata_pipelines.logger.logger import LogObserver
+
+# Optionally, subscribe a standard Python logger to the LogObserver
+import logging
+logger = logging.getLogger(__name__)
+LogObserver().subscribe(logger)
+
+# Configure backup storage
+config = BackupRestoreConfig(
+    storage_type=StorageType.LOCAL,
+    storage=LocalStorageConfig(),
+    batch_size=10,  # Number of workspaces to process in one batch
+    api_calls_per_second=10,  # Rate limit for API calls
+)
+
+# Create the BackupManager instance
+backup_manager = BackupManager.create(
+    config=config,
+    host=os.environ["GDC_HOSTNAME"],
+    token=os.environ["GDC_AUTH_TOKEN"]
+)
+
+# Backup specific workspaces
+workspace_ids = ["workspace1", "workspace2", "workspace3"]
+backup_manager.backup_workspaces(workspace_ids=workspace_ids)
+
+# Or read workspace IDs from a CSV file
+backup_manager.backup_workspaces(path_to_csv="workspaces.csv")
+
+# Backup workspace hierarchies (workspace + all children)
+backup_manager.backup_hierarchies(workspace_ids=["parent_workspace"])
+
+# Backup entire organization
+backup_manager.backup_entire_organization()
+```
+
+#### Using S3 Storage
+
+```python
+from gooddata_pipelines.backup_and_restore.models.storage import (
+    BackupRestoreConfig,
+    S3StorageConfig,
+    StorageType,
+)
+
+# Configure S3 storage with explicit credentials
+config = BackupRestoreConfig(
+    storage_type=StorageType.S3,
+    storage=S3StorageConfig(
+        bucket="my-backup-bucket",
+        backup_path="gooddata-backups/",
+        aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
+        aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
+        aws_default_region="us-east-1"
+    ),
+)
+
+# Or use an AWS profile
+config = BackupRestoreConfig(
+    storage_type=StorageType.S3,
+    storage=S3StorageConfig(
+        bucket="my-backup-bucket",
+        backup_path="gooddata-backups/",
+        profile="my-aws-profile"
+    ),
+)
+
+backup_manager = BackupManager.create(
+    config=config,
+    host=os.environ["GDC_HOSTNAME"],
+    token=os.environ["GDC_AUTH_TOKEN"]
+)
+
+backup_manager.backup_workspaces(workspace_ids=["workspace1"])
+```
+
+#### Using Azure Blob Storage
+
+```python
+from gooddata_pipelines.backup_and_restore.models.storage import (
+    BackupRestoreConfig,
+    AzureStorageConfig,
+    StorageType,
+)
+
+# Configure Azure storage
+config = BackupRestoreConfig(
+    storage_type=StorageType.AZURE,
+    storage=AzureStorageConfig(
+        account_name="mystorageaccount",
+        container="my-backup-container",
+        backup_path="gooddata-backups/"
+    ),
+)
+
+backup_manager = BackupManager.create(
+    config=config,
+    host=os.environ["GDC_HOSTNAME"],
+    token=os.environ["GDC_AUTH_TOKEN"]
+)
+
+backup_manager.backup_workspaces(workspace_ids=["workspace1"])
+```
+
+#### Using GoodData Profile
+
+You can also create the BackupManager from a GoodData profile file:
+
+```python
+from pathlib import Path
+
+backup_manager = BackupManager.create_from_profile(
+    config=config,
+    profile="production",
+    profiles_path=Path.home() / ".gooddata" / "profiles.yaml"
+)
+```
+
+#### CSV File Format
+
+When providing workspace IDs via a CSV file, the file should have a `workspace_id` column:
+
+```csv
+workspace_id
+workspace1
+workspace2
+workspace3
+```
+
+#### Configuration Options
+
+The `BackupRestoreConfig` class accepts the following parameters:
+- `storage_type` - Type of storage (`StorageType.LOCAL`, `StorageType.S3`, or `StorageType.AZURE`)
+- `storage` - Storage-specific configuration (`LocalStorageConfig`, `S3StorageConfig`, or `AzureStorageConfig`)
+- `batch_size` (optional, default: 10) - Number of workspaces to process in one batch
+- `api_calls_per_second` (optional, default: 10) - Rate limit for API calls to avoid throttling
+- `api_page_size` (optional, default: 500) - Page size for paginated API calls
+
+#### Error Handling and Retries
+
+The backup process includes automatic retry logic with exponential backoff. If a batch fails, it will retry up to 3 times before failing completely. Individual workspace errors are logged but don't stop the entire backup process.
+
+### Restore
+
+Note: Restore functionality is currently in development.
+
 ## Bugs & Requests
 
-Please use the [GitHub issue tracker](https://github.com/gooddata/gooddata-python-sdk/issues) to submit bugs
-or request features.
+Please use the [GitHub issue tracker](https://github.com/gooddata/gooddata-python-sdk/issues) to submit bugs or request features.
 
 ## Changelog
 
-See [Github releases](https://github.com/gooddata/gooddata-python-sdk/releases) for released versions
-and a list of changes.
+See [GitHub releases](https://github.com/gooddata/gooddata-python-sdk/releases) for released versions and a list of changes.
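
The retry behavior described in the new "Error Handling and Retries" section (up to 3 attempts per batch, exponential backoff, per-workspace errors logged but non-fatal) could look roughly like the sketch below. This is illustrative only; `process_batch` and the delay constants are hypothetical stand-ins, not the package's actual internals.

```python
import logging
import time

logger = logging.getLogger(__name__)

MAX_RETRIES = 3  # "retry up to 3 times" per the README
BASE_DELAY_SECONDS = 1.0  # hypothetical base for the exponential backoff


def process_batch(batch: list[str]) -> None:
    """Placeholder for the real per-batch backup work."""
    ...


def run_batch_with_retries(batch: list[str]) -> None:
    """Retry a failing batch with exponential backoff before giving up."""
    for attempt in range(1, MAX_RETRIES + 1):
        try:
            process_batch(batch)
            return
        except Exception as exc:
            if attempt == MAX_RETRIES:
                raise  # the batch fails completely after the last attempt
            delay = BASE_DELAY_SECONDS * 2 ** (attempt - 1)
            logger.warning("Batch failed (%s); retrying in %.1fs", exc, delay)
            time.sleep(delay)
```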

packages/gooddata-pipelines/pyproject.toml

Lines changed: 2 additions & 0 deletions

@@ -14,6 +14,8 @@ dependencies = [
     "gooddata-sdk~=1.54.0",
     "boto3 (>=1.39.3,<2.0.0)",
     "boto3-stubs (>=1.39.3,<2.0.0)",
+    "azure-storage-blob (>=12.19.0,<13.0.0)",
+    "azure-identity (>=1.15.0,<2.0.0)",
     "types-pyyaml (>=6.0.12.20250326,<7.0.0)",
 ]

packages/gooddata-pipelines/src/gooddata_pipelines/__init__.py

Lines changed: 4 additions & 0 deletions

@@ -5,6 +5,7 @@
 # -------- Backup and Restore --------
 from .backup_and_restore.backup_manager import BackupManager
 from .backup_and_restore.models.storage import (
+    AzureStorageConfig,
     BackupRestoreConfig,
     LocalStorageConfig,
     S3StorageConfig,
@@ -14,6 +15,7 @@
     RestoreManager,
     WorkspaceToRestore,
 )
+from .backup_and_restore.storage.azure_storage import AzureStorage
 from .backup_and_restore.storage.local_storage import LocalStorage
 from .backup_and_restore.storage.s3_storage import S3Storage
@@ -67,13 +69,15 @@
     "StorageType",
     "LocalStorage",
     "S3Storage",
+    "AzureStorage",
     "WorkspaceFullLoad",
     "WorkspaceProvisioner",
     "UserIncrementalLoad",
     "UserGroupIncrementalLoad",
     "PermissionFullLoad",
     "LocalStorageConfig",
     "S3StorageConfig",
+    "AzureStorageConfig",
     "PermissionIncrementalLoad",
     "UserFullLoad",
     "UserGroupFullLoad",

packages/gooddata-pipelines/src/gooddata_pipelines/backup_and_restore/base_manager.py

Lines changed: 6 additions & 1 deletion

@@ -6,7 +6,7 @@
 
 from gooddata_sdk.utils import PROFILES_FILE_PATH, profile_content
 
-from gooddata_pipelines.api.gooddata_api_wrapper import GoodDataApi
+from gooddata_pipelines.api import GoodDataApi
 from gooddata_pipelines.backup_and_restore.models.storage import (
     BackupRestoreConfig,
     StorageType,
@@ -18,6 +18,9 @@
     LocalStorage,
 )
 from gooddata_pipelines.backup_and_restore.storage.s3_storage import S3Storage
+from gooddata_pipelines.backup_and_restore.storage.azure_storage import (
+    AzureStorage,
+)
 from gooddata_pipelines.logger import LogObserver
 from gooddata_pipelines.utils.file_utils import JsonUtils, YamlUtils
 
@@ -44,6 +47,8 @@ def _get_storage(self, conf: BackupRestoreConfig) -> BackupStorage:
         """Returns the storage class based on the storage type."""
         if conf.storage_type == StorageType.S3:
             return S3Storage(conf)
+        elif conf.storage_type == StorageType.AZURE:
+            return AzureStorage(conf)
         elif conf.storage_type == StorageType.LOCAL:
             return LocalStorage(conf)
         else:
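
The diff doesn't show `AzureStorage` itself, but since `AzureStorageConfig` carries only an account name, container, and backup path (no secrets), the new `azure-identity` dependency suggests authentication via `DefaultAzureCredential`. A minimal sketch of what such an upload might look like, as an assumption for illustration rather than the actual `AzureStorage` implementation:

```python
# Illustrative sketch only -- not the actual AzureStorage implementation.
# Assumes credentials are resolved by DefaultAzureCredential (environment
# variables, managed identity, or a prior `az login`).
from azure.identity import DefaultAzureCredential
from azure.storage.blob import BlobServiceClient


def upload_backup(
    account_name: str, container: str, blob_path: str, data: bytes
) -> None:
    """Upload one backup archive to Azure Blob Storage."""
    service = BlobServiceClient(
        account_url=f"https://{account_name}.blob.core.windows.net",
        credential=DefaultAzureCredential(),
    )
    blob = service.get_blob_client(container=container, blob=blob_path)
    blob.upload_blob(data, overwrite=True)
```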
