Commit 5102bff

feat(workspace-backup): Backup to Azure blob storage
1 parent b7e9aeb commit 5102bff

File tree

8 files changed, +311 -16 lines changed

.gitignore

Lines changed: 10 additions & 0 deletions
@@ -12,6 +12,16 @@ packages/gooddata-sdk/tests/catalog/translate
 .vscode
 .ruff_cache
 
+# Python build artifacts
+.tox
+*.egg-info
+dist/
+build/
+__pycache__/
+*.pyc
+*.pyo
+*.pyd
+
 docs/node_modules
 docs/public
 docs/resources/_gen

packages/gooddata-pipelines/README.md

Lines changed: 45 additions & 6 deletions
@@ -11,7 +11,7 @@ You can use the package to manage following resources in GDC:
    - User Data Filters
    - Child workspaces (incl. Workspace Data Filter settings)
 1. Backup and restore of workspaces
-   - Create and backup snapshots of workspace metadata.
+   - Create and backup snapshots of workspace metadata to local storage, AWS S3, or Azure Blob Storage
 1. LDM Extension
    - extend the Logical Data Model of a child workspace with custom datasets and fields
 
@@ -34,7 +34,7 @@ import logging
 from csv import DictReader
 from pathlib import Path
 
-# Import the Entity Provisioner class and corresponding model from gooddata_pipelines library
+# Import the Entity Provisioner class and corresponding model from the gooddata_pipelines library
 from gooddata_pipelines import UserFullLoad, UserProvisioner
 
 # Create the Provisioner instance - you can also create the instance from a GDC yaml profile
@@ -62,12 +62,51 @@ provisioner.full_load(full_load_data)
 
 ```
 
+Ready-made scripts covering the basic use cases can be found here in the [GoodData Productivity Tools](https://github.com/gooddata/gooddata-productivity-tools) repository.
+
+## Backup and Restore of Workspaces
+
+The backup and restore module allows you to create snapshots of GoodData Cloud workspaces and restore them later. Backups can be stored locally, in AWS S3, or Azure Blob Storage.
+
+```python
+import os
+
+from gooddata_pipelines import BackupManager
+from gooddata_pipelines.backup_and_restore.models.storage import (
+    BackupRestoreConfig,
+    LocalStorageConfig,
+    StorageType,
+)
+
+# Configure backup storage
+config = BackupRestoreConfig(
+    storage_type=StorageType.LOCAL,
+    storage=LocalStorageConfig(),
+)
+
+# Create the BackupManager instance
+backup_manager = BackupManager.create(
+    config=config,
+    host=os.environ["GDC_HOSTNAME"],
+    token=os.environ["GDC_AUTH_TOKEN"]
+)
+
+# Backup specific workspaces
+backup_manager.backup_workspaces(workspace_ids=["workspace1", "workspace2"])
+
+# Backup workspace hierarchies (workspace + all children)
+backup_manager.backup_hierarchies(workspace_ids=["parent_workspace"])
+
+# Backup entire organization
+backup_manager.backup_entire_organization()
+```
+
+For S3 or Azure Blob Storage, configure the appropriate storage type and credentials in `BackupRestoreConfig`.
+
 ## Bugs & Requests
 
-Please use the [GitHub issue tracker](https://github.com/gooddata/gooddata-python-sdk/issues) to submit bugs
-or request features.
+Please use the [GitHub issue tracker](https://github.com/gooddata/gooddata-python-sdk/issues) to submit bugs or request features.
 
 ## Changelog
 
-See [Github releases](https://github.com/gooddata/gooddata-python-sdk/releases) for released versions
-and a list of changes.
+See [GitHub releases](https://github.com/gooddata/gooddata-python-sdk/releases) for released versions and a list of changes.
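As a point of reference for the new README section above, here is a minimal sketch of the same workflow backed by Azure Blob Storage, using the `AzureStorageConfig.from_service_principal` constructor introduced in this commit (the storage account, container, and Azure environment variable names are illustrative placeholders):

```python
import os

from gooddata_pipelines import BackupManager
from gooddata_pipelines.backup_and_restore.models.storage import (
    AzureStorageConfig,
    BackupRestoreConfig,
    StorageType,
)

# Service principal credentials; account and container names are placeholders.
azure_storage = AzureStorageConfig.from_service_principal(
    backup_path="gooddata-backups",
    account_name="examplestorageaccount",
    container="workspace-backups",
    client_id=os.environ["AZURE_CLIENT_ID"],
    client_secret=os.environ["AZURE_CLIENT_SECRET"],
    tenant_id=os.environ["AZURE_TENANT_ID"],
)

# Same manager API as the local-storage example; only the storage config changes.
config = BackupRestoreConfig(
    storage_type=StorageType.AZURE,
    storage=azure_storage,
)
backup_manager = BackupManager.create(
    config=config,
    host=os.environ["GDC_HOSTNAME"],
    token=os.environ["GDC_AUTH_TOKEN"],
)
backup_manager.backup_workspaces(workspace_ids=["workspace1", "workspace2"])
```

Because the backend is selected purely through `BackupRestoreConfig`, switching between local, S3, and Azure targets does not change the `BackupManager` calls themselves.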

packages/gooddata-pipelines/pyproject.toml

Lines changed: 2 additions & 0 deletions
@@ -14,6 +14,8 @@ dependencies = [
     "gooddata-sdk~=1.54.0",
     "boto3 (>=1.39.3,<2.0.0)",
     "boto3-stubs (>=1.39.3,<2.0.0)",
+    "azure-storage-blob (>=12.19.0,<13.0.0)",
+    "azure-identity (>=1.15.0,<2.0.0)",
     "types-pyyaml (>=6.0.12.20250326,<7.0.0)",
 ]
 

packages/gooddata-pipelines/src/gooddata_pipelines/__init__.py

Lines changed: 4 additions & 0 deletions
@@ -5,6 +5,7 @@
 # -------- Backup and Restore --------
 from .backup_and_restore.backup_manager import BackupManager
 from .backup_and_restore.models.storage import (
+    AzureStorageConfig,
     BackupRestoreConfig,
     LocalStorageConfig,
     S3StorageConfig,
@@ -14,6 +15,7 @@
     RestoreManager,
     WorkspaceToRestore,
 )
+from .backup_and_restore.storage.azure_storage import AzureStorage
 from .backup_and_restore.storage.local_storage import LocalStorage
 from .backup_and_restore.storage.s3_storage import S3Storage
 
@@ -67,13 +69,15 @@
     "StorageType",
     "LocalStorage",
     "S3Storage",
+    "AzureStorage",
     "WorkspaceFullLoad",
     "WorkspaceProvisioner",
     "UserIncrementalLoad",
     "UserGroupIncrementalLoad",
     "PermissionFullLoad",
     "LocalStorageConfig",
     "S3StorageConfig",
+    "AzureStorageConfig",
     "PermissionIncrementalLoad",
     "UserFullLoad",
     "UserGroupFullLoad",

packages/gooddata-pipelines/src/gooddata_pipelines/backup_and_restore/base_manager.py

Lines changed: 6 additions & 1 deletion
@@ -6,7 +6,7 @@
 
 from gooddata_sdk.utils import PROFILES_FILE_PATH, profile_content
 
-from gooddata_pipelines.api.gooddata_api_wrapper import GoodDataApi
+from gooddata_pipelines.api import GoodDataApi
 from gooddata_pipelines.backup_and_restore.models.storage import (
     BackupRestoreConfig,
     StorageType,
@@ -18,6 +18,9 @@
     LocalStorage,
 )
 from gooddata_pipelines.backup_and_restore.storage.s3_storage import S3Storage
+from gooddata_pipelines.backup_and_restore.storage.azure_storage import (
+    AzureStorage,
+)
 from gooddata_pipelines.logger import LogObserver
 from gooddata_pipelines.utils.file_utils import JsonUtils, YamlUtils
 
@@ -44,6 +47,8 @@ def _get_storage(self, conf: BackupRestoreConfig) -> BackupStorage:
         """Returns the storage class based on the storage type."""
         if conf.storage_type == StorageType.S3:
             return S3Storage(conf)
+        elif conf.storage_type == StorageType.AZURE:
+            return AzureStorage(conf)
         elif conf.storage_type == StorageType.LOCAL:
             return LocalStorage(conf)
         else:
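With this dispatch in place, a manager built on the base class resolves its backend from `storage_type` alone. A minimal sketch, assuming the workload-identity constructor defined in `models/storage.py` below (account and container names are placeholders):

```python
from gooddata_pipelines.backup_and_restore.models.storage import (
    AzureStorageConfig,
    BackupRestoreConfig,
    StorageType,
)

# Constructing the config requires no Azure connectivity.
config = BackupRestoreConfig(
    storage_type=StorageType.AZURE,
    storage=AzureStorageConfig.from_workload_identity(
        backup_path="gooddata-backups",
        account_name="examplestorageaccount",
        container="workspace-backups",
    ),
)
# For this config, _get_storage takes the new StorageType.AZURE branch and
# returns an AzureStorage instance, so backups are written to Blob Storage.
```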

packages/gooddata-pipelines/src/gooddata_pipelines/backup_and_restore/models/storage.py

Lines changed: 105 additions & 3 deletions
@@ -14,6 +14,7 @@ class StorageType(Enum):
 
     S3 = "s3"
     LOCAL = "local"
+    AZURE = "azure"
 
 
 class S3StorageConfig(BaseModel):
@@ -91,6 +92,102 @@ def from_aws_profile(
         return cls(backup_path=backup_path, bucket=bucket, profile=profile)
 
 
+class AzureStorageConfig(BaseModel):
+    """Configuration for Azure Blob Storage.
+
+    Can be created using the following constructor methods:
+    - `from_workload_identity`
+    - `from_connection_string`
+    - `from_service_principal`
+    """
+
+    backup_path: str
+    account_name: str
+    container: str
+    connection_string: str | None = None
+    client_id: str | None = None
+    client_secret: str | None = None
+    tenant_id: str | None = None
+
+    @classmethod
+    def from_workload_identity(
+        cls, backup_path: str, account_name: str, container: str
+    ) -> "AzureStorageConfig":
+        """Use Azure Workload Identity (for Kubernetes).
+
+        Args:
+            backup_path: The path to the backup directory.
+            account_name: The Azure storage account name.
+            container: The name of the blob container.
+
+        Returns:
+            AzureStorageConfig: The Azure storage configuration.
+        """
+        return cls(
+            backup_path=backup_path,
+            account_name=account_name,
+            container=container,
+        )
+
+    @classmethod
+    def from_connection_string(
+        cls,
+        backup_path: str,
+        account_name: str,
+        container: str,
+        connection_string: str,
+    ) -> "AzureStorageConfig":
+        """Use Azure Storage connection string.
+
+        Args:
+            backup_path: The path to the backup directory.
+            account_name: The Azure storage account name.
+            container: The name of the blob container.
+            connection_string: The Azure Storage connection string.
+
+        Returns:
+            AzureStorageConfig: The Azure storage configuration.
+        """
+        return cls(
+            backup_path=backup_path,
+            account_name=account_name,
+            container=container,
+            connection_string=connection_string,
+        )
+
+    @classmethod
+    def from_service_principal(
+        cls,
+        backup_path: str,
+        account_name: str,
+        container: str,
+        client_id: str,
+        client_secret: str,
+        tenant_id: str,
+    ) -> "AzureStorageConfig":
+        """Use Azure Service Principal credentials.
+
+        Args:
+            backup_path: The path to the backup directory.
+            account_name: The Azure storage account name.
+            container: The name of the blob container.
+            client_id: The Azure client ID.
+            client_secret: The Azure client secret.
+            tenant_id: The Azure tenant ID.
+
+        Returns:
+            AzureStorageConfig: The Azure storage configuration.
+        """
+        return cls(
+            backup_path=backup_path,
+            account_name=account_name,
+            container=container,
+            client_id=client_id,
+            client_secret=client_secret,
+            tenant_id=tenant_id,
+        )
+
+
 class LocalStorageConfig(BaseModel):
     """Placeholder for local storage config."""
 
@@ -102,14 +199,14 @@ class BackupRestoreConfig(BaseModel):
 
     Args:
         storage_type: The type of storage to use. Defaults to `StorageType.LOCAL`.
-        storage: Storage configuration. Either `S3StorageConfig` or `LocalStorageConfig`. Defaults to `LocalStorageConfig()`.
+        storage: Storage configuration. Either `S3StorageConfig`, `AzureStorageConfig`, or `LocalStorageConfig`. Defaults to `LocalStorageConfig()`.
         api_page_size: The page size for fetching workspace relationships. Defaults to `BackupSettings.API.PAGE_SIZE`.
        batch_size: The batch size for fetching workspace relationships. Defaults to `BackupSettings.API.BATCH_SIZE`.
         api_calls_per_second: The maximum API calls per second (rate limiting). Defaults to `BackupSettings.API.CALLS_PER_SECOND`.
     """
 
     storage_type: StorageType = Field(default=StorageType.LOCAL)
-    storage: S3StorageConfig | LocalStorageConfig = Field(
+    storage: S3StorageConfig | AzureStorageConfig | LocalStorageConfig = Field(
         default_factory=LocalStorageConfig
     )
     api_page_size: Annotated[
@@ -142,12 +239,17 @@ def from_yaml(cls, conf_path: str) -> "BackupRestoreConfig":
 
     @model_validator(mode="after")
     def validate_storage(self) -> "BackupRestoreConfig":
-        """Check that the storage gets correct configuration when using S3 storage"""
+        """Check that the storage gets correct configuration"""
         if self.storage_type == StorageType.S3:
             if not isinstance(self.storage, S3StorageConfig):
                 raise ValueError(
                     "S3 storage must be configured with S3StorageConfig object"
                 )
+        elif self.storage_type == StorageType.AZURE:
+            if not isinstance(self.storage, AzureStorageConfig):
+                raise ValueError(
+                    "Azure storage must be configured with AzureStorageConfig object"
+                )
         elif self.storage_type == StorageType.LOCAL:
             if not isinstance(self.storage, LocalStorageConfig):
                 raise ValueError(
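Besides the service-principal and workload-identity constructors sketched earlier, the connection-string variant and the extended `validate_storage` check can be exercised as follows; a short sketch (the environment variable and resource names are illustrative, and pydantic surfaces the validator's `ValueError` as a validation error):

```python
import os

from gooddata_pipelines.backup_and_restore.models.storage import (
    AzureStorageConfig,
    BackupRestoreConfig,
    LocalStorageConfig,
    StorageType,
)

# Connection-string based configuration.
azure = AzureStorageConfig.from_connection_string(
    backup_path="gooddata-backups",
    account_name="examplestorageaccount",
    container="workspace-backups",
    connection_string=os.environ["AZURE_STORAGE_CONNECTION_STRING"],
)
config = BackupRestoreConfig(storage_type=StorageType.AZURE, storage=azure)

# validate_storage rejects a mismatched storage_type / storage pairing.
try:
    BackupRestoreConfig(storage_type=StorageType.AZURE, storage=LocalStorageConfig())
except ValueError as exc:
    # The error message includes:
    # "Azure storage must be configured with AzureStorageConfig object"
    print(exc)
```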
