feat(gooddata-pipelines): Backup workspaces from list of workspace IDs

benkeanna · benkeanna · commit 4ace2db12966 · 2025-08-13T14:45:40.000+02:00
diff --git a/gooddata-pipelines/gooddata_pipelines/backup_and_restore/backup_input_processor.py b/gooddata-pipelines/gooddata_pipelines/backup_and_restore/backup_input_processor.py
@@ -158,25 +158,59 @@ def get_all_workspaces(self) -> list[str]:
         return all_workspaces
 
     def get_ids_to_backup(
-        self, input_type: InputType, path_to_csv: str | None = None
+        self,
+        input_type: InputType,
+        path_to_csv: str | None = None,
+        workspace_ids: list[str] | None = None,
     ) -> list[str]:
-        """Returns the list of workspace IDs to back up based on the input type."""
+        """Returns the list of workspace IDs to back up based on the input type.
+
+        For the list-of-workspaces and hierarchy input types, the source IDs
+        are read from a CSV file or taken from `workspace_ids`; exactly one of
+        the two sources must be supplied.
+
+        Args:
+            input_type (InputType): Determines how the workspace IDs are
+                collected.
+            path_to_csv (str | None): Path to a CSV file with workspace IDs.
+            workspace_ids (list[str] | None): Workspace IDs passed directly.
+
+        Raises:
+            ValueError: If the input type requires a source of workspace IDs
+                and neither or both of `path_to_csv` and `workspace_ids` are
+                given.
+        """
 
         if input_type in (InputType.LIST_OF_WORKSPACES, InputType.HIERARCHY):
-            if path_to_csv is None:
+            if path_to_csv is None and workspace_ids is None:
                 raise ValueError(
-                    f"Path to CSV is required for this input type: {input_type.value}"
+                    f"Path to CSV or list of workspace IDs is required for this input type: {input_type.value}"
+                )
+            elif path_to_csv is not None and workspace_ids is not None:
+                raise ValueError(
+                    f"Path to CSV and list of workspace IDs are mutually exclusive for this input type: {input_type.value}"
                 )
 
             # If we're backing up based on the list, simply read it from the CSV
+            # or return the workspace IDs that were passed in directly.
             if input_type == InputType.LIST_OF_WORKSPACES:
-                return self.csv_reader.read_backup_csv(path_to_csv)
+                # Explicit None check keeps this in step with the validation
+                # above, so the else branch always has a list of IDs.
+                if path_to_csv is not None:
+                    return self.csv_reader.read_backup_csv(path_to_csv)
+                else:
+                    return workspace_ids
             else:
                 # For hierarchy backup, we read the CSV and treat it as a list of
                 # parent workspace IDs. Then we retrieve the children of each parent,
                 # including their children, and so on. The parent workspaces are
                 # also included in the backup.
-                list_of_parents = self.csv_reader.read_backup_csv(path_to_csv)
+                if path_to_csv is not None:
+                    list_of_parents = self.csv_reader.read_backup_csv(
+                        path_to_csv
+                    )
+                else:
+                    list_of_parents = workspace_ids
                 list_of_children: list[str] = []
 
                 for parent in list_of_parents:
diff --git a/gooddata-pipelines/gooddata_pipelines/backup_and_restore/backup_manager.py b/gooddata-pipelines/gooddata_pipelines/backup_and_restore/backup_manager.py
@@ -373,29 +373,47 @@ def process_batches_in_parallel(
 
                     raise
 
-    def backup_workspaces(self, path_to_csv: str) -> None:
+    def backup_workspaces(
+        self,
+        path_to_csv: str | None = None,
+        workspace_ids: list[str] | None = None,
+    ) -> None:
         """Runs the backup process for a list of workspace IDs.
 
-        Will read the list of workspace IDs from a CSV file and create backup for
-        each workspace in storage specified in the configuration.
+        Will take the list of workspace IDs or read the list of
+        workspace IDs from a CSV file and create backup for each
+        workspace in storage specified in the configuration.
+
+        Exactly one of `path_to_csv` and `workspace_ids` must be provided.
 
         Args:
-            path_to_csv (str): Path to a CSV file containing a list of workspace IDs.
+            path_to_csv (str | None): Path to a CSV file containing a list of
+                workspace IDs.
+            workspace_ids (list[str] | None): List of workspace IDs to back up.
         """
-        self.backup(InputType.LIST_OF_WORKSPACES, path_to_csv)
+        self.backup(InputType.LIST_OF_WORKSPACES, path_to_csv, workspace_ids)
 
-    def backup_hierarchies(self, path_to_csv: str) -> None:
+    def backup_hierarchies(
+        self,
+        path_to_csv: str | None = None,
+        workspace_ids: list[str] | None = None,
+    ) -> None:
         """Runs the backup process for a list of hierarchies.
 
-        Will read the list of workspace IDs from a CSV file and create backup for
-        each those workspaces' hierarchies in storage specified in the configuration.
+        Will take the list of workspace IDs or read the list of workspace IDs
+        from a CSV file and create backup for each of those workspaces'
+        hierarchies in storage specified in the configuration.
         Workspace hierarchy means the workspace itself and all its direct and
         indirect children.
+
+        Exactly one of `path_to_csv` and `workspace_ids` must be provided.
 
         Args:
-            path_to_csv (str): Path to a CSV file containing a list of workspace IDs.
+            path_to_csv (str | None): Path to a CSV file containing a list of
+                workspace IDs.
+            workspace_ids (list[str] | None): List of parent workspace IDs.
         """
-        self.backup(InputType.HIERARCHY, path_to_csv)
+        self.backup(InputType.HIERARCHY, path_to_csv, workspace_ids)
 
     def backup_entire_organization(self) -> None:
         """Runs the backup process for the entire organization.
@@ -406,12 +424,17 @@ def backup_entire_organization(self) -> None:
         self.backup(InputType.ORGANIZATION)
 
     def backup(
-        self, input_type: InputType, path_to_csv: str | None = None
+        self,
+        input_type: InputType,
+        path_to_csv: str | None = None,
+        workspace_ids: list[str] | None = None,
     ) -> None:
         """Runs the backup process with selected input type."""
         try:
             workspaces_to_export: list[str] = self.loader.get_ids_to_backup(
-                input_type, path_to_csv
+                input_type,
+                path_to_csv,
+                workspace_ids,
             )
             batches = self.split_to_batches(
                 workspaces_to_export, self.config.batch_size