From 31aeaf1cab019ee8100252b04b3b0e5a71700ebd Mon Sep 17 00:00:00 2001
From: Anna Benke <anna.benke@gooddata.com>
Date: Wed, 13 Aug 2025 14:45:40 +0200
Subject: [PATCH] feat(gooddata-pipelines): Backup workspaces from list of
 workspace IDs

---
 .../backup_input_processor.py                 | 18 +++++++---
 .../backup_and_restore/backup_manager.py      | 33 +++++++++++++------
 2 files changed, 36 insertions(+), 15 deletions(-)

diff --git a/gooddata-pipelines/gooddata_pipelines/backup_and_restore/backup_input_processor.py b/gooddata-pipelines/gooddata_pipelines/backup_and_restore/backup_input_processor.py
index 7b431d502..38a799f01 100644
--- a/gooddata-pipelines/gooddata_pipelines/backup_and_restore/backup_input_processor.py
+++ b/gooddata-pipelines/gooddata_pipelines/backup_and_restore/backup_input_processor.py
@@ -158,25 +158,33 @@ def get_all_workspaces(self) -> list[str]:
         return all_workspaces
 
     def get_ids_to_backup(
-        self, input_type: InputType, path_to_csv: str | None = None
+        self,
+        input_type: InputType,
+        path_to_csv: str | None = None,
+        workspace_ids: list[str] | None = None,
     ) -> list[str]:
         """Returns the list of workspace IDs to back up based on the input type."""
 
         if input_type in (InputType.LIST_OF_WORKSPACES, InputType.HIERARCHY):
-            if path_to_csv is None:
+            if (path_to_csv is None) == (workspace_ids is None):
                 raise ValueError(
-                    f"Path to CSV is required for this input type: {input_type.value}"
+                    f"Path to CSV and list of workspace IDs must be specified exclusively for this input type: {input_type.value}"
                 )
 
             # If we're backing up based on the list, simply read it from the CSV
+            list_of_parents = []
+            if path_to_csv is not None:
+                list_of_parents = self.csv_reader.read_backup_csv(path_to_csv)
+            if workspace_ids is not None:
+                list_of_parents = workspace_ids
+
             if input_type == InputType.LIST_OF_WORKSPACES:
-                return self.csv_reader.read_backup_csv(path_to_csv)
+                return list_of_parents
             else:
                 # For hierarchy backup, we read the CSV and treat it as a list of
                 # parent workspace IDs. Then we retrieve the children of each parent,
                 # including their children, and so on. The parent workspaces are
                 # also included in the backup.
-                list_of_parents = self.csv_reader.read_backup_csv(path_to_csv)
                 list_of_children: list[str] = []
 
                 for parent in list_of_parents:
diff --git a/gooddata-pipelines/gooddata_pipelines/backup_and_restore/backup_manager.py b/gooddata-pipelines/gooddata_pipelines/backup_and_restore/backup_manager.py
index 81b07c03f..0e8111ca9 100644
--- a/gooddata-pipelines/gooddata_pipelines/backup_and_restore/backup_manager.py
+++ b/gooddata-pipelines/gooddata_pipelines/backup_and_restore/backup_manager.py
@@ -373,29 +373,37 @@ def process_batches_in_parallel(
 
                     raise
 
-    def backup_workspaces(self, path_to_csv: str) -> None:
+    def backup_workspaces(
+        self,
+        path_to_csv: str | None = None,
+        workspace_ids: list[str] | None = None,
+    ) -> None:
         """Runs the backup process for a list of workspace IDs.
 
-        Will read the list of workspace IDs from a CSV file and create backup for
-        each workspace in storage specified in the configuration.
+        Backs up each listed workspace to the storage specified in the configuration.
 
         Args:
             path_to_csv (str): Path to a CSV file containing a list of workspace IDs.
+            workspace_ids (list[str]): Workspace IDs. Exactly one argument must be set.
         """
-        self.backup(InputType.LIST_OF_WORKSPACES, path_to_csv)
+        self.backup(InputType.LIST_OF_WORKSPACES, path_to_csv, workspace_ids)
 
-    def backup_hierarchies(self, path_to_csv: str) -> None:
+    def backup_hierarchies(
+        self,
+        path_to_csv: str | None = None,
+        workspace_ids: list[str] | None = None,
+    ) -> None:
         """Runs the backup process for a list of hierarchies.
 
-        Will read the list of workspace IDs from a CSV file and create backup for
-        each those workspaces' hierarchies in storage specified in the configuration.
+        Backs up the hierarchy of each listed workspace to the configured storage.
         Workspace hierarchy means the workspace itself and all its direct and
         indirect children.
 
         Args:
             path_to_csv (str): Path to a CSV file containing a list of workspace IDs.
+            workspace_ids (list[str]): Workspace IDs. Exactly one argument must be set.
         """
-        self.backup(InputType.HIERARCHY, path_to_csv)
+        self.backup(InputType.HIERARCHY, path_to_csv, workspace_ids)
 
     def backup_entire_organization(self) -> None:
         """Runs the backup process for the entire organization.
@@ -406,12 +414,17 @@ def backup_entire_organization(self) -> None:
         self.backup(InputType.ORGANIZATION)
 
     def backup(
-        self, input_type: InputType, path_to_csv: str | None = None
+        self,
+        input_type: InputType,
+        path_to_csv: str | None = None,
+        workspace_ids: list[str] | None = None,
     ) -> None:
         """Runs the backup process with selected input type."""
         try:
             workspaces_to_export: list[str] = self.loader.get_ids_to_backup(
-                input_type, path_to_csv
+                input_type,
+                path_to_csv,
+                workspace_ids,
             )
             batches = self.split_to_batches(
                 workspaces_to_export, self.config.batch_size