@@ -4,10 +4,8 @@
 import os
 import shutil
 import tempfile
-import threading
 import time
 import traceback
-from concurrent.futures import ThreadPoolExecutor, as_completed
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, Type
@@ -39,6 +37,7 @@
     S3Storage,
 )
 from gooddata_pipelines.logger import LogObserver
+from gooddata_pipelines.utils.rate_limiter import RateLimiter
 
 
 @dataclass
@@ -60,6 +59,10 @@ def __init__(self, host: str, token: str, config: BackupRestoreConfig):
 
         self.loader = BackupInputProcessor(self._api, self.config.api_page_size)
 
+        self._api_rate_limiter = RateLimiter(
+            calls_per_second=self.config.api_calls_per_second,
+        )
+
     @classmethod
     def create(
         cls: Type["BackupManager"],
@@ -95,11 +98,12 @@ def _get_storage(conf: BackupRestoreConfig) -> BackupStorage:
 
     def get_user_data_filters(self, ws_id: str) -> dict:
         """Returns the user data filters for the specified workspace."""
-        response: requests.Response = self._api.get_user_data_filters(ws_id)
-        if response.ok:
-            return response.json()
-        else:
-            raise RuntimeError(f"{response.status_code}: {response.text}")
+        with self._api_rate_limiter:
+            response: requests.Response = self._api.get_user_data_filters(ws_id)
+            if response.ok:
+                return response.json()
+            else:
+                raise RuntimeError(f"{response.status_code}: {response.text}")
 
     def _store_user_data_filters(
         self,
@@ -144,14 +148,17 @@ def _write_to_yaml(path: str, source: Any) -> None:
 
     def _get_automations_from_api(self, workspace_id: str) -> Any:
         """Returns automations for the workspace as JSON."""
-        response: requests.Response = self._api.get_automations(workspace_id)
-        if response.ok:
-            return response.json()
-        else:
-            raise RuntimeError(
-                f"Failed to get automations for {workspace_id}. "
-                + f"{response.status_code}: {response.text}"
+        with self._api_rate_limiter:
+            response: requests.Response = self._api.get_automations(
+                workspace_id
             )
+            if response.ok:
+                return response.json()
+            else:
+                raise RuntimeError(
+                    f"Failed to get automations for {workspace_id}. "
+                    + f"{response.status_code}: {response.text}"
+                )
 
     def _store_automations(self, export_path: Path, workspace_id: str) -> None:
         """Stores the automations in the specified export path."""
@@ -183,7 +190,8 @@ def store_declarative_filter_views(
     ) -> None:
         """Stores the filter views in the specified export path."""
         # Get the filter views YAML files from the API
-        self._api.store_declarative_filter_views(workspace_id, export_path)
+        with self._api_rate_limiter:
+            self._api.store_declarative_filter_views(workspace_id, export_path)
 
         # Move filter views to the subfolder containing the analytics model
         self._move_folder(
@@ -231,7 +239,10 @@ def _get_workspace_export(
             # the SDK. That way we could save and package all the declarations
             # directly instead of reorganizing the folder structures. That should
             # be more transparent/readable and possibly safer for threading
-            self._api.store_declarative_workspace(workspace_id, export_path)
+            with self._api_rate_limiter:
+                self._api.store_declarative_workspace(
+                    workspace_id, export_path
+                )
             self.store_declarative_filter_views(export_path, workspace_id)
             self._store_automations(export_path, workspace_id)
 
@@ -291,18 +302,13 @@ def _split_to_batches(
     def _process_batch(
         self,
         batch: BackupBatch,
-        stop_event: threading.Event,
         retry_count: int = 0,
     ) -> None:
         """Processes a single batch of workspaces for backup.
         If the batch processing fails, the function will wait
         and retry with exponential backoff up to BackupSettings.MAX_RETRIES.
         The base wait time is defined by BackupSettings.RETRY_DELAY.
         """
-        if stop_event.is_set():
-            # If the stop_event flag is set, return. This will terminate the thread
-            return
-
         try:
             with tempfile.TemporaryDirectory() as tmpdir:
                 self._get_workspace_export(tmpdir, batch.list_of_ids)
@@ -314,10 +320,7 @@ def _process_batch(
                 self.storage.export(tmpdir, self.org_id)
 
         except Exception as e:
-            if stop_event.is_set():
-                return
-
-            elif retry_count < BackupSettings.MAX_RETRIES:
+            if retry_count < BackupSettings.MAX_RETRIES:
                 # Retry with exponential backoff until MAX_RETRIES
                 next_retry = retry_count + 1
                 wait_time = BackupSettings.RETRY_DELAY**next_retry
@@ -328,52 +331,23 @@ def _process_batch(
                 )
 
                 time.sleep(wait_time)
-                self._process_batch(batch, stop_event, next_retry)
+                self._process_batch(batch, next_retry)
             else:
                 # If the batch fails after MAX_RETRIES, raise the error
                 self.logger.error(f"Batch failed: {e.__class__.__name__}: {e}")
                 raise
 
-    def _process_batches_in_parallel(
+    def _process_batches(
         self,
         batches: list[BackupBatch],
     ) -> None:
         """
-        Processes batches in parallel using concurrent.futures. Will stop the processing
-        if any one of the batches fails.
+        Processes batches sequentially to avoid overloading the API.
+        If any batch fails, the processing will stop.
         """
-
-        # Create a threading flag to control the threads that have already been started
-        stop_event = threading.Event()
-
-        with ThreadPoolExecutor(
-            max_workers=self.config.max_workers
-        ) as executor:
-            # Set the futures tasks.
-            futures = []
-            for batch in batches:
-                futures.append(
-                    executor.submit(
-                        self._process_batch,
-                        batch,
-                        stop_event,
-                    )
-                )
-
-            # Process futures as they complete
-            for future in as_completed(futures):
-                try:
-                    future.result()
-                except Exception:
-                    # On failure, set the flag to True - signal running processes to stop
-                    stop_event.set()
-
-                    # Cancel unstarted threads
-                    for f in futures:
-                        if not f.done():
-                            f.cancel()
-
-                    raise
+        for i, batch in enumerate(batches, 1):
+            self.logger.info(f"Processing batch {i}/{len(batches)}...")
+            self._process_batch(batch)
 
     def backup_workspaces(
         self,
@@ -440,7 +414,7 @@ def _backup(
440414 f"Exporting { len (workspaces_to_export )} workspaces in { len (batches )} batches."
441415 )
442416
443- self ._process_batches_in_parallel (batches )
417+ self ._process_batches (batches )
444418
445419 self .logger .info ("Backup completed" )
446420 except Exception as e :
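
Note: the `RateLimiter` imported from `gooddata_pipelines.utils.rate_limiter` is not part of this diff. A minimal sketch of a compatible implementation, assuming the class only needs to act as a context manager that spaces calls at least `1 / calls_per_second` apart; everything beyond what the diff shows (internals, attribute names) is an assumption:

```python
import threading
import time


class RateLimiter:
    """Sketch: thread-safe context manager enforcing a minimum
    interval between successive API calls (assumed behaviour)."""

    def __init__(self, calls_per_second: float) -> None:
        self._min_interval = 1.0 / calls_per_second
        self._lock = threading.Lock()
        self._last_call = 0.0

    def __enter__(self) -> "RateLimiter":
        with self._lock:
            # Sleep just long enough to keep the configured pace.
            wait = self._last_call + self._min_interval - time.monotonic()
            if wait > 0:
                time.sleep(wait)
            self._last_call = time.monotonic()
        return self

    def __exit__(self, *exc: object) -> None:
        # Throttling happens on entry; nothing to release on exit,
        # and returning None lets exceptions propagate.
        return None
```

Combined with the switch from `ThreadPoolExecutor` to a sequential loop, this trades throughput for predictable API load: a failed batch now stops processing simply by raising out of `_process_batches`, so the `stop_event` cancellation plumbing is no longer needed.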