diff --git a/libcflib/preloader.py b/libcflib/preloader.py
index d48e0fc..398b7a3 100644
--- a/libcflib/preloader.py
+++ b/libcflib/preloader.py
@@ -123,10 +123,11 @@ def reap_package(root_path, package, dst_path, src_url, progress_callback=None):
     return harvested_data
 
 
-def reap(path, known_bad_packages=()):
+def reap(path, known_bad_packages=(), n_pkgs=1000):
     sorted_files = list(diff(path))
     print(f"TOTAL OUTSTANDING ARTIFACTS: {len(sorted_files)}")
-    sorted_files = sorted_files[:1000]
+    # n_pkgs caps how many outstanding artifacts this run handles
+    sorted_files = sorted_files[:n_pkgs]
     progress = tqdm.tqdm(total=len(sorted_files))
 
     with ThreadPoolExecutor(max_workers=20) as pool:
@@ -160,6 +161,14 @@ def reap(path, known_bad_packages=()):
         "--known-bad-packages",
         help="name of a json file containing a list of urls to be skipped",
     )
+    # argparse returns strings unless a type is given; n_pkgs is used as a
+    # slice bound in reap(), so it must be converted to int here
+    parser.add_argument(
+        "--n_pkgs",
+        help="number of pkgs to run the preloader on",
+        type=int,
+        default=100,
+    )
     args = parser.parse_args()
     print(args)
 
@@ -169,4 +178,4 @@ def reap(path, known_bad_packages=()):
     else:
         known_bad_packages = set()
 
-    reap(args.root_path, known_bad_packages)
+    reap(args.root_path, known_bad_packages, n_pkgs=args.n_pkgs)