From f8e8b60ae9809b20ae4abfb494010458aefd70a5 Mon Sep 17 00:00:00 2001 From: "Christopher J. Wright" Date: Wed, 3 Feb 2021 18:43:39 -0500 Subject: [PATCH 1/2] n_pkgs arg and default to 100 --- libcflib/preloader.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/libcflib/preloader.py b/libcflib/preloader.py index d48e0fc..0b1afd5 100644 --- a/libcflib/preloader.py +++ b/libcflib/preloader.py @@ -123,10 +123,10 @@ def reap_package(root_path, package, dst_path, src_url, progress_callback=None): return harvested_data -def reap(path, known_bad_packages=()): +def reap(path, known_bad_packages=(), n_pkgs=1000): sorted_files = list(diff(path)) print(f"TOTAL OUTSTANDING ARTIFACTS: {len(sorted_files)}") - sorted_files = sorted_files[:1000] + sorted_files = sorted_files[:n_pkgs] progress = tqdm.tqdm(total=len(sorted_files)) with ThreadPoolExecutor(max_workers=20) as pool: @@ -160,6 +160,11 @@ def reap(path, known_bad_packages=()): "--known-bad-packages", help="name of a json file containing a list of urls to be skipped", ) + parser.add_argument( + "--n_pkgs", + help="number of pkgs to run the preloader on", + default=100 + ) args = parser.parse_args() print(args) @@ -169,4 +174,4 @@ def reap(path, known_bad_packages=()): else: known_bad_packages = set() - reap(args.root_path, known_bad_packages) + reap(args.root_path, known_bad_packages, n_pkgs=parser.n_pkgs) From 17dfed160b92ea171f18bad2ad58b6472fcd45b6 Mon Sep 17 00:00:00 2001 From: "Christopher J. Wright" Date: Wed, 3 Feb 2021 19:04:40 -0500 Subject: [PATCH 2/2] pull from args not from parser --- libcflib/preloader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcflib/preloader.py b/libcflib/preloader.py index 0b1afd5..398b7a3 100644 --- a/libcflib/preloader.py +++ b/libcflib/preloader.py @@ -174,4 +174,4 @@ def reap(path, known_bad_packages=(), n_pkgs=1000): else: known_bad_packages = set() - reap(args.root_path, known_bad_packages, n_pkgs=parser.n_pkgs) + reap(args.root_path, known_bad_packages, n_pkgs=args.n_pkgs)