@@ -55,8 +55,9 @@ def get_with_retries(url, headers):
5555 )
5656
5757
58- def fetch_zip (commit_hash , zip_dir , * , org = 'python' , verbose ):
59- url = f'https://github.com/{ org } /cpython-source-deps/archive/{ commit_hash } .zip'
58+ def fetch_zip (commit_hash , zip_dir , * , org = 'python' , binary = False , verbose = False ):
59+ repo = 'cpython-bin-deps' if binary else 'cpython-source-deps'
60+ url = f'https://github.com/{ org } /{ repo } /archive/{ commit_hash } .zip'
6061 reporthook = None
6162 if verbose :
6263 reporthook = print
@@ -69,42 +70,48 @@ def fetch_zip(commit_hash, zip_dir, *, org='python', verbose):
6970 return filename
7071
7172
def fetch_release_asset(asset_id, output_path, org, verbose=False):
    """Download a GitHub release asset and verify its SHA-256 digest.

    Release assets need the Accept header set to
    application/octet-stream to download the binary, so we can't use
    urlretrieve. Code here is based on urlretrieve.

    The asset metadata is fetched first to read its ``digest`` field
    (``"sha256:<hex>"``); the binary is then streamed to *output_path*
    while being hashed, and the result is compared with that digest.

    Args:
        asset_id: Numeric id of the release asset in cpython-bin-deps.
        output_path: Destination file; it is opened in ``'wb'`` mode.
        org: GitHub organization (or user) that owns cpython-bin-deps.
        verbose: When true, print progress messages.

    Raises:
        RuntimeError: If the metadata has no usable ``digest``, the
            digest is not SHA-256, or the downloaded content does not
            match it. On a mismatch the downloaded file is removed.
    """
    # Function-scope import: only the mismatch cleanup below needs it.
    import os

    url = f'https://api.github.com/repos/{org}/cpython-bin-deps/releases/assets/{asset_id}'
    if verbose:
        print(f'Fetching metadata for asset {asset_id}...')
    # Close the metadata response explicitly, the same way the download
    # response is closed below, instead of leaving it to the GC.
    with contextlib.closing(
        get_with_retries(url,
                         headers={'Accept': 'application/vnd.github+json'})
    ) as metadata_resp:
        json_data = json.loads(metadata_resp.read())
    hash_info = json_data.get('digest')
    if not hash_info:
        raise RuntimeError(f'Release asset {asset_id} missing digest field in metadata')
    # partition() never fails to unpack, so a malformed digest yields a
    # clear error instead of an opaque ValueError.
    algorithm, sep, hashsum = hash_info.partition(':')
    if not sep or not hashsum:
        raise RuntimeError(
            f'Malformed digest {hash_info!r} for asset {asset_id}')
    if algorithm != 'sha256':
        raise RuntimeError(f'Unknown hash algorithm {algorithm} for asset {asset_id}')
    if verbose:
        print(f'Downloading asset {asset_id}...')
    hasher = hashlib.sha256()
    with contextlib.closing(
        get_with_retries(url, headers={'Accept': 'application/octet-stream'})
    ) as resp:
        with open(output_path, 'wb') as fp:
            while block := resp.read(io.DEFAULT_BUFFER_SIZE):
                hasher.update(block)
                fp.write(block)
    # hexdigest() is lowercase; normalise the advertised digest to match.
    if hasher.hexdigest() != hashsum.lower():
        # Do not leave an unverified file where a later step might use it.
        with contextlib.suppress(OSError):
            os.unlink(output_path)
        raise RuntimeError('Downloaded content hash did not match!')
    if verbose:
        print(f'Successfully downloaded and verified {output_path}')
99106
100107
def fetch_release(tag, tarball_dir, *, org='python', verbose=False):
    """Download the release tarball for *tag* into *tarball_dir*.

    Args:
        tag: Dependency tag; must be a key of ``TAG_TO_ASSET_ID``.
        tarball_dir: Directory for the tarball. It is used via
            ``.mkdir()`` and the ``/`` operator, and is created
            (including parents) if missing.
        org: GitHub organization passed on to ``fetch_release_asset``.
        verbose: When true, progress messages are printed by
            ``fetch_release_asset``.

    Returns:
        The path of the downloaded file, ``tarball_dir / '<tag>.tar.xz'``.

    Raises:
        ValueError: If *tag* has no entry in ``TAG_TO_ASSET_ID``.
    """
    # Validate the tag before touching the filesystem so an unknown tag
    # does not leave an empty directory tree behind.
    asset_id = TAG_TO_ASSET_ID.get(tag)
    if asset_id is None:
        raise ValueError(f'Unknown tag for binary dependencies {tag}')
    tarball_dir.mkdir(parents=True, exist_ok=True)
    output_path = tarball_dir / f'{tag}.tar.xz'
    fetch_release_asset(asset_id, output_path, org, verbose=verbose)
    return output_path
109116
110117
@@ -137,22 +144,36 @@ def parse_args():
137144
138145def main ():
139146 args = parse_args ()
140- if args .binary :
147+ final_name = args .externals_dir / args .tag
148+
149+ # Check if the dependency already exists in externals/ directory
150+ # (either already downloaded/extracted, or checked into the git tree)
151+ if final_name .exists ():
152+ if args .verbose :
153+ print (f'{ args .tag } already exists at { final_name } , skipping download.' )
154+ return
155+
156+ # Determine download method: release artifacts for large deps (like LLVM),
157+ # otherwise zip download from GitHub branches
158+ if args .tag in TAG_TO_ASSET_ID :
141159 tarball_path = fetch_release (
142160 args .tag ,
143161 args .externals_dir / 'tarballs' ,
144162 org = args .organization ,
163+ verbose = args .verbose ,
145164 )
146165 extracted = extract_tarball (args .externals_dir , tarball_path , args .tag )
147166 else :
167+ # Use zip download from GitHub branches
168+ # (cpython-bin-deps if --binary, cpython-source-deps otherwise)
148169 zip_path = fetch_zip (
149170 args .tag ,
150171 args .externals_dir / 'zips' ,
151172 org = args .organization ,
173+ binary = args .binary ,
152174 verbose = args .verbose ,
153175 )
154176 extracted = extract_zip (args .externals_dir , zip_path )
155- final_name = args .externals_dir / args .tag
156177 for wait in [1 , 2 , 3 , 5 , 8 , 0 ]:
157178 try :
158179 extracted .replace (final_name )
0 commit comments