
Commit 0034f14

emmatyping authored and savannahostrowski committed
Download binaries from GitHub releases
With LLVM 20, individual files are larger than the 100 MiB single-file limit for items checked into git. Therefore, this PR pulls binaries down from GitHub releases as `.tar.xz` files, which additionally improves the compression ratio. This is currently somewhat of a first draft, as there is still work to be done, such as hash checking.
1 parent 84781b4 commit 0034f14

File tree

1 file changed: +97 -21 lines changed


PCbuild/get_external.py

Lines changed: 97 additions & 21 deletions
@@ -1,37 +1,68 @@
 #!/usr/bin/env python3
 
 import argparse
+import contextlib
+import io
 import os
 import pathlib
+import shutil
 import sys
 import time
 import urllib.error
 import urllib.request
 import zipfile
 
 
-def retrieve_with_retries(download_location, output_path, reporthook,
-                          max_retries=7):
-    """Download a file with exponential backoff retry and save to disk."""
+# Mapping of binary dependency tag to GitHub release asset ID
+TAG_TO_ASSET_ID = {
+    "libffi-3.4.4": 280027073,
+    "openssl-bin-3.0.16.2": 280041244,
+    "tcltk-8.6.15.0": 280042163,
+    "nasm-2.11.06": 280042740,
+    "llvm-19.1.7.0": 280052497,
+}
+
+
+def request_with_retry(
+    request_func, *args, max_retries=7, err_msg="Request failed.", **kwargs,
+):
+    """Make a request using request_func with exponential backoff"""
     for attempt in range(max_retries + 1):
         try:
-            resp = urllib.request.urlretrieve(
-                download_location,
-                output_path,
-                reporthook=reporthook,
-            )
+            resp = request_func(*args, **kwargs)
         except (urllib.error.URLError, ConnectionError) as ex:
             if attempt == max_retries:
-                msg = f"Download from {download_location} failed."
-                raise OSError(msg) from ex
+                raise OSError(err_msg) from ex
             time.sleep(2.25**attempt)
         else:
             return resp
 
 
-def fetch_zip(commit_hash, zip_dir, *, org='python', binary=False, verbose):
-    repo = f'cpython-{"bin" if binary else "source"}-deps'
-    url = f'https://github.com/{org}/{repo}/archive/{commit_hash}.zip'
+def retrieve_with_retries(download_location, output_path, reporthook):
+    """Download a file with retries."""
+    return request_with_retry(
+        urllib.request.urlretrieve,
+        download_location,
+        output_path,
+        reporthook,
+        err_msg=f"Download from {download_location} failed.",
+    )
+
+
+def get_with_retries(url, headers):
+    req = urllib.request.Request(
+        url=url,
+        headers=headers,
+        method="GET",
+    )
+    return request_with_retry(
+        urllib.request.urlopen, req, err_msg=f"Request to {url} failed.",
+        timeout=30,
+    )
+
+
+def fetch_zip(commit_hash, zip_dir, *, org='python', verbose):
+    url = f'https://github.com/{org}/cpython-source-deps/archive/{commit_hash}.zip'
     reporthook = None
     if verbose:
         reporthook = print
@@ -44,6 +75,44 @@ def fetch_zip(commit_hash, zip_dir, *, org='python', binary=False, verbose):
     return filename
 
 
+def fetch_release_asset(asset_id, output_path, org):
+    """Download a GitHub release asset.
+
+    Release assets need the Content-Type header set to
+    application/octet-stream, so we can't use urlretrieve. Code here is
+    based on urlretrieve
+    """
+    # TODO: digest/shasum checking
+    url = f"https://api.github.com/repos/{org}/cpython-bin-deps/releases/assets/{asset_id}"
+    with contextlib.closing(
+        get_with_retries(url, headers={"Accept": "application/octet-stream"})
+    ) as resp:
+        headers = resp.info()
+        if resp.status != 200:
+            raise RuntimeError("Failed to download asset")
+        read = 0
+        with open(output_path, 'wb') as fp:
+            while block := resp.read(io.DEFAULT_BUFFER_SIZE):
+                read += len(block)
+                fp.write(block)
+
+
+def fetch_release(tag, tarball_dir, *, org='python'):
+    tarball_dir.mkdir(exist_ok=True)
+    asset_id = TAG_TO_ASSET_ID.get(tag)
+    if asset_id is None:
+        raise ValueError(f"Unknown tag for binary dependencies {tag}")
+    output_path = tarball_dir / f'{tag}.tar.xz'
+    fetch_release_asset(asset_id, output_path, org)
+    return output_path
+
+
+def extract_tarball(externals_dir, tarball_path, tag):
+    output_path = externals_dir / tag
+    shutil.unpack_archive(os.fspath(tarball_path), os.fspath(output_path))
+    return output_path
+
+
 def extract_zip(externals_dir, zip_path):
     with zipfile.ZipFile(os.fspath(zip_path)) as zf:
         zf.extractall(os.fspath(externals_dir))
@@ -67,15 +136,22 @@ def parse_args():
 
 def main():
     args = parse_args()
-    zip_path = fetch_zip(
-        args.tag,
-        args.externals_dir / 'zips',
-        org=args.organization,
-        binary=args.binary,
-        verbose=args.verbose,
-    )
+    if args.binary:
+        tarball_path = fetch_release(
+            args.tag,
+            args.externals_dir / 'tarballs',
+            org=args.organization,
+        )
+        extracted = extract_tarball(args.externals_dir, tarball_path, args.tag)
+    else:
+        zip_path = fetch_zip(
+            args.tag,
+            args.externals_dir / 'zips',
+            org=args.organization,
+            verbose=args.verbose,
+        )
+        extracted = extract_zip(args.externals_dir, zip_path)
     final_name = args.externals_dir / args.tag
-    extracted = extract_zip(args.externals_dir, zip_path)
     for wait in [1, 2, 3, 5, 8, 0]:
         try:
             extracted.replace(final_name)
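
The sketch below shows how the new helpers compose, mirroring the `args.binary` branch of `main()` in the diff above. It is illustrative only: it assumes `get_external.py` is importable as a module (it is normally run as a script from `PCbuild`), and the externals directory used here is hypothetical.

    # Illustrative sketch, not part of the commit: exercise the new binary path.
    import pathlib

    from get_external import extract_tarball, fetch_release

    externals = pathlib.Path("externals")  # hypothetical externals directory
    tag = "llvm-19.1.7.0"                  # must be a key of TAG_TO_ASSET_ID

    # Download the release asset for the tag to externals/tarballs/<tag>.tar.xz ...
    tarball = fetch_release(tag, externals / "tarballs", org="python")
    # ... then unpack it into externals/<tag> via shutil.unpack_archive.
    extracted = extract_tarball(externals, tarball, tag)
    print(extracted)  # e.g. externals/llvm-19.1.7.0

The release path retries through `request_with_retry` (via `get_with_retries`), sleeping `2.25**attempt` seconds after each failure: roughly 1 second after the first failure, up to about 130 seconds before the final attempt with the default `max_retries=7`.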

0 commit comments
