Skip to content

Commit cc98d30

Browse files
emmatyping authored and savannahostrowski committed
Add hash checking
1 parent 0034f14 commit cc98d30

File tree

1 file changed

+30
-25
lines changed

1 file changed

+30
-25
lines changed

PCbuild/get_external.py

Lines changed: 30 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,9 @@
22

33
import argparse
44
import contextlib
5+
import hashlib
56
import io
7+
import json
68
import os
79
import pathlib
810
import shutil
@@ -15,17 +17,16 @@
1517

1618
# Mapping of binary dependency tag to GitHub release asset ID
1719
TAG_TO_ASSET_ID = {
18-
"libffi-3.4.4": 280027073,
19-
"openssl-bin-3.0.16.2": 280041244,
20-
"tcltk-8.6.15.0": 280042163,
21-
"nasm-2.11.06": 280042740,
22-
"llvm-19.1.7.0": 280052497,
20+
'libffi-3.4.4': 280027073,
21+
'openssl-bin-3.0.16.2': 280041244,
22+
'tcltk-8.6.15.0': 280042163,
23+
'nasm-2.11.06': 280042740,
24+
'llvm-19.1.7.0': 280052497,
2325
}
2426

2527

26-
def request_with_retry(
27-
request_func, *args, max_retries=7, err_msg="Request failed.", **kwargs,
28-
):
28+
def request_with_retry(request_func, *args, max_retries=7,
29+
err_msg='Request failed.', **kwargs):
2930
"""Make a request using request_func with exponential backoff"""
3031
for attempt in range(max_retries + 1):
3132
try:
@@ -45,19 +46,16 @@ def retrieve_with_retries(download_location, output_path, reporthook):
4546
download_location,
4647
output_path,
4748
reporthook,
48-
err_msg=f"Download from {download_location} failed.",
49+
err_msg=f'Download from {download_location} failed.',
4950
)
5051

5152

5253
def get_with_retries(url, headers):
53-
req = urllib.request.Request(
54-
url=url,
55-
headers=headers,
56-
method="GET",
57-
)
54+
req = urllib.request.Request(url=url, headers=headers, method='GET')
5855
return request_with_retry(
59-
urllib.request.urlopen, req, err_msg=f"Request to {url} failed.",
60-
timeout=30,
56+
urllib.request.urlopen,
57+
req,
58+
err_msg=f'Request to {url} failed.'
6159
)
6260

6361

@@ -79,29 +77,36 @@ def fetch_release_asset(asset_id, output_path, org):
7977
"""Download a GitHub release asset.
8078
8179
Release assets need the Content-Type header set to
82-
application/octet-stream, so we can't use urlretrieve. Code here is
83-
based on urlretrieve
80+
application/octet-stream to download the binary, so we can't use
81+
urlretrieve. Code here is based on urlretrieve
8482
"""
85-
# TODO: digest/shasum checking
86-
url = f"https://api.github.com/repos/{org}/cpython-bin-deps/releases/assets/{asset_id}"
83+
url = f'https://api.github.com/repos/{org}/cpython-bin-deps/releases/assets/{asset_id}'
84+
rest = get_with_retries(url,
85+
headers={'Accept': 'application/vnd.github+json'})
86+
json_data = json.loads(rest.read())
87+
hash_info = json_data['digest']
88+
algorithm, hashsum = hash_info.split(':')
89+
if algorithm != 'sha256':
90+
raise RuntimeError(f'Unknown hash algorithm {algorithm} for asset {asset_id}')
8791
with contextlib.closing(
88-
get_with_retries(url, headers={"Accept": "application/octet-stream"})
92+
get_with_retries(url, headers={'Accept': 'application/octet-stream'})
8993
) as resp:
90-
headers = resp.info()
91-
if resp.status != 200:
92-
raise RuntimeError("Failed to download asset")
9394
read = 0
95+
hasher = hashlib.sha256()
9496
with open(output_path, 'wb') as fp:
9597
while block := resp.read(io.DEFAULT_BUFFER_SIZE):
98+
hasher.update(block)
9699
read += len(block)
97100
fp.write(block)
101+
if hasher.hexdigest() != hashsum:
102+
raise RuntimeError('Downloaded content hash did not match!')
98103

99104

100105
def fetch_release(tag, tarball_dir, *, org='python'):
101106
tarball_dir.mkdir(exist_ok=True)
102107
asset_id = TAG_TO_ASSET_ID.get(tag)
103108
if asset_id is None:
104-
raise ValueError(f"Unknown tag for binary dependencies {tag}")
109+
raise ValueError(f'Unknown tag for binary dependencies {tag}')
105110
output_path = tarball_dir / f'{tag}.tar.xz'
106111
fetch_release_asset(asset_id, output_path, org)
107112
return output_path

0 commit comments

Comments
 (0)