Skip to content

Commit 45918e7

Browse files
authored
Patch to remove tests from wheels (#20653)
* wip * remove changes * remove tests * remove functions * patch * remove imports for patch * patch * print in patch * fix typo * repack wheels * fix * add wheel library * comment patch * after repair * patch * uncomment patch call * skip unchanged * skip unchanged * skip unchanged * debug * debug * debug * after repair * after repair * fix * classify wheels * fix * classify wheels * remove import * patch * patch fix * fix * remove quotes * replace quote * fix quotes in patch * remove patch * remove patch * simplify utils and .toml * toml * toml * toml * typo * change toml format to gitignore patterns * publish wheels for testing * test * rename wheels * uncomment * remove built extra index * remove built flag * upload wheels * Uncomment condition to publish wheels * Fix typo * Fix conflicts * Add formatting * Remove unnecessary continues
1 parent 2dcc658 commit 45918e7

File tree

6 files changed

+352
-182
lines changed

6 files changed

+352
-182
lines changed

.builders/images/runner_dependencies.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,5 @@ urllib3==2.2.0
33
auditwheel==6.0.0; sys_platform == 'linux'
44
delvewheel==1.5.2; sys_platform == 'win32'
55
delocate==0.13.0; sys_platform == 'darwin'
6+
wheel==0.45.1
7+
pathspec==0.12.1

.builders/scripts/build_wheels.py

Lines changed: 206 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,40 @@
11
from __future__ import annotations
22

33
import argparse
4+
import email
45
import json
56
import os
7+
import re
8+
import shutil
69
import subprocess
710
import sys
11+
import time
12+
import tomllib
13+
from functools import cache
14+
from hashlib import sha256
815
from pathlib import Path
916
from tempfile import TemporaryDirectory
1017
from typing import TypedDict
1118
from zipfile import ZipFile
1219

20+
import pathspec
21+
import urllib3
1322
from dotenv import dotenv_values
14-
from utils import extract_metadata, normalize_project_name
23+
from utils import iter_wheels
24+
from wheel.cli.pack import pack
25+
from wheel.cli.unpack import unpack
1526

1627
INDEX_BASE_URL = 'https://agent-int-packages.datadoghq.com'
1728
CUSTOM_EXTERNAL_INDEX = f'{INDEX_BASE_URL}/external'
1829
CUSTOM_BUILT_INDEX = f'{INDEX_BASE_URL}/built'
30+
UNNORMALIZED_PROJECT_NAME_CHARS = re.compile(r'[-_.]+')
31+
1932

2033
class WheelSizes(TypedDict):
    """Size information recorded for a single wheel (see `calculate_wheel_sizes`)."""

    # Size of the wheel archive itself, taken from the file's `st_size` (bytes).
    compressed: int
    # NOTE(review): presumably the total size of the archive members once extracted,
    # as computed by `calculate_wheel_sizes` - confirm against that function.
    uncompressed: int
2336

37+
2438
if sys.platform == 'win32':
2539
PY3_PATH = Path('C:\\py3\\Scripts\\python.exe')
2640
PY2_PATH = Path('C:\\py2\\Scripts\\python.exe')
@@ -62,6 +76,161 @@ def check_process(*args, **kwargs) -> subprocess.CompletedProcess:
6276
return process
6377

6478

79+
def extract_metadata(wheel: Path) -> email.message.Message:
    """
    Read and parse the `METADATA` file stored inside a wheel.

    The `.dist-info` directory is taken from the first archive member whose
    top-level path component ends with `.dist-info`.

    Args:
        wheel: Path to the wheel archive.

    Returns:
        The parsed metadata; fields are accessed by header name, e.g. `metadata['Name']`.

    Raises:
        RuntimeError: If the wheel has no `.dist-info` directory, or that
            directory has no `METADATA` file.
    """
    with ZipFile(str(wheel)) as zip_archive:
        for path in zip_archive.namelist():
            root = path.split('/', 1)[0]
            if root.endswith('.dist-info'):
                dist_info_dir = root
                break
        else:
            # The loop finished without `break`: no member lives under a `.dist-info` directory
            message = f'Could not find the `.dist-info` directory in wheel: {wheel.name}'
            raise RuntimeError(message)

        try:
            with zip_archive.open(f'{dist_info_dir}/METADATA') as zip_file:
                metadata_file_contents = zip_file.read().decode('utf-8')
        except KeyError:
            # `ZipFile.open` raises KeyError for a missing member; the original traceback adds nothing
            message = f'Could not find a `METADATA` file in the `{dist_info_dir}` directory'
            raise RuntimeError(message) from None

    return email.message_from_string(metadata_file_contents)
98+
99+
100+
def normalize_project_name(name: str) -> str:
    """Return `name` in its PEP 503 normalized form."""
    # https://peps.python.org/pep-0503/#normalized-names
    collapsed = UNNORMALIZED_PROJECT_NAME_CHARS.sub('-', name)
    return collapsed.lower()
103+
104+
105+
@cache
def get_wheel_hashes(project) -> dict[str, str]:
    """
    Fetch the SHA-256 digests of the wheels PyPI lists for `project`.

    The PyPI simple index is queried in its JSON form. Results are cached per
    project for the lifetime of the process.

    Args:
        project: Project name used to build the `https://pypi.org/simple/<project>` URL.

    Returns:
        A mapping of wheel filename to SHA-256 hex digest. Files that are not
        wheels or that have no `sha256` hash are left out. The mapping is empty
        when PyPI answers 404, i.e. the project has no files on PyPI.

    Raises:
        RuntimeError: If every attempt fails with a transport error or a
            status other than 200/404.
    """
    # Bounded so that a persistent failure surfaces as an error instead of hanging the build
    max_attempts = 8
    retry_wait = 2

    for attempt in range(1, max_attempts + 1):
        try:
            response = urllib3.request(
                'GET',
                f'https://pypi.org/simple/{project}',
                headers={"Accept": "application/vnd.pypi.simple.v1+json"},
            )
        except urllib3.exceptions.HTTPError as e:
            err_msg = f'Failed to fetch hashes for `{project}`: {e}'
        else:
            if response.status == 200:
                break

            if response.status == 404:
                # Retrying cannot help: the project is simply not published on PyPI
                return {}

            err_msg = f'Failed to fetch hashes for `{project}`, status code: {response.status}'

        print(err_msg)
        if attempt == max_attempts:
            raise RuntimeError(err_msg)

        print(f'Retrying in {retry_wait} seconds')
        time.sleep(retry_wait)
        # Exponential backoff between attempts
        retry_wait *= 2

    data = response.json()
    return {
        file['filename']: file['hashes']['sha256']
        for file in data['files']
        if file['filename'].endswith('.whl') and 'sha256' in file['hashes']
    }
135+
136+
137+
def wheel_was_built(wheel: Path) -> bool:
    """
    Tell whether `wheel` differs from what PyPI publishes under the same filename.

    The wheel counts as built when PyPI lists no wheel with its filename, or
    when its SHA-256 digest does not match the digest PyPI reports for it.
    """
    metadata = extract_metadata(wheel)
    published_hashes = get_wheel_hashes(normalize_project_name(metadata['Name']))

    published_digest = published_hashes.get(wheel.name)
    if published_digest is None:
        # No wheel with this filename on PyPI
        return True

    local_digest = sha256(wheel.read_bytes()).hexdigest()
    return local_digest != published_digest
146+
147+
148+
def remove_test_files(wheel_path: Path) -> bool:
    """
    Unpack the wheel, remove excluded files, then repack it to rebuild RECORD correctly.

    The repacked wheel replaces the file at `wheel_path`, keeping that filename.

    Args:
        wheel_path: Path to the wheel to strip.

    Returns:
        True if the wheel was rewritten, False if it contained nothing to
        exclude and was left untouched.

    Raises:
        RuntimeError: If repacking did not produce a wheel.
    """
    # First, check whether the wheel contains any files that should be excluded. If not, leave it untouched.
    with ZipFile(wheel_path, 'r') as zf:
        if not any(is_excluded_from_wheel(name) for name in zf.namelist()):
            # Nothing to strip, so skip rewriting the wheel
            return False

    with TemporaryDirectory() as td:
        td_path = Path(td)
        unpack_root = td_path / 'unpacked'
        repack_root = td_path / 'repacked'
        unpack_root.mkdir()
        repack_root.mkdir()

        # Unpack the wheel into its own directory; `unpack` creates a single sub-directory there
        unpack(str(wheel_path), dest=str(unpack_root))
        unpacked_dir = next(unpack_root.iterdir())

        # Remove excluded files/folders. Walking top-down lets us prune an excluded
        # directory from `dirs` so that os.walk never descends into the deleted tree.
        for root, dirs, files in os.walk(unpacked_dir):
            root_path = Path(root)
            for d in list(dirs):
                full_dir = root_path / d
                if is_excluded_from_wheel(full_dir.relative_to(unpacked_dir).as_posix()):
                    shutil.rmtree(full_dir)
                    dirs.remove(d)
            for f in files:
                full_file = root_path / f
                if is_excluded_from_wheel(full_file.relative_to(unpacked_dir).as_posix()):
                    os.remove(full_file)

        print(f'Tests removed from {wheel_path.name}')

        # Repack into a private directory, regenerating RECORD. Packing straight into the
        # wheel's own directory would make the output hard to identify, because the
        # repacked name is derived from the wheel's tags and may differ from `wheel_path`
        # (which may have been renamed to be platform-specific).
        pack(str(unpacked_dir), dest_dir=str(repack_root), build_number=None)

        repacked_wheel = next(repack_root.glob('*.whl'), None)
        if repacked_wheel is None:
            message = f'Repacking produced no wheel for: {wheel_path.name}'
            raise RuntimeError(message)

        # Replace the original wheel, keeping its filename
        shutil.move(str(repacked_wheel), str(wheel_path))

    return True
194+
195+
196+
@cache
def _load_excluded_spec() -> pathspec.PathSpec:
    """
    Build the exclusion matcher from the `files_to_remove.toml` file located
    in the same directory as this script.

    The entries under `excluded_paths` (none if the key is absent) are compiled
    with .gitignore-style semantics. The result is cached after the first call.
    """
    toml_file = Path(__file__).parent / "files_to_remove.toml"
    with toml_file.open("rb") as stream:
        excluded_patterns = tomllib.load(stream).get("excluded_paths", [])

    return pathspec.PathSpec.from_lines("gitignore", excluded_patterns)
208+
209+
210+
def is_excluded_from_wheel(path: str | Path) -> bool:
    """
    Tell whether `path` (file or directory) matches an exclusion pattern
    from files_to_remove.toml.

    The path is tested twice: as given, and with a trailing '/' appended so
    that patterns written as directories (ending in '/') can match a
    directory path passed without one.
    """
    matcher = _load_excluded_spec()
    candidate = Path(path).as_posix()

    return matcher.match_file(candidate) or matcher.match_file(candidate + "/")
224+
225+
226+
def add_dependency(dependencies: dict[str, str], sizes: dict[str, WheelSizes], wheel: Path) -> None:
    """
    Record the version and size information of `wheel` under its normalized project name.

    Both `dependencies` and `sizes` are updated in place; an existing entry
    for the same project is overwritten.
    """
    metadata = extract_metadata(wheel)
    name = normalize_project_name(metadata['Name'])
    version = metadata['Version']

    dependencies[name] = version

    size_entry = {'version': version}
    size_entry.update(calculate_wheel_sizes(wheel))
    sizes[name] = size_entry
232+
233+
65234
def calculate_wheel_sizes(wheel_path: Path) -> WheelSizes:
66235
compressed_size = wheel_path.stat(follow_symlinks=True).st_size
67236
with ZipFile(wheel_path) as zf:
@@ -92,6 +261,13 @@ def main():
92261

93262
with TemporaryDirectory() as d:
94263
staged_wheel_dir = Path(d).resolve()
264+
staged_built_wheels_dir = staged_wheel_dir / 'built'
265+
staged_external_wheels_dir = staged_wheel_dir / 'external'
266+
267+
# Create the directories
268+
staged_built_wheels_dir.mkdir(parents=True, exist_ok=True)
269+
staged_external_wheels_dir.mkdir(parents=True, exist_ok=True)
270+
95271
env_vars = dict(os.environ)
96272
env_vars['PATH'] = f'{python_path.parent}{os.pathsep}{env_vars["PATH"]}'
97273
env_vars['PIP_WHEEL_DIR'] = str(staged_wheel_dir)
@@ -131,25 +307,29 @@ def main():
131307
str(MOUNT_DIR / 'requirements.in'),
132308
'--wheel-dir',
133309
str(staged_wheel_dir),
134-
# Temporarily removing extra index urls. See below.
135-
# '--extra-index-url', CUSTOM_EXTERNAL_INDEX,
310+
'--extra-index-url',
311+
CUSTOM_EXTERNAL_INDEX,
136312
]
137-
# Temporarily disable extra index urls. There are broken wheels in the gcloud bucket
138-
# while working on removing tests from them. Adding extra indices causes undefined behavior
139-
# and can pull a broken image, preventing the building from running.
140-
# if args.use_built_index:
141-
# command_args.extend(['--extra-index-url', CUSTOM_BUILT_INDEX])
142313

143314
check_process(command_args, env=env_vars)
144315

316+
# Classify wheels
317+
for wheel in iter_wheels(staged_wheel_dir):
318+
if wheel_was_built(wheel):
319+
shutil.move(wheel, staged_built_wheels_dir)
320+
else:
321+
shutil.move(wheel, staged_external_wheels_dir)
322+
145323
# Repair wheels
146324
check_process(
147325
[
148326
sys.executable,
149327
'-u',
150328
str(MOUNT_DIR / 'scripts' / 'repair_wheels.py'),
151-
'--source-dir',
152-
str(staged_wheel_dir),
329+
'--source-built-dir',
330+
str(staged_built_wheels_dir),
331+
'--source-external-dir',
332+
str(staged_external_wheels_dir),
153333
'--built-dir',
154334
str(built_wheels_dir),
155335
'--external-dir',
@@ -160,15 +340,22 @@ def main():
160340
dependencies: dict[str, tuple[str, str]] = {}
161341
sizes: dict[str, WheelSizes] = {}
162342

163-
for wheel_dir in wheels_dir.iterdir():
164-
for wheel in wheel_dir.iterdir():
165-
project_metadata = extract_metadata(wheel)
166-
project_name = normalize_project_name(project_metadata['Name'])
167-
project_version = project_metadata['Version']
168-
dependencies[project_name] = project_version
169-
170-
171-
sizes[project_name] = {'version': project_version, **calculate_wheel_sizes(wheel)}
343+
# Handle wheels currently in the external directory and move them to the built directory if they were modified
344+
for wheel in iter_wheels(external_wheels_dir):
345+
was_modified = remove_test_files(wheel)
346+
if was_modified:
347+
# A modified wheel is no longer external → move it to built directory
348+
new_path = built_wheels_dir / wheel.name
349+
wheel.rename(new_path)
350+
wheel = new_path
351+
print(f'Moved {wheel.name} to built directory')
352+
353+
add_dependency(dependencies, sizes, wheel)
354+
355+
# Handle wheels already in the built directory
356+
for wheel in iter_wheels(built_wheels_dir):
357+
remove_test_files(wheel)
358+
add_dependency(dependencies, sizes, wheel)
172359

173360
output_path = MOUNT_DIR / 'sizes.json'
174361
with output_path.open('w', encoding='utf-8') as fp:
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
excluded_paths = [
2+
# --- Type annotation ---
3+
"krb5/**/*.pyi",
4+
"krb5/**/py.typed",
5+
6+
"Cryptodome/**/*.pyi",
7+
"Cryptodome/**/py.typed",
8+
9+
"ddtrace/**/*.pyi",
10+
"ddtrace/**/py.typed",
11+
12+
"pyVmomi/**/*.pyi",
13+
"pyVmomi/**/py.typed",
14+
15+
"gssapi/**/*.pyi",
16+
"gssapi/**/py.typed",
17+
18+
# --- Tests ---
19+
20+
"idlelib/idle_test/",
21+
"bs4/tests/",
22+
"Cryptodome/SelfTest/",
23+
"gssapi/tests/",
24+
"keystoneauth1/tests/",
25+
"lazy_loader/tests/",
26+
"openstack/tests/",
27+
"os_service_types/tests/",
28+
"pbr/tests/",
29+
"pkg_resources/tests/",
30+
"pip/_vendor/colorama/tests/",
31+
"psutil/tests/",
32+
"requests_unixsocket/tests/",
33+
"securesystemslib/_vendor/ed25519/test_data/",
34+
"setuptools/_distutils/compilers/C/tests/",
35+
"setuptools/_vendor/packaging/tests/",
36+
"setuptools/_distutils/tests/",
37+
"setuptools/tests/",
38+
"simplejson/tests/",
39+
"stevedore/tests/",
40+
"supervisor/tests/",
41+
"/test/",
42+
"vertica_python/tests/",
43+
"websocket/tests/",
44+
"win32com/test/",
45+
]

0 commit comments

Comments
 (0)