Skip to content

Commit 3027997

Browse files
committed
Add migration for missing metadata artifacts
1 parent e519f3a commit 3027997

File tree

1 file changed

+198
-0
lines changed

1 file changed

+198
-0
lines changed
Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
# Generated manually on 2025-12-15 14:00 for creating missing metadata artifacts
2+
3+
from django.db import migrations
4+
5+
6+
def pulp_hashlib_new(name, *args, **kwargs):
    """
    Build a hashlib hasher for ``name`` if that checksum type is allowed.

    Copied and updated (to comply with migrations) from pulpcore.

    Extra positional and keyword arguments are forwarded to ``hashlib.new``.
    Returns None when ``name`` is not listed in ``settings.ALLOWED_CONTENT_CHECKSUMS``.
    """
    import hashlib

    from django.conf import settings

    # Guard clause: only allowed algorithms are ever instantiated.
    if name in settings.ALLOWED_CONTENT_CHECKSUMS:
        return hashlib.new(name, *args, **kwargs)
    return None
17+
18+
19+
def init_and_validate(file, artifact_model, expected_digests):
    """
    Build an unsaved ``artifact_model`` instance for ``file`` after checking its digests.

    Copied and updated (to comply with migrations) from pulpcore.

    ``file`` is either a filesystem path (str), in which case the file is read and hashed
    here, or an object that already exposes ``size`` and ``hashers`` attributes.
    ``expected_digests`` maps algorithm names to the hex digests the file must have.

    Returns the new model instance, or None when no hasher could be created, when an
    expected algorithm has no hasher, or when any expected digest does not match.
    """
    from django.conf import settings

    allowed_checksums = settings.ALLOWED_CONTENT_CHECKSUMS
    digest_fields = [
        name
        for name in ("sha512", "sha384", "sha256", "sha224", "sha1", "md5")
        if name in allowed_checksums
    ]

    if isinstance(file, str):
        with open(file, "rb") as stream:
            hashers = {}
            for name in digest_fields:
                hasher = pulp_hashlib_new(name)
                if hasher is not None:
                    hashers[name] = hasher
            if not hashers:
                return None

            size = 0
            # Feed every hasher from the same 1 megabyte chunks so the file is read once.
            while chunk := stream.read(1048576):
                for hasher in hashers.values():
                    hasher.update(chunk)
                size += len(chunk)
    else:
        size, hashers = file.size, file.hashers

    for name, expected in expected_digests.items():
        # todo: log/fail on a digest mismatch, user should repair sha256
        if name not in hashers or expected != hashers[name].hexdigest():
            return None

    attributes = {"size": size, "file": file}
    attributes.update((name, hashers[name].hexdigest()) for name in digest_fields)

    return artifact_model(**attributes)
63+
64+
65+
def extract_wheel_metadata(filename):
    """
    Extract the metadata file content from a wheel file.

    A wheel's own metadata is the ``<name>-<version>.dist-info/METADATA`` entry at the
    root of the archive. Wheels that bundle other distributions can also contain nested
    ``*.dist-info/METADATA`` entries, so the root-level entry is preferred. A nested
    entry is only used as a fallback when no root-level entry exists.

    Returns the raw metadata content as bytes or None if metadata cannot be extracted
    (not a zip file, unreadable file, or no METADATA entry).
    """
    import zipfile

    metadata_suffix = ".dist-info/METADATA"
    try:
        with zipfile.ZipFile(filename, "r") as wheel:
            candidates = [
                entry for entry in wheel.namelist() if entry.endswith(metadata_suffix)
            ]
            if not candidates:
                return None
            # Exactly one "/" means the entry sits in a dist-info directory at the
            # archive root, i.e. it belongs to the wheel itself and not to vendored code.
            metadata_path = next(
                (entry for entry in candidates if entry.count("/") == 1),
                candidates[0],
            )
            return wheel.read(metadata_path)
    except (zipfile.BadZipFile, KeyError, OSError):
        # Best effort: a broken or unreadable wheel simply has no extractable metadata.
        return None
80+
81+
82+
def artifact_to_metadata_artifact(filename, artifact, md_digests, tmp_dir, artifact_model):
    """
    Creates artifact for metadata from the provided wheel artifact.

    The wheel content of ``artifact`` is copied into ``tmp_dir``, its METADATA entry is
    extracted into a second temporary file, and that file is validated against
    ``md_digests`` and saved as a new ``artifact_model`` instance.

    Returns the saved metadata artifact, or None when ``filename`` is not a wheel, the
    metadata cannot be extracted, the digests do not match, or saving fails.
    """
    import os
    import shutil
    import tempfile

    from django.db import transaction

    if not filename.endswith(".whl"):
        return None

    with tempfile.NamedTemporaryFile("wb", dir=tmp_dir, suffix=filename, delete=False) as temp_file:
        temp_wheel_path = temp_file.name
        try:
            artifact.file.seek(0)
            shutil.copyfileobj(artifact.file, temp_file)
            temp_file.flush()
        finally:
            # Release the storage file handle; otherwise one handle per processed
            # wheel stays open for as long as the artifact object is referenced.
            artifact.file.close()

    try:
        metadata_content = extract_wheel_metadata(temp_wheel_path)
    finally:
        # The wheel copy is only needed for extraction. Removing it right away keeps
        # temporary disk usage bounded by one wheel instead of all of them.
        os.unlink(temp_wheel_path)
    if not metadata_content:
        return None

    with tempfile.NamedTemporaryFile(
        "wb", dir=tmp_dir, suffix=".metadata", delete=False
    ) as temp_md:
        temp_metadata_path = temp_md.name
        temp_md.write(metadata_content)
        temp_md.flush()

    metadata_artifact = init_and_validate(temp_metadata_path, artifact_model, md_digests)
    if not metadata_artifact:
        return None

    try:
        # Savepoint: when the surrounding migration runs in a transaction, a database
        # error swallowed without a savepoint would leave that transaction unusable
        # for every following query.
        with transaction.atomic():
            metadata_artifact.save()
    except Exception:
        # Best effort: the caller counts this package as skipped.
        return None

    return metadata_artifact
119+
120+
121+
# todo: bulk create
122+
def create_missing_metadata_artifacts(apps, schema_editor):
    """
    Create metadata artifacts for PythonPackageContent instances that have metadata_sha256
    but are missing the corresponding metadata artifact.

    For each such package, the main artifact (relative path equal to the package
    filename) is used to build a metadata artifact, which is then linked to the package
    through a ContentArtifact with relative path ``<filename>.metadata``. Packages
    without a main artifact, or whose metadata artifact cannot be created or linked,
    are skipped. A summary of created and skipped counts is printed at the end.
    """
    import tempfile

    from django.conf import settings
    from django.db import transaction

    PythonPackageContent = apps.get_model("python", "PythonPackageContent")
    ContentArtifact = apps.get_model("core", "ContentArtifact")
    Artifact = apps.get_model("core", "Artifact")

    # todo: filter only wheels!
    packages = (
        PythonPackageContent.objects.filter(metadata_sha256__isnull=False)
        .exclude(metadata_sha256="")
        .prefetch_related("contentartifact_set")
        .only("filename", "metadata_sha256")
    )
    # todo: only for testing, remove later
    created_count = 0
    skipped_count = 0

    with tempfile.TemporaryDirectory(dir=settings.WORKING_DIRECTORY) as temp_dir:
        for package in packages:
            filename = package.filename
            metadata_relative_path = f"{filename}.metadata"
            content_artifacts = list(package.contentartifact_set.all())

            if any(ca.relative_path == metadata_relative_path for ca in content_artifacts):
                # Metadata artifact already exists
                continue

            main_content_artifact = next(
                (ca for ca in content_artifacts if ca.relative_path == filename),
                None,
            )
            if not main_content_artifact:
                # Main artifact does not exist
                skipped_count += 1
                continue

            metadata_digests = {"sha256": package.metadata_sha256}
            metadata_artifact = artifact_to_metadata_artifact(
                filename, main_content_artifact.artifact, metadata_digests, temp_dir, Artifact
            )
            if not metadata_artifact:
                # Failed to create metadata artifact
                skipped_count += 1
                continue

            try:
                # Savepoint: when the migration runs in a transaction, a database error
                # swallowed without a savepoint would leave that transaction unusable
                # for all remaining packages.
                with transaction.atomic():
                    ContentArtifact.objects.create(
                        artifact=metadata_artifact,
                        content=package,
                        relative_path=metadata_relative_path,
                    )
            except Exception:
                # Failed to save metadata content artifact
                skipped_count += 1
            else:
                created_count += 1

    print(f"Created {created_count} missing metadata artifacts. Skipped {skipped_count} packages.")
185+
186+
187+
class Migration(migrations.Migration):
    """
    Data migration that runs ``create_missing_metadata_artifacts``.

    Reversing the migration is a no-op: artifacts created on the way forward are kept.
    """

    dependencies = [("python", "0018_packageprovenance")]

    operations = [
        # The second positional argument of RunPython is the reverse callable.
        migrations.RunPython(create_missing_metadata_artifacts, migrations.RunPython.noop),
    ]

0 commit comments

Comments
 (0)