Skip to content

Commit f16e3b4

Browse files
committed
Fix edge case when creating metadata file
fixes #1101
1 parent beb85e7 commit f16e3b4

File tree

5 files changed

+48
-13
lines changed

5 files changed

+48
-13
lines changed

CHANGES/1101.bugfix

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fixed edge case where metadata file did not match wheel metadata.

pulp_python/app/models.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,8 +216,10 @@ def init_from_artifact_and_relative_path(artifact, relative_path):
216216
"""Used when downloading package from pull-through cache."""
217217
path = PurePath(relative_path)
218218
data = artifact_to_python_content_data(path.name, artifact, domain=get_domain())
219+
name = data["name"]
220+
version = data["version"]
219221
artifacts = {path.name: artifact}
220-
if metadata_artifact := artifact_to_metadata_artifact(path.name, artifact):
222+
if metadata_artifact := artifact_to_metadata_artifact(path.name, artifact, name, version):
221223
artifacts[f"{path.name}.metadata"] = metadata_artifact
222224
return PythonPackageContent(**data), artifacts
223225

pulp_python/app/serializers.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -410,7 +410,11 @@ def deferred_validate(self, data):
410410

411411
# Create metadata artifact for wheel files
412412
if filename.endswith(".whl"):
413-
if metadata_artifact := artifact_to_metadata_artifact(filename, artifact):
413+
name = data["name"]
414+
version = data["version"]
415+
if metadata_artifact := artifact_to_metadata_artifact(
416+
filename, artifact, name, version
417+
):
414418
data["metadata_artifact"] = metadata_artifact
415419
data["metadata_sha256"] = metadata_artifact.sha256
416420

@@ -552,9 +556,11 @@ def validate(self, data):
552556
)
553557
# Create metadata artifact for wheel files
554558
if filename.endswith(".whl"):
559+
name = data["name"]
560+
version = data["version"]
555561
with tempfile.TemporaryDirectory(dir=settings.WORKING_DIRECTORY) as temp_dir:
556562
if metadata_artifact := artifact_to_metadata_artifact(
557-
filename, artifact, tmp_dir=temp_dir
563+
filename, artifact, name, version, tmp_dir=temp_dir
558564
):
559565
data["metadata_artifact"] = metadata_artifact
560566
data["metadata_sha256"] = metadata_artifact.sha256

pulp_python/app/tasks/upload.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,9 @@ def create():
9898
content = PythonPackageContent.objects.create(**data)
9999
ContentArtifact.objects.create(artifact=artifact, content=content, relative_path=filename)
100100

101-
if metadata_artifact := artifact_to_metadata_artifact(filename, artifact):
101+
name = data["name"]
102+
version = data["version"]
103+
if metadata_artifact := artifact_to_metadata_artifact(filename, artifact, name, version):
102104
ContentArtifact.objects.create(
103105
artifact=metadata_artifact, content=content, relative_path=f"{filename}.metadata"
104106
)

pulp_python/app/utils.py

Lines changed: 33 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,9 @@ def get_project_metadata_from_file(filename):
197197
packagetype = DIST_EXTENSIONS[extensions[pkg_type_index]]
198198

199199
metadata = DIST_TYPES[packagetype](filename)
200-
metadata.metadata_sha256 = compute_metadata_sha256(filename)
200+
name = metadata.name
201+
version = metadata.version
202+
metadata.metadata_sha256 = compute_metadata_sha256(filename, name, version)
201203
metadata.packagetype = packagetype
202204
if packagetype == "sdist":
203205
metadata.python_version = "source"
@@ -210,31 +212,53 @@ def get_project_metadata_from_file(filename):
210212
return metadata
211213

212214

213-
def extract_wheel_metadata(filename: str) -> bytes | None:
215+
def extract_non_normalized_pkg_name_with_version(
216+
filename: str, name: str, version: str
217+
) -> str | None:
218+
"""
219+
Search `filename` for a package name that normalizes to `name` and is followed by `version`.
220+
221+
Returns the original (non-normalized) name with version if found, otherwise None.
222+
"""
223+
# Ensure the package name is normalized
224+
normalized = re.sub(r"[-_.]+", "-", name).lower()
225+
226+
parts = normalized.split("-")
227+
name_pattern = r"[-_.]+".join(map(re.escape, parts))
228+
pattern = rf"({name_pattern})-{re.escape(version)}"
229+
230+
match = re.search(pattern, filename, re.IGNORECASE)
231+
if match:
232+
return match.group(0)
233+
return None
234+
235+
236+
def extract_wheel_metadata(filename: str, name: str, version: str) -> bytes | None:
214237
"""
215238
Extract the metadata file content from a wheel file.
216239
217240
Returns the raw metadata content as bytes or None if metadata cannot be extracted.
218241
"""
219242
if not filename.endswith(".whl"):
220243
return None
244+
245+
original_name_version = extract_non_normalized_pkg_name_with_version(filename, name, version)
246+
metadata_path = f"{original_name_version}.dist-info/METADATA"
221247
try:
222248
with zipfile.ZipFile(filename, "r") as f:
223-
for file_path in f.namelist():
224-
if file_path.endswith(".dist-info/METADATA"):
225-
return f.read(file_path)
249+
return f.read(metadata_path)
226250
except (zipfile.BadZipFile, KeyError, OSError) as e:
227251
log.warning(f"Failed to extract metadata file from {filename}: {e}")
228252
return None
229253

230254

231-
def compute_metadata_sha256(filename: str) -> str | None:
255+
def compute_metadata_sha256(filename: str, name: str, version: str) -> str | None:
    """
    Compute SHA256 hash of the metadata file from a Python package.

    The metadata bytes are obtained from `extract_wheel_metadata` using the given package
    `name` and `version`.

    Returns the hex-encoded SHA256 hash, or None if no (or empty) metadata was extracted.
    """
    raw_metadata = extract_wheel_metadata(filename, name, version)
    if not raw_metadata:
        return None
    return hashlib.sha256(raw_metadata).hexdigest()
239263

240264

@@ -260,7 +284,7 @@ def artifact_to_python_content_data(filename, artifact, domain=None):
260284

261285

262286
def artifact_to_metadata_artifact(
263-
filename: str, artifact: Artifact, tmp_dir: str = "."
287+
filename: str, artifact: Artifact, name: str, version: str, tmp_dir: str = "."
264288
) -> Artifact | None:
265289
"""
266290
Creates artifact for metadata from the provided wheel artifact.
@@ -274,7 +298,7 @@ def artifact_to_metadata_artifact(
274298
shutil.copyfileobj(artifact.file, temp_file)
275299
temp_file.flush()
276300

277-
metadata_content = extract_wheel_metadata(temp_wheel_path)
301+
metadata_content = extract_wheel_metadata(temp_wheel_path, name, version)
278302
if not metadata_content:
279303
return None
280304

0 commit comments

Comments
 (0)