11import hashlib
2+ import logging
23import pkginfo
34import re
45import shutil
1415from packaging .requirements import Requirement
1516from packaging .version import parse , InvalidVersion
1617from pypi_simple import ACCEPT_JSON_PREFERRED , ProjectPage
17- from pulpcore .plugin .models import Remote
18+ from pulpcore .plugin .models import Artifact , Remote
1819from pulpcore .plugin .exceptions import TimeoutException
1920
2021
# Module-level logger, named after this module.
log = logging.getLogger(__name__)


# Name of the HTTP header that carries the PyPI "last serial" value.
PYPI_LAST_SERIAL = "X-PYPI-LAST-SERIAL"
# TODO: This serial constant is temporary until Python repositories implement serials.
PYPI_SERIAL_CONSTANT = 1000000000
@@ -206,25 +210,34 @@ def get_project_metadata_from_file(filename):
206210 return metadata
207211
208212
209- def compute_metadata_sha256 (filename : str ) -> str | None :
213+ def extract_wheel_metadata (filename : str ) -> bytes | None :
210214 """
211- Compute SHA256 hash of the metadata file from a Python package .
215+ Extract the metadata file content from a wheel file .
212216
213- Returns SHA256 hash or None if metadata cannot be extracted.
217+ Returns the raw metadata content as bytes or None if metadata cannot be extracted.
214218 """
215219 if not filename .endswith (".whl" ):
216220 return None
217221 try :
218222 with zipfile .ZipFile (filename , "r" ) as f :
219223 for file_path in f .namelist ():
220224 if file_path .endswith (".dist-info/METADATA" ):
221- metadata_content = f .read (file_path )
222- return hashlib .sha256 (metadata_content ).hexdigest ()
223- except (zipfile .BadZipFile , KeyError , OSError ):
224- pass
225+ return f .read (file_path )
226+ except (zipfile .BadZipFile , KeyError , OSError ) as e :
227+ log .warning (f"Failed to extract metadata file from { filename } : { e } " )
225228 return None
226229
227230
def compute_metadata_sha256(filename: str) -> str | None:
    """
    Return the SHA256 hex digest of a Python package's metadata file.

    The metadata bytes are obtained through extract_wheel_metadata(); when that
    yields nothing (None or empty content), None is returned instead of a digest.
    """
    raw_metadata = extract_wheel_metadata(filename)
    if not raw_metadata:
        return None
    return hashlib.sha256(raw_metadata).hexdigest()
239+
240+
228241def artifact_to_python_content_data (filename , artifact , domain = None ):
229242 """
230243 Takes the artifact/filename and returns the metadata needed to create a PythonPackageContent.
@@ -233,6 +246,7 @@ def artifact_to_python_content_data(filename, artifact, domain=None):
233246 # because pkginfo validates that the filename has a valid extension before
234247 # reading it
235248 with tempfile .NamedTemporaryFile ("wb" , dir = "." , suffix = filename ) as temp_file :
249+ artifact .file .seek (0 )
236250 shutil .copyfileobj (artifact .file , temp_file )
237251 temp_file .flush ()
238252 metadata = get_project_metadata_from_file (temp_file .name )
@@ -245,6 +259,28 @@ def artifact_to_python_content_data(filename, artifact, domain=None):
245259 return data
246260
247261
def artifact_to_metadata_artifact(filename: str, artifact: Artifact) -> Artifact | None:
    """
    Build and save a metadata Artifact from the given wheel artifact.

    The wheel is copied into a temporary file in the current directory (with the
    filename as suffix, so the ".whl" extension is preserved), its metadata file is
    extracted, written to a second temporary ".metadata" file, and turned into a
    saved Artifact.

    Returns the saved metadata Artifact, or None when the filename is not a wheel or
    no metadata content could be extracted.
    """
    is_wheel = filename.endswith(".whl")
    if not is_wheel:
        return None

    with tempfile.NamedTemporaryFile("wb", dir=".", suffix=filename) as wheel_copy:
        # Rewind first: the artifact's file handle may already have been read.
        artifact.file.seek(0)
        shutil.copyfileobj(artifact.file, wheel_copy)
        wheel_copy.flush()

        raw_metadata = extract_wheel_metadata(wheel_copy.name)
        if raw_metadata:
            with tempfile.NamedTemporaryFile(suffix=".metadata") as metadata_file:
                metadata_file.write(raw_metadata)
                # Flush so the content is on disk before it is read back by path.
                metadata_file.flush()
                stored = Artifact.init_and_validate(metadata_file.name)
                stored.save()
                return stored
        return None
282+
283+
248284def fetch_json_release_metadata (name : str , version : str , remotes : set [Remote ]) -> dict :
249285 """
250286 Fetches metadata for a specific release from PyPI's JSON API. A release can contain
@@ -408,7 +444,9 @@ def find_artifact():
408444 _art = models .RemoteArtifact .objects .filter (content_artifact = content_artifact ).first ()
409445 return _art
410446
411- content_artifact = content .contentartifact_set .first ()
447+ content_artifact = content .contentartifact_set .exclude (
448+ relative_path__endswith = ".metadata"
449+ ).first ()
412450 artifact = find_artifact ()
413451 origin = settings .CONTENT_ORIGIN or settings .PYPI_API_HOSTNAME or ""
414452 origin = origin .strip ("/" )
0 commit comments