11import hashlib
2+ import logging
23import pkginfo
34import re
45import shutil
89from aiohttp .client_exceptions import ClientError
910from collections import defaultdict
1011from django .conf import settings
12+ from django .db .utils import IntegrityError
1113from django .utils import timezone
1214from jinja2 import Template
1315from packaging .utils import canonicalize_name
1416from packaging .requirements import Requirement
1517from packaging .version import parse , InvalidVersion
1618from pypi_simple import ACCEPT_JSON_PREFERRED , ProjectPage
17- from pulpcore .plugin .models import Remote
19+ from pulpcore .plugin .models import Artifact , Remote
1820from pulpcore .plugin .exceptions import TimeoutException
21+ from pulpcore .plugin .util import get_domain
22+
23+
24+ log = logging .getLogger (__name__ )
1925
2026
2127PYPI_LAST_SERIAL = "X-PYPI-LAST-SERIAL"
@@ -206,25 +212,34 @@ def get_project_metadata_from_file(filename):
206212 return metadata
207213
208214
209- def compute_metadata_sha256 (filename : str ) -> str | None :
215+ def extract_wheel_metadata (filename : str ) -> bytes | None :
210216 """
211- Compute SHA256 hash of the metadata file from a Python package .
217+ Extract the metadata file content from a wheel file .
212218
213- Returns SHA256 hash or None if metadata cannot be extracted.
219+ Returns the raw metadata content as bytes or None if metadata cannot be extracted.
214220 """
215221 if not filename .endswith (".whl" ):
216222 return None
217223 try :
218224 with zipfile .ZipFile (filename , "r" ) as f :
219225 for file_path in f .namelist ():
220226 if file_path .endswith (".dist-info/METADATA" ):
221- metadata_content = f .read (file_path )
222- return hashlib .sha256 (metadata_content ).hexdigest ()
223- except (zipfile .BadZipFile , KeyError , OSError ):
224- pass
227+ return f .read (file_path )
228+ except (zipfile .BadZipFile , KeyError , OSError ) as e :
229+ log .warning (f"Failed to extract metadata file from { filename } : { e } " )
225230 return None
226231
227232
def compute_metadata_sha256(filename: str) -> str | None:
    """
    Return the hex SHA256 digest of a Python package's metadata file.

    The metadata is obtained through extract_wheel_metadata(); when that yields nothing
    (None or empty content), None is returned instead of a digest.
    """
    raw_metadata = extract_wheel_metadata(filename)
    if not raw_metadata:
        return None
    digest = hashlib.sha256(raw_metadata)
    return digest.hexdigest()
241+
242+
228243def artifact_to_python_content_data (filename , artifact , domain = None ):
229244 """
230245 Takes the artifact/filename and returns the metadata needed to create a PythonPackageContent.
@@ -233,6 +248,7 @@ def artifact_to_python_content_data(filename, artifact, domain=None):
233248 # because pkginfo validates that the filename has a valid extension before
234249 # reading it
235250 with tempfile .NamedTemporaryFile ("wb" , dir = "." , suffix = filename ) as temp_file :
251+ artifact .file .seek (0 )
236252 shutil .copyfileobj (artifact .file , temp_file )
237253 temp_file .flush ()
238254 metadata = get_project_metadata_from_file (temp_file .name )
@@ -245,6 +261,42 @@ def artifact_to_python_content_data(filename, artifact, domain=None):
245261 return data
246262
247263
def artifact_to_metadata_artifact(
    filename: str, artifact: Artifact, tmp_dir: str = "."
) -> Artifact | None:
    """
    Creates artifact for metadata from the provided wheel artifact.

    Args:
        filename: Name of the package file; only names ending in ".whl" are processed.
        artifact: Artifact whose file holds the wheel content.
        tmp_dir: Directory in which the temporary files are created.

    Returns:
        The saved metadata Artifact, the already existing one when saving hits an
        IntegrityError, or None when the file is not a wheel or its metadata cannot be
        extracted.
    """
    # Imported locally: only this function needs explicit transaction control.
    from django.db import transaction

    if not filename.endswith(".whl"):
        return None

    # The wheel copy is only needed while its metadata is read, so let the context manager
    # remove it instead of leaving a full copy of every wheel behind in tmp_dir.
    with tempfile.NamedTemporaryFile("wb", dir=tmp_dir, suffix=filename) as temp_file:
        artifact.file.seek(0)
        shutil.copyfileobj(artifact.file, temp_file)
        temp_file.flush()
        metadata_content = extract_wheel_metadata(temp_file.name)

    if not metadata_content:
        return None

    # This file has to outlive the context manager because the Artifact is built from its
    # path. NOTE(review): it is not removed here; presumably the caller's working directory
    # is cleaned up afterwards - confirm.
    with tempfile.NamedTemporaryFile(
        "wb", dir=tmp_dir, suffix=".metadata", delete=False
    ) as temp_md:
        temp_metadata_path = temp_md.name
        temp_md.write(metadata_content)
        temp_md.flush()

    metadata_artifact = Artifact.init_and_validate(temp_metadata_path)
    try:
        # Save inside its own atomic block so that an IntegrityError only rolls back this
        # insert; otherwise the lookup below would fail when an outer transaction is open.
        with transaction.atomic():
            metadata_artifact.save()
    except IntegrityError:
        metadata_artifact = Artifact.objects.get(
            sha256=metadata_artifact.sha256, pulp_domain=get_domain()
        )
    return metadata_artifact
298+
299+
248300def fetch_json_release_metadata (name : str , version : str , remotes : set [Remote ]) -> dict :
249301 """
250302 Fetches metadata for a specific release from PyPI's JSON API. A release can contain
@@ -408,7 +460,9 @@ def find_artifact():
408460 _art = models .RemoteArtifact .objects .filter (content_artifact = content_artifact ).first ()
409461 return _art
410462
411- content_artifact = content .contentartifact_set .first ()
463+ content_artifact = content .contentartifact_set .exclude (
464+ relative_path__endswith = ".metadata"
465+ ).first ()
412466 artifact = find_artifact ()
413467 origin = settings .CONTENT_ORIGIN or settings .PYPI_API_HOSTNAME or ""
414468 origin = origin .strip ("/" )
0 commit comments