11import hashlib
2+ import logging
23import pkginfo
34import re
45import shutil
89from aiohttp .client_exceptions import ClientError
910from collections import defaultdict
1011from django .conf import settings
12+ from django .db .utils import IntegrityError
1113from django .utils import timezone
1214from jinja2 import Template
1315from packaging .utils import canonicalize_name
1416from packaging .requirements import Requirement
1517from packaging .version import parse , InvalidVersion
1618from pypi_simple import ACCEPT_JSON_PREFERRED , ProjectPage
17- from pulpcore .plugin .models import Remote
19+ from pulpcore .plugin .models import Artifact , Remote
1820from pulpcore .plugin .exceptions import TimeoutException
21+ from pulpcore .plugin .util import get_domain
22+
23+
24+ log = logging .getLogger (__name__ )
1925
2026
2127PYPI_LAST_SERIAL = "X-PYPI-LAST-SERIAL"
@@ -203,25 +209,34 @@ def get_project_metadata_from_file(filename):
203209 return metadata
204210
205211
206- def compute_metadata_sha256 (filename : str ) -> str | None :
212+ def extract_wheel_metadata (filename : str ) -> bytes | None :
207213 """
208- Compute SHA256 hash of the metadata file from a Python package .
214+ Extract the metadata file content from a wheel file .
209215
210- Returns SHA256 hash or None if metadata cannot be extracted.
216+ Returns the raw metadata content as bytes or None if metadata cannot be extracted.
211217 """
212218 if not filename .endswith (".whl" ):
213219 return None
214220 try :
215221 with zipfile .ZipFile (filename , "r" ) as f :
216222 for file_path in f .namelist ():
217223 if file_path .endswith (".dist-info/METADATA" ):
218- metadata_content = f .read (file_path )
219- return hashlib .sha256 (metadata_content ).hexdigest ()
220- except (zipfile .BadZipFile , KeyError , OSError ):
221- pass
224+ return f .read (file_path )
225+ except (zipfile .BadZipFile , KeyError , OSError ) as e :
226+ log .warning (f"Failed to extract metadata file from { filename } : { e } " )
222227 return None
223228
224229
def compute_metadata_sha256(filename: str) -> str | None:
    """
    Hash the metadata file embedded in a Python package with SHA256.

    The metadata bytes come from extract_wheel_metadata(); when that yields nothing
    (None or empty content) there is nothing to hash and None is returned.

    Returns the hex digest string, or None if no metadata content was extracted.
    """
    raw_metadata = extract_wheel_metadata(filename)
    if not raw_metadata:
        return None
    digest = hashlib.sha256(raw_metadata)
    return digest.hexdigest()
238+
239+
225240def artifact_to_python_content_data (filename , artifact , domain = None ):
226241 """
227242 Takes the artifact/filename and returns the metadata needed to create a PythonPackageContent.
@@ -230,6 +245,7 @@ def artifact_to_python_content_data(filename, artifact, domain=None):
230245 # because pkginfo validates that the filename has a valid extension before
231246 # reading it
232247 with tempfile .NamedTemporaryFile ("wb" , dir = "." , suffix = filename ) as temp_file :
248+ artifact .file .seek (0 )
233249 shutil .copyfileobj (artifact .file , temp_file )
234250 temp_file .flush ()
235251 metadata = get_project_metadata_from_file (temp_file .name )
@@ -242,6 +258,42 @@ def artifact_to_python_content_data(filename, artifact, domain=None):
242258 return data
243259
244260
def artifact_to_metadata_artifact(
    filename: str, artifact: Artifact, tmp_dir: str = "."
) -> Artifact | None:
    """
    Creates artifact for metadata from the provided wheel artifact.

    The wheel content is copied to a temporary file so its METADATA entry can be read, the
    metadata bytes are written to a second temporary file, and an Artifact is built from that
    file and saved.

    Args:
        filename: Name of the package file; only names ending in ``.whl`` are processed.
        artifact: Artifact whose ``file`` holds the wheel content.
        tmp_dir: Directory in which the temporary files are created.

    Returns:
        The saved metadata Artifact, the already-stored Artifact with the same sha256 in the
        domain returned by get_domain() if saving raises IntegrityError, or None when the file
        is not a wheel or no metadata content could be extracted.
    """
    if not filename.endswith(".whl"):
        return None

    # The suffix keeps the ".whl" extension that extract_wheel_metadata() checks for.
    # Extraction happens inside the context manager so the wheel copy is deleted as soon as
    # the metadata has been read, instead of being left behind in tmp_dir.
    with tempfile.NamedTemporaryFile("wb", dir=tmp_dir, suffix=filename) as temp_file:
        artifact.file.seek(0)
        shutil.copyfileobj(artifact.file, temp_file)
        temp_file.flush()
        metadata_content = extract_wheel_metadata(temp_file.name)

    if not metadata_content:
        return None

    # NOTE(review): this file is deliberately kept after the context exits because the
    # Artifact is built from its path; presumably saving relocates it or the caller cleans
    # up tmp_dir -- confirm.
    with tempfile.NamedTemporaryFile(
        "wb", dir=tmp_dir, suffix=".metadata", delete=False
    ) as temp_md:
        temp_metadata_path = temp_md.name
        temp_md.write(metadata_content)
        temp_md.flush()

    metadata_artifact = Artifact.init_and_validate(temp_metadata_path)
    try:
        metadata_artifact.save()
    except IntegrityError:
        # An artifact with the same content already exists; reuse it.
        metadata_artifact = Artifact.objects.get(
            sha256=metadata_artifact.sha256, pulp_domain=get_domain()
        )
    return metadata_artifact
295+
296+
245297def fetch_json_release_metadata (name : str , version : str , remotes : set [Remote ]) -> dict :
246298 """
247299 Fetches metadata for a specific release from PyPI's JSON API. A release can contain
@@ -405,7 +457,9 @@ def find_artifact():
405457 _art = models .RemoteArtifact .objects .filter (content_artifact = content_artifact ).first ()
406458 return _art
407459
408- content_artifact = content .contentartifact_set .first ()
460+ content_artifact = content .contentartifact_set .exclude (
461+ relative_path__endswith = ".metadata"
462+ ).first ()
409463 artifact = find_artifact ()
410464 origin = settings .CONTENT_ORIGIN or settings .PYPI_API_HOSTNAME or ""
411465 origin = origin .strip ("/" )
0 commit comments