11import hashlib
2+ import logging
23import pkginfo
34import re
45import shutil
89from aiohttp .client_exceptions import ClientError
910from collections import defaultdict
1011from django .conf import settings
12+ from django .db .utils import IntegrityError
1113from django .utils import timezone
1214from jinja2 import Template
1315from packaging .utils import canonicalize_name
1416from packaging .requirements import Requirement
1517from packaging .version import parse , InvalidVersion
1618from pypi_simple import ACCEPT_JSON_PREFERRED , ProjectPage
17- from pulpcore .plugin .models import Remote
19+ from pulpcore .plugin .models import Artifact , Remote
1820from pulpcore .plugin .exceptions import TimeoutException
21+ from pulpcore .plugin .util import get_domain
22+
23+
24+ log = logging .getLogger (__name__ )
1925
2026
2127PYPI_LAST_SERIAL = "X-PYPI-LAST-SERIAL"
@@ -206,25 +212,34 @@ def get_project_metadata_from_file(filename):
206212 return metadata
207213
208214
209- def compute_metadata_sha256 (filename : str ) -> str | None :
215+ def extract_wheel_metadata (filename : str ) -> bytes | None :
210216 """
211- Compute SHA256 hash of the metadata file from a Python package .
217+ Extract the metadata file content from a wheel file .
212218
213- Returns SHA256 hash or None if metadata cannot be extracted.
219+ Returns the raw metadata content as bytes or None if metadata cannot be extracted.
214220 """
215221 if not filename .endswith (".whl" ):
216222 return None
217223 try :
218224 with zipfile .ZipFile (filename , "r" ) as f :
219225 for file_path in f .namelist ():
220226 if file_path .endswith (".dist-info/METADATA" ):
221- metadata_content = f .read (file_path )
222- return hashlib .sha256 (metadata_content ).hexdigest ()
223- except (zipfile .BadZipFile , KeyError , OSError ):
224- pass
227+ return f .read (file_path )
228+ except (zipfile .BadZipFile , KeyError , OSError ) as e :
229+ log .warning (f"Failed to extract metadata file from { filename } : { e } " )
225230 return None
226231
227232
def compute_metadata_sha256(filename: str) -> str | None:
    """
    Return the hexadecimal SHA256 digest of a Python package's metadata file.

    The metadata bytes are obtained via extract_wheel_metadata(); None is returned when that
    yields nothing (either None or empty content).
    """
    content = extract_wheel_metadata(filename)
    if not content:
        return None
    return hashlib.sha256(content).hexdigest()
241+
242+
228243def artifact_to_python_content_data (filename , artifact , domain = None ):
229244 """
230245 Takes the artifact/filename and returns the metadata needed to create a PythonPackageContent.
@@ -233,6 +248,7 @@ def artifact_to_python_content_data(filename, artifact, domain=None):
233248 # because pkginfo validates that the filename has a valid extension before
234249 # reading it
235250 with tempfile .NamedTemporaryFile ("wb" , dir = "." , suffix = filename ) as temp_file :
251+ artifact .file .seek (0 )
236252 shutil .copyfileobj (artifact .file , temp_file )
237253 temp_file .flush ()
238254 metadata = get_project_metadata_from_file (temp_file .name )
@@ -245,6 +261,35 @@ def artifact_to_python_content_data(filename, artifact, domain=None):
245261 return data
246262
247263
def artifact_to_metadata_artifact(filename: str, artifact: Artifact) -> Artifact | None:
    """
    Creates artifact for metadata from the provided wheel artifact.

    The wheel is copied into a scratch directory under settings.WORKING_DIRECTORY, its
    metadata file is extracted and written to a second temporary file, and a new Artifact
    is initialized from that file and saved.

    Args:
        filename: Name of the package file; only names ending in ".whl" are processed.
        artifact: Artifact holding the wheel content (read through ``artifact.file``).

    Returns:
        The saved metadata Artifact, the already stored Artifact with the same sha256 in the
        current domain when saving raises IntegrityError, or None if the file is not a wheel
        or no metadata content could be extracted.
    """
    # Function-scope import keeps this block self-contained.
    from django.db import transaction

    if not filename.endswith(".whl"):
        return None

    with tempfile.TemporaryDirectory(dir=settings.WORKING_DIRECTORY) as temp_dir_name:
        with tempfile.NamedTemporaryFile(dir=temp_dir_name, suffix=filename) as temp_file:
            # The artifact's file may have been read before; rewind so the copy is complete.
            artifact.file.seek(0)
            shutil.copyfileobj(artifact.file, temp_file)
            temp_file.flush()
            metadata_content = extract_wheel_metadata(temp_file.name)
        if not metadata_content:
            return None

        with tempfile.NamedTemporaryFile(dir=temp_dir_name, suffix=".metadata") as temp_md:
            temp_md.write(metadata_content)
            temp_md.flush()
            metadata_artifact = Artifact.init_and_validate(temp_md.name)
            try:
                # Save inside its own atomic block (a savepoint when a transaction is
                # already open) so that a failed INSERT does not leave the caller's
                # transaction unusable for the lookup below.
                with transaction.atomic():
                    metadata_artifact.save()
            except IntegrityError:
                # Presumably an artifact with this digest already exists in the domain;
                # reuse it instead of failing.
                metadata_artifact = Artifact.objects.get(
                    sha256=metadata_artifact.sha256, pulp_domain=get_domain()
                )
            return metadata_artifact
291+
292+
248293def fetch_json_release_metadata (name : str , version : str , remotes : set [Remote ]) -> dict :
249294 """
250295 Fetches metadata for a specific release from PyPI's JSON API. A release can contain
@@ -408,7 +453,9 @@ def find_artifact():
408453 _art = models .RemoteArtifact .objects .filter (content_artifact = content_artifact ).first ()
409454 return _art
410455
411- content_artifact = content .contentartifact_set .first ()
456+ content_artifact = content .contentartifact_set .exclude (
457+ relative_path__endswith = ".metadata"
458+ ).first ()
412459 artifact = find_artifact ()
413460 origin = settings .CONTENT_ORIGIN or settings .PYPI_API_HOSTNAME or ""
414461 origin = origin .strip ("/" )
0 commit comments