11import logging
2+ import asyncio
23
34from aiohttp import ClientResponseError , ClientError
45from lxml .etree import LxmlError
1920from pulp_python .app .models import (
2021 PythonPackageContent ,
2122 PythonRemote ,
23+ PackageProvenance ,
2224)
23- from pulp_python .app .utils import parse_metadata , PYPI_LAST_SERIAL
25+ from pulp_python .app .utils import parse_metadata , PYPI_LAST_SERIAL , aget_remote_simple_page
2426from pypi_simple import IndexPage
27+ from pypi_attestations import Provenance
2528
2629from bandersnatch .mirror import Mirror
2730from bandersnatch .master import Master
@@ -163,6 +166,7 @@ def __init__(self, serial, master, workers, deferred_download, python_stage, pro
163166 self .python_stage = python_stage
164167 self .progress_report = progress_report
165168 self .deferred_download = deferred_download
169+ self .remote = self .python_stage .remote
166170
167171 async def determine_packages_to_sync (self ):
168172 """
@@ -194,8 +198,8 @@ async def determine_packages_to_sync(self):
194198 continue
195199 else :
196200 logger .info ("Failed to get package list using XMLRPC, trying parse simple page." )
197- url = urljoin (self .python_stage . remote .url , "simple/" )
198- downloader = self .python_stage . remote .get_downloader (url = url )
201+ url = urljoin (self .remote .url , "simple/" )
202+ downloader = self .remote .get_downloader (url = url )
199203 result = await downloader .run ()
200204 with open (result .path ) as f :
201205 index = IndexPage .from_html (f .read ())
@@ -224,6 +228,7 @@ async def create_content(self, pkg):
224228 Take the filtered package, separate into releases and
225229 create a Content Unit to put into the pipeline
226230 """
231+ declared_contents = {}
227232 for version , dists in pkg .releases .items ():
228233 for package in dists :
229234 entry = parse_metadata (pkg .info , version , package )
@@ -237,13 +242,44 @@ async def create_content(self, pkg):
237242 artifact = artifact ,
238243 url = url ,
239244 relative_path = entry ["filename" ],
240- remote = self .python_stage . remote ,
245+ remote = self .remote ,
241246 deferred_download = self .deferred_download ,
242247 )
243248 dc = DeclarativeContent (content = package , d_artifacts = [da ])
244-
249+ declared_contents [ entry [ "filename" ]] = dc
245250 await self .python_stage .put (dc )
246251
252+ if pkg .releases and (page := await aget_remote_simple_page (pkg .name , self .remote )):
253+ if self .remote .provenance :
254+ await self .sync_provenance (page , declared_contents )
255+
async def sync_provenance(self, page, declared_contents):
    """
    Sync the provenance for the package.

    For every entry of ``page.packages`` that has a ``provenance_url`` and whose
    ``filename`` is a key of ``declared_contents``, the provenance document is
    downloaded, validated, and put into the pipeline as a ``PackageProvenance``.

    Args:
        page: Simple page of the package; only ``page.packages`` is read, and of each
            entry only ``filename`` and ``provenance_url``.
        declared_contents: Mapping of filename to the ``DeclarativeContent`` that was
            declared for that file.
    """

    async def _create_provenance(filename, provenance_url):
        """Download one provenance document and emit its content unit."""
        downloader = self.remote.get_downloader(
            url=provenance_url, silence_errors_for_response_codes={404}
        )
        try:
            result = await downloader.run()
        except FileNotFoundError:
            # Deliberate best effort: a provenance file that is not there is skipped
            # (presumably this is how the silenced 404 surfaces -- confirm against
            # the downloader implementation).
            return

        # The provenance unit references the package content, so wait for the
        # declared content's resolution before building it.
        package_content = await declared_contents[filename].resolution()
        # Decode explicitly as UTF-8 instead of the locale's preferred encoding, so
        # parsing the JSON document does not depend on the worker's environment.
        with open(result.path, encoding="utf-8") as f:
            provenance = Provenance.model_validate_json(f.read())
        prov_content = PackageProvenance(
            package=package_content, provenance=provenance.model_dump(mode="json")
        )
        prov_content.set_sha256_hook()
        await self.python_stage.put(DeclarativeContent(content=prov_content))

    tasks = [
        _create_provenance(package.filename, package.provenance_url)
        for package in page.packages
        if package.filename in declared_contents and package.provenance_url
    ]
    await asyncio.gather(*tasks)
def finalize_sync(self, *args, **kwargs):
    """
    Intentionally do nothing.

    Any positional or keyword arguments are accepted and ignored; there is
    currently no finalization work to perform.
    """
    return None
0 commit comments