22import uuid
33from gettext import gettext as _
44
5+ from requests .exceptions import RequestException
56from django .db .models .query import QuerySet
67from pulpcore .plugin .models import ProgressReport
78from pulpcore .plugin .util import get_domain
89
910from pulp_python .app .models import PythonPackageContent , PythonRepository
10- from pulp_python .app .utils import artifact_to_python_content_data
11+ from pulp_python .app .utils import (
12+ artifact_to_python_content_data ,
13+ fetch_json_release_metadata ,
14+ parse_metadata ,
15+ )
16+ from itertools import groupby
1117
1218log = logging .getLogger (__name__ )
1319
@@ -47,8 +53,17 @@ def repair_metadata(content: QuerySet[PythonPackageContent]) -> int:
4753 Returns:
4854 int: The number of packages that were repaired.
4955 """
50- # TODO: Add on_demand content repair
51- immediate_content = content .filter (contentartifact__artifact__isnull = False )
56+ immediate_content = (
57+ content .filter (contentartifact__artifact__isnull = False )
58+ .distinct ()
59+ .prefetch_related ("_artifacts" )
60+ )
61+ on_demand_content = (
62+ content .filter (contentartifact__artifact__isnull = True )
63+ .distinct ()
64+ .prefetch_related ("contentartifact_set__remoteartifact_set" )
65+ .order_by ("name" , "version" )
66+ )
5267 domain = get_domain ()
5368
5469 batch = []
@@ -58,12 +73,12 @@ def repair_metadata(content: QuerySet[PythonPackageContent]) -> int:
5873 progress_report = ProgressReport (
5974 message = "Repairing packages' metadata" ,
6075 code = "repair.metadata" ,
61- total = immediate_content .count (),
76+ total = content .count (),
6277 )
6378 progress_report .save ()
6479 with progress_report :
6580 for package in progress_report .iter (
66- immediate_content .prefetch_related ( "_artifacts" ). iterator (chunk_size = 1000 )
81+ immediate_content .iterator (chunk_size = 1000 )
6782 ):
6883 new_data = artifact_to_python_content_data (
6984 package .filename , package ._artifacts .get (), domain
@@ -82,6 +97,85 @@ def repair_metadata(content: QuerySet[PythonPackageContent]) -> int:
8297 batch = []
8398 set_of_update_fields .clear ()
8499
100+ # For on-demand content, we expect that:
101+ # 1. PythonPackageContent always has correct name and version, and one ContentArtifact
102+ # 2. RemoteArtifact always has correct sha256
103+ # Repair is only supported if all PythonPackageContent items with the same name and
104+ # version (i.e. group) share the same remote URL. Otherwise, the entire group is skipped
105+ for (name , version ), group in groupby (
106+ on_demand_content .iterator (chunk_size = 1000 ),
107+ key = lambda x : (x .name , x .version ),
108+ ):
109+ group = list (group )
110+ remotes = set (
111+ remote
112+ for content in group
113+ for remote in content .contentartifact_set .get ()
114+ .remoteartifact_set .all ()
115+ .values_list ("remote__url" , flat = True )
116+ )
117+ if len (remotes ) != 1 :
118+ log .warning (
119+ _ ("Only one remote url is supported for {} {}" ).format (
120+ name , version
121+ )
122+ )
123+ continue
124+ remote_url = remotes .pop ()
125+
126+ # Retrieve data with all distributions for the given package version
127+ try :
128+ json_data = fetch_json_release_metadata (name , version , remote_url )
129+ except RequestException as exc :
130+ log .warning (
131+ _ ("Could not fetch metadata for {} {} from {}. Error: {}" ).format (
132+ name , version , remote_url , exc
133+ )
134+ )
135+ continue
136+
137+ for package in progress_report .iter (group ):
138+ remote_artifacts = (
139+ package .contentartifact_set .get ().remoteartifact_set .all ()
140+ )
141+ # Extract data only for the specific distribution being checked
142+ dist_data = next (
143+ (
144+ dist
145+ for ra in remote_artifacts
146+ for dist in json_data ["urls" ]
147+ if ra .sha256 == dist ["digests" ]["sha256" ]
148+ ),
149+ None ,
150+ )
151+ if not dist_data :
152+ log .warning (
153+ _ (
154+ "Could not fetch distribution for {} {} with sha256 {}."
155+ ).format (name , version , package .sha256 )
156+ )
157+ continue
158+
159+ new_data = parse_metadata (json_data ["info" ], package .version , dist_data )
160+ new_data .pop ("url" ) # belongs to RemoteArtifact
161+ new_data ["pulp_domain" ] = domain
162+ new_data ["_pulp_domain" ] = new_data ["pulp_domain" ]
163+ changed = False
164+ for field , value in new_data .items ():
165+ if getattr (package , field ) != value :
166+ setattr (package , field , value )
167+ set_of_update_fields .add (field )
168+ changed = True
169+ if changed :
170+ batch .append (package )
171+ if len (batch ) == 1000 :
172+ total_repaired += len (batch )
173+ PythonPackageContent .objects .bulk_update (
174+ batch , set_of_update_fields
175+ )
176+ batch = []
177+ set_of_update_fields .clear ()
178+
85179 if batch :
86180 total_repaired += len (batch )
87181 PythonPackageContent .objects .bulk_update (batch , set_of_update_fields )
0 commit comments