11# Generated manually on 2025-12-15 14:00 for creating missing metadata artifacts
22
33from django .db import migrations
4+ from itertools import groupby
45
56BATCH_SIZE = 1000
67
@@ -117,7 +118,9 @@ def create_missing_metadata_artifacts(apps, schema_editor):
117118 """
118119 import tempfile
119120 from django .conf import settings
120- from django .db import models
121+ from django .db import models , transaction
122+ from django .db .utils import IntegrityError
123+ from pulpcore .plugin .util import set_domain
121124
122125 PythonPackageContent = apps .get_model ("python" , "PythonPackageContent" )
123126 ContentArtifact = apps .get_model ("core" , "ContentArtifact" )
@@ -128,62 +131,97 @@ def create_missing_metadata_artifacts(apps, schema_editor):
128131 metadata_sha256__isnull = False ,
129132 filename__endswith = ".whl" ,
130133 contentartifact__artifact__isnull = False ,
131- contentartifact__relative_path = models .F ("filename" ),
132134 )
133135 .exclude (metadata_sha256 = "" )
134- .prefetch_related ("_artifacts" )
135- .only ("filename" , "metadata_sha256" )
136+ .only ("filename" , "metadata_sha256" , "pulp_domain_id" , "pulp_type" )
137+ .order_by ("pulp_domain_id" )
138+ .prefetch_related (
139+ models .Prefetch (
140+ "_artifacts" , queryset = Artifact .objects .only ("sha256" , "file" ), to_attr = "cartifacts"
141+ )
142+ )
143+ .prefetch_related (models .Prefetch ("pulp_domain" ))
136144 )
137- artifact_batch = []
145+ artifact_batch = {}
138146 contentartifact_batch = []
139147 packages_batch = []
140148
141- with tempfile .TemporaryDirectory (dir = settings .WORKING_DIRECTORY ) as temp_dir :
142- for package in packages :
143- # Get the main artifact for package
144- main_artifact = package ._artifacts .get ()
145-
146- filename = package .filename
147- metadata_digests = {"sha256" : package .metadata_sha256 }
148- result = artifact_to_metadata_artifact (
149- filename , main_artifact , metadata_digests , temp_dir , Artifact
150- )
151- if result is None :
152- # Unset metadata_sha256 when extraction or validation fails
153- package .metadata_sha256 = None
154- packages_batch .append (package )
155- continue
156- metadata_artifact , mismatched_sha256 = result
157- if mismatched_sha256 :
158- # Fix the package if its metadata_sha256 differs from the actual value
159- package .metadata_sha256 = mismatched_sha256
160- packages_batch .append (package )
161-
162- # Set the domain on the metadata artifact to match the package's domain
163- metadata_artifact .pulp_domain = package ._pulp_domain
164-
165- contentartifact = ContentArtifact (
166- artifact = metadata_artifact ,
167- content = package ,
168- relative_path = f"{ filename } .metadata" ,
def batch_save_artifacts(domain_id):
    """Persist the pending metadata artifacts and their ContentArtifact links.

    Operates on the enclosing ``artifact_batch`` (sha256 -> Artifact) and
    ``contentartifact_batch`` (list of ContentArtifact) and empties both
    once everything has been written.

    Args:
        domain_id: value matched against ``pulp_domain_id`` when looking up
            artifacts that are already stored.
    """
    try:
        # The savepoint lets a failed insert roll back cleanly so the
        # fallback below can run.
        with transaction.atomic():
            Artifact.objects.bulk_create(artifact_batch.values(), batch_size=BATCH_SIZE)
    except IntegrityError:
        # Presumably some of these digests are already stored for this
        # domain: re-point the affected links at the stored rows and insert
        # only the artifacts that are still missing.
        links_by_digest = {}
        for link in contentartifact_batch:
            links_by_digest.setdefault(link.artifact.sha256, []).append(link)

        already_stored = Artifact.objects.filter(
            sha256__in=artifact_batch.keys(), pulp_domain_id=domain_id
        )
        for stored in already_stored:
            for link in links_by_digest[stored.sha256]:
                link.artifact = stored
            # Drop it from the pending set so it is not inserted again.
            artifact_batch.pop(stored.sha256)

        Artifact.objects.bulk_create(artifact_batch.values(), batch_size=BATCH_SIZE)

    # Upsert the links: a conflict on (content, relative_path) updates the
    # existing row's artifact instead of failing.
    upsert_options = {
        "batch_size": BATCH_SIZE,
        "update_conflicts": True,
        "update_fields": ["artifact"],
        "unique_fields": ["content", "relative_path"],
    }
    ContentArtifact.objects.bulk_create(contentartifact_batch, **upsert_options)

    # Reset the shared accumulators for the next batch.
    artifact_batch.clear()
    contentartifact_batch.clear()
176+
177+ for domain_id , domain_packages in groupby (
178+ packages .iterator (chunk_size = BATCH_SIZE ), key = lambda x : x .pulp_domain_id
179+ ):
180+ for package in domain_packages :
181+ set_domain (package .pulp_domain )
182+ with tempfile .TemporaryDirectory (dir = settings .WORKING_DIRECTORY ) as temp_dir :
183+ # Get the main artifact for package
184+ main_artifact = [
185+ ca for ca in package .cartifacts if ca .relative_path .endswith (".whl" )
186+ ][0 ]
187+
188+ filename = package .filename
189+ metadata_digests = {"sha256" : package .metadata_sha256 }
190+ result = artifact_to_metadata_artifact (
191+ filename , main_artifact , metadata_digests , temp_dir , Artifact
181192 )
182- packages_batch .clear ()
183-
184- if artifact_batch :
185- Artifact .objects .bulk_create (artifact_batch , batch_size = BATCH_SIZE )
186- ContentArtifact .objects .bulk_create (contentartifact_batch , batch_size = BATCH_SIZE )
193+ if result is None :
194+ # Unset metadata_sha256 when extraction or validation fails
195+ package .metadata_sha256 = None
196+ packages_batch .append (package )
197+ continue
198+ metadata_artifact , mismatched_sha256 = result
199+ if mismatched_sha256 :
200+ # Fix the package if its metadata_sha256 differs from the actual value
201+ package .metadata_sha256 = mismatched_sha256
202+ packages_batch .append (package )
203+
204+ # Set the domain on the metadata artifact to match the package's domain
205+ metadata_artifact .pulp_domain = package .pulp_domain
206+
207+ art = artifact_batch .setdefault (metadata_artifact .sha256 , metadata_artifact )
208+ contentartifact = ContentArtifact (
209+ artifact = art ,
210+ content = package ,
211+ relative_path = f"{ filename } .metadata" ,
212+ )
213+ contentartifact_batch .append (contentartifact )
214+
215+ if len (contentartifact_batch ) == BATCH_SIZE :
216+ batch_save_artifacts (domain_id )
217+ if len (packages_batch ) == BATCH_SIZE :
218+ PythonPackageContent .objects .bulk_update (
219+ packages_batch , ["metadata_sha256" ], batch_size = BATCH_SIZE
220+ )
221+ packages_batch .clear ()
222+
223+ if artifact_batch or contentartifact_batch :
224+ batch_save_artifacts (domain_id )
187225 if packages_batch :
188226 PythonPackageContent .objects .bulk_update (
189227 packages_batch , ["metadata_sha256" ], batch_size = BATCH_SIZE
0 commit comments