11# Generated manually on 2025-12-15 14:00 for creating missing metadata artifacts
22
33from django .db import migrations
4+ from itertools import groupby
45
56BATCH_SIZE = 1000
67
@@ -117,7 +118,9 @@ def create_missing_metadata_artifacts(apps, schema_editor):
117118 """
118119 import tempfile
119120 from django .conf import settings
120- from django .db import models
121+ from django .db import models , transaction
122+ from django .db .utils import IntegrityError
123+ from pulpcore .plugin .util import set_domain
121124
122125 PythonPackageContent = apps .get_model ("python" , "PythonPackageContent" )
123126 ContentArtifact = apps .get_model ("core" , "ContentArtifact" )
@@ -131,59 +134,93 @@ def create_missing_metadata_artifacts(apps, schema_editor):
131134 contentartifact__relative_path = models .F ("filename" ),
132135 )
133136 .exclude (metadata_sha256 = "" )
134- .prefetch_related ("_artifacts" )
135- .only ("filename" , "metadata_sha256" )
137+ .only ("sha256" , "filename" , "metadata_sha256" , "pulp_domain_id" , "pulp_type" )
138+ .order_by ("pulp_domain_id" )
139+ .prefetch_related (
140+ models .Prefetch (
141+ "_artifacts" , queryset = Artifact .objects .only ("sha256" , "file" ), to_attr = "artifacts"
142+ )
143+ )
144+ .prefetch_related (models .Prefetch ("pulp_domain" ))
136145 )
137- artifact_batch = []
146+ artifact_batch = {}
138147 contentartifact_batch = []
139148 packages_batch = []
140149
141- with tempfile .TemporaryDirectory (dir = settings .WORKING_DIRECTORY ) as temp_dir :
142- for package in packages :
143- # Get the main artifact for package
144- main_artifact = package ._artifacts .get ()
145-
146- filename = package .filename
147- metadata_digests = {"sha256" : package .metadata_sha256 }
148- result = artifact_to_metadata_artifact (
149- filename , main_artifact , metadata_digests , temp_dir , Artifact
150- )
151- if result is None :
152- # Unset metadata_sha256 when extraction or validation fails
153- package .metadata_sha256 = None
154- packages_batch .append (package )
155- continue
156- metadata_artifact , mismatched_sha256 = result
157- if mismatched_sha256 :
158- # Fix the package if its metadata_sha256 differs from the actual value
159- package .metadata_sha256 = mismatched_sha256
160- packages_batch .append (package )
161-
162- # Set the domain on the metadata artifact to match the package's domain
163- metadata_artifact .pulp_domain = package ._pulp_domain
164-
165- contentartifact = ContentArtifact (
166- artifact = metadata_artifact ,
167- content = package ,
168- relative_path = f"{ filename } .metadata" ,
def batch_save_artifacts(domain_id):
    """Flush the pending metadata artifacts and their content-artifact links.

    Works on the enclosing function's ``artifact_batch`` (a dict keyed by
    sha256) and ``contentartifact_batch`` (a list of unsaved ContentArtifact
    objects); both are emptied before returning.

    Args:
        domain_id: matched against ``Artifact.pulp_domain_id`` when looking up
            artifacts that are already stored.
    """
    try:
        # Insert inside its own atomic block so an IntegrityError can be
        # caught and handled below.
        with transaction.atomic():
            Artifact.objects.bulk_create(artifact_batch.values(), batch_size=BATCH_SIZE)
    except IntegrityError:
        # Some of the artifacts already exist: re-point the pending
        # content-artifacts at the stored rows and insert only the rest.
        links_by_digest = {}
        for link in contentartifact_batch:
            links_by_digest.setdefault(link.artifact.sha256, []).append(link)

        already_stored = Artifact.objects.filter(
            sha256__in=artifact_batch.keys(), pulp_domain_id=domain_id
        )
        for stored in already_stored:
            digest = stored.sha256
            for link in links_by_digest[digest]:
                link.artifact = stored
            # Already in the database, so it must not be inserted again.
            del artifact_batch[digest]

        Artifact.objects.bulk_create(artifact_batch.values(), batch_size=BATCH_SIZE)

    # Upsert the links: on a (content, relative_path) conflict only the
    # artifact reference of the existing row is updated.
    ContentArtifact.objects.bulk_create(
        contentartifact_batch,
        batch_size=BATCH_SIZE,
        update_conflicts=True,
        update_fields=["artifact"],
        unique_fields=["content", "relative_path"],
    )

    # Reset both accumulators for the next batch.
    contentartifact_batch.clear()
    artifact_batch.clear()
177+
178+ for domain_id , domain_packages in groupby (
179+ packages .iterator (chunk_size = BATCH_SIZE ), key = lambda x : x .pulp_domain_id
180+ ):
181+ for package in domain_packages :
182+ set_domain (package .pulp_domain )
183+ with tempfile .TemporaryDirectory (dir = settings .WORKING_DIRECTORY ) as temp_dir :
184+ # Get the main artifact for package
185+ main_artifact = [a for a in package .artifacts if a .sha256 == package .sha256 ][0 ]
186+
187+ filename = package .filename
188+ metadata_digests = {"sha256" : package .metadata_sha256 }
189+ result = artifact_to_metadata_artifact (
190+ filename , main_artifact , metadata_digests , temp_dir , Artifact
181191 )
182- packages_batch .clear ()
183-
184- if artifact_batch :
185- Artifact .objects .bulk_create (artifact_batch , batch_size = BATCH_SIZE )
186- ContentArtifact .objects .bulk_create (contentartifact_batch , batch_size = BATCH_SIZE )
192+ if result is None :
193+ # Unset metadata_sha256 when extraction or validation fails
194+ package .metadata_sha256 = None
195+ packages_batch .append (package )
196+ continue
197+ metadata_artifact , mismatched_sha256 = result
198+ if mismatched_sha256 :
199+ # Fix the package if its metadata_sha256 differs from the actual value
200+ package .metadata_sha256 = mismatched_sha256
201+ packages_batch .append (package )
202+
203+ # Set the domain on the metadata artifact to match the package's domain
204+ metadata_artifact .pulp_domain = package .pulp_domain
205+
206+ art = artifact_batch .setdefault (metadata_artifact .sha256 , metadata_artifact )
207+ contentartifact = ContentArtifact (
208+ artifact = art ,
209+ content = package ,
210+ relative_path = f"{ filename } .metadata" ,
211+ )
212+ contentartifact_batch .append (contentartifact )
213+
214+ if len (contentartifact_batch ) == BATCH_SIZE :
215+ batch_save_artifacts (domain_id )
216+ if len (packages_batch ) == BATCH_SIZE :
217+ PythonPackageContent .objects .bulk_update (
218+ packages_batch , ["metadata_sha256" ], batch_size = BATCH_SIZE
219+ )
220+ packages_batch .clear ()
221+
222+ if artifact_batch or contentartifact_batch :
223+ batch_save_artifacts (domain_id )
187224 if packages_batch :
188225 PythonPackageContent .objects .bulk_update (
189226 packages_batch , ["metadata_sha256" ], batch_size = BATCH_SIZE
0 commit comments