Skip to content

Commit dc4a082

Browse files
committed
Fix migration 19 failing on duplicate artifact saves
fixes: #1071
1 parent 5e2bf26 commit dc4a082

File tree

2 files changed

+86
-48
lines changed

2 files changed

+86
-48
lines changed

CHANGES/1071.bugfix

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fixed migration 19 failing on duplicate metadata artifact saves.

pulp_python/app/migrations/0019_create_missing_metadata_artifacts.py

Lines changed: 85 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# Generated manually on 2025-12-15 14:00 for creating missing metadata artifacts
22

33
from django.db import migrations
4+
from itertools import groupby
45

56
BATCH_SIZE = 1000
67

@@ -117,7 +118,9 @@ def create_missing_metadata_artifacts(apps, schema_editor):
117118
"""
118119
import tempfile
119120
from django.conf import settings
120-
from django.db import models
121+
from django.db import models, transaction
122+
from django.db.utils import IntegrityError
123+
from pulpcore.plugin.util import set_domain
121124

122125
PythonPackageContent = apps.get_model("python", "PythonPackageContent")
123126
ContentArtifact = apps.get_model("core", "ContentArtifact")
@@ -131,59 +134,93 @@ def create_missing_metadata_artifacts(apps, schema_editor):
131134
contentartifact__relative_path=models.F("filename"),
132135
)
133136
.exclude(metadata_sha256="")
134-
.prefetch_related("_artifacts")
135-
.only("filename", "metadata_sha256")
137+
.only("sha256", "filename", "metadata_sha256", "pulp_domain_id", "pulp_type")
138+
.order_by("pulp_domain_id")
139+
.prefetch_related(
140+
models.Prefetch(
141+
"_artifacts", queryset=Artifact.objects.only("sha256", "file"), to_attr="artifacts"
142+
)
143+
)
144+
.prefetch_related(models.Prefetch("pulp_domain"))
136145
)
137-
artifact_batch = []
146+
artifact_batch = {}
138147
contentartifact_batch = []
139148
packages_batch = []
140149

141-
with tempfile.TemporaryDirectory(dir=settings.WORKING_DIRECTORY) as temp_dir:
142-
for package in packages:
143-
# Get the main artifact for package
144-
main_artifact = package._artifacts.get()
145-
146-
filename = package.filename
147-
metadata_digests = {"sha256": package.metadata_sha256}
148-
result = artifact_to_metadata_artifact(
149-
filename, main_artifact, metadata_digests, temp_dir, Artifact
150-
)
151-
if result is None:
152-
# Unset metadata_sha256 when extraction or validation fails
153-
package.metadata_sha256 = None
154-
packages_batch.append(package)
155-
continue
156-
metadata_artifact, mismatched_sha256 = result
157-
if mismatched_sha256:
158-
# Fix the package if its metadata_sha256 differs from the actual value
159-
package.metadata_sha256 = mismatched_sha256
160-
packages_batch.append(package)
161-
162-
# Set the domain on the metadata artifact to match the package's domain
163-
metadata_artifact.pulp_domain = package._pulp_domain
164-
165-
contentartifact = ContentArtifact(
166-
artifact=metadata_artifact,
167-
content=package,
168-
relative_path=f"{filename}.metadata",
150+
def batch_save_artifacts(domain_id):
151+
try:
152+
with transaction.atomic():
153+
Artifact.objects.bulk_create(artifact_batch.values(), batch_size=BATCH_SIZE)
154+
except IntegrityError:
155+
# Find the existing artifacts and update the contentartifacts to point to the existing artifacts
156+
digest_cas = {}
157+
for ca in contentartifact_batch:
158+
digest_cas.setdefault(ca.artifact.sha256, []).append(ca)
159+
artifacts = Artifact.objects.filter(
160+
sha256__in=artifact_batch.keys(), pulp_domain_id=domain_id
169161
)
170-
artifact_batch.append(metadata_artifact)
171-
contentartifact_batch.append(contentartifact)
172-
173-
if len(artifact_batch) == BATCH_SIZE:
174-
Artifact.objects.bulk_create(artifact_batch, batch_size=BATCH_SIZE)
175-
ContentArtifact.objects.bulk_create(contentartifact_batch, batch_size=BATCH_SIZE)
176-
artifact_batch.clear()
177-
contentartifact_batch.clear()
178-
if len(packages_batch) == BATCH_SIZE:
179-
PythonPackageContent.objects.bulk_update(
180-
packages_batch, ["metadata_sha256"], batch_size=BATCH_SIZE
162+
for artifact in artifacts:
163+
for ca in digest_cas[artifact.sha256]:
164+
ca.artifact = artifact
165+
artifact_batch.pop(artifact.sha256)
166+
Artifact.objects.bulk_create(artifact_batch.values(), batch_size=BATCH_SIZE)
167+
168+
ContentArtifact.objects.bulk_create(
169+
contentartifact_batch,
170+
batch_size=BATCH_SIZE,
171+
update_conflicts=True,
172+
update_fields=["artifact"],
173+
unique_fields=["content", "relative_path"],
174+
)
175+
artifact_batch.clear()
176+
contentartifact_batch.clear()
177+
178+
for domain_id, domain_packages in groupby(
179+
packages.iterator(chunk_size=BATCH_SIZE), key=lambda x: x.pulp_domain_id
180+
):
181+
for package in domain_packages:
182+
set_domain(package.pulp_domain)
183+
with tempfile.TemporaryDirectory(dir=settings.WORKING_DIRECTORY) as temp_dir:
184+
# Get the main artifact for package
185+
main_artifact = [a for a in package.artifacts if a.sha256 == package.sha256][0]
186+
187+
filename = package.filename
188+
metadata_digests = {"sha256": package.metadata_sha256}
189+
result = artifact_to_metadata_artifact(
190+
filename, main_artifact, metadata_digests, temp_dir, Artifact
181191
)
182-
packages_batch.clear()
183-
184-
if artifact_batch:
185-
Artifact.objects.bulk_create(artifact_batch, batch_size=BATCH_SIZE)
186-
ContentArtifact.objects.bulk_create(contentartifact_batch, batch_size=BATCH_SIZE)
192+
if result is None:
193+
# Unset metadata_sha256 when extraction or validation fails
194+
package.metadata_sha256 = None
195+
packages_batch.append(package)
196+
continue
197+
metadata_artifact, mismatched_sha256 = result
198+
if mismatched_sha256:
199+
# Fix the package if its metadata_sha256 differs from the actual value
200+
package.metadata_sha256 = mismatched_sha256
201+
packages_batch.append(package)
202+
203+
# Set the domain on the metadata artifact to match the package's domain
204+
metadata_artifact.pulp_domain = package.pulp_domain
205+
206+
art = artifact_batch.setdefault(metadata_artifact.sha256, metadata_artifact)
207+
contentartifact = ContentArtifact(
208+
artifact=art,
209+
content=package,
210+
relative_path=f"{filename}.metadata",
211+
)
212+
contentartifact_batch.append(contentartifact)
213+
214+
if len(contentartifact_batch) == BATCH_SIZE:
215+
batch_save_artifacts(domain_id)
216+
if len(packages_batch) == BATCH_SIZE:
217+
PythonPackageContent.objects.bulk_update(
218+
packages_batch, ["metadata_sha256"], batch_size=BATCH_SIZE
219+
)
220+
packages_batch.clear()
221+
222+
if artifact_batch or contentartifact_batch:
223+
batch_save_artifacts(domain_id)
187224
if packages_batch:
188225
PythonPackageContent.objects.bulk_update(
189226
packages_batch, ["metadata_sha256"], batch_size=BATCH_SIZE

0 commit comments

Comments
 (0)