Skip to content

Commit 59be7f9

Browse files
committed
Fix migration 19 failing on duplicate artifact saves
fixes: #1071
1 parent 5e2bf26 commit 59be7f9

File tree

2 files changed

+88
-49
lines changed

2 files changed

+88
-49
lines changed

CHANGES/1071.bugfix

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fixed migration 19 failing on duplicate metadata artifact saves.

pulp_python/app/migrations/0019_create_missing_metadata_artifacts.py

Lines changed: 87 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# Generated manually on 2025-12-15 14:00 for creating missing metadata artifacts
22

33
from django.db import migrations
4+
from itertools import groupby
45

56
BATCH_SIZE = 1000
67

@@ -117,7 +118,9 @@ def create_missing_metadata_artifacts(apps, schema_editor):
117118
"""
118119
import tempfile
119120
from django.conf import settings
120-
from django.db import models
121+
from django.db import models, transaction
122+
from django.db.utils import IntegrityError
123+
from pulpcore.plugin.util import set_domain
121124

122125
PythonPackageContent = apps.get_model("python", "PythonPackageContent")
123126
ContentArtifact = apps.get_model("core", "ContentArtifact")
@@ -128,62 +131,97 @@ def create_missing_metadata_artifacts(apps, schema_editor):
128131
metadata_sha256__isnull=False,
129132
filename__endswith=".whl",
130133
contentartifact__artifact__isnull=False,
131-
contentartifact__relative_path=models.F("filename"),
132134
)
133135
.exclude(metadata_sha256="")
134-
.prefetch_related("_artifacts")
135-
.only("filename", "metadata_sha256")
136+
.only("filename", "metadata_sha256", "pulp_domain_id", "pulp_type")
137+
.order_by("pulp_domain_id")
138+
.prefetch_related(
139+
models.Prefetch(
140+
"_artifacts", queryset=Artifact.objects.only("sha256", "file"), to_attr="cartifacts"
141+
)
142+
)
143+
.prefetch_related(models.Prefetch("pulp_domain"))
136144
)
137-
artifact_batch = []
145+
artifact_batch = {}
138146
contentartifact_batch = []
139147
packages_batch = []
140148

141-
with tempfile.TemporaryDirectory(dir=settings.WORKING_DIRECTORY) as temp_dir:
142-
for package in packages:
143-
# Get the main artifact for package
144-
main_artifact = package._artifacts.get()
145-
146-
filename = package.filename
147-
metadata_digests = {"sha256": package.metadata_sha256}
148-
result = artifact_to_metadata_artifact(
149-
filename, main_artifact, metadata_digests, temp_dir, Artifact
150-
)
151-
if result is None:
152-
# Unset metadata_sha256 when extraction or validation fails
153-
package.metadata_sha256 = None
154-
packages_batch.append(package)
155-
continue
156-
metadata_artifact, mismatched_sha256 = result
157-
if mismatched_sha256:
158-
# Fix the package if its metadata_sha256 differs from the actual value
159-
package.metadata_sha256 = mismatched_sha256
160-
packages_batch.append(package)
161-
162-
# Set the domain on the metadata artifact to match the package's domain
163-
metadata_artifact.pulp_domain = package._pulp_domain
164-
165-
contentartifact = ContentArtifact(
166-
artifact=metadata_artifact,
167-
content=package,
168-
relative_path=f"{filename}.metadata",
149+
def batch_save_artifacts(domain_id):
150+
try:
151+
with transaction.atomic():
152+
Artifact.objects.bulk_create(artifact_batch.values(), batch_size=BATCH_SIZE)
153+
except IntegrityError:
154+
# Find the existing artifacts and update the contentartifacts to point to the existing artifacts
155+
digest_cas = {}
156+
for ca in contentartifact_batch:
157+
digest_cas.setdefault(ca.artifact.sha256, []).append(ca)
158+
artifacts = Artifact.objects.filter(
159+
sha256__in=artifact_batch.keys(), pulp_domain_id=domain_id
169160
)
170-
artifact_batch.append(metadata_artifact)
171-
contentartifact_batch.append(contentartifact)
172-
173-
if len(artifact_batch) == BATCH_SIZE:
174-
Artifact.objects.bulk_create(artifact_batch, batch_size=BATCH_SIZE)
175-
ContentArtifact.objects.bulk_create(contentartifact_batch, batch_size=BATCH_SIZE)
176-
artifact_batch.clear()
177-
contentartifact_batch.clear()
178-
if len(packages_batch) == BATCH_SIZE:
179-
PythonPackageContent.objects.bulk_update(
180-
packages_batch, ["metadata_sha256"], batch_size=BATCH_SIZE
161+
for artifact in artifacts:
162+
for ca in digest_cas[artifact.sha256]:
163+
ca.artifact = artifact
164+
artifact_batch.pop(artifact.sha256)
165+
Artifact.objects.bulk_create(artifact_batch.values(), batch_size=BATCH_SIZE)
166+
167+
ContentArtifact.objects.bulk_create(
168+
contentartifact_batch,
169+
batch_size=BATCH_SIZE,
170+
update_conflicts=True,
171+
update_fields=["artifact"],
172+
unique_fields=["content", "relative_path"],
173+
)
174+
artifact_batch.clear()
175+
contentartifact_batch.clear()
176+
177+
for domain_id, domain_packages in groupby(
178+
packages.iterator(chunk_size=BATCH_SIZE), key=lambda x: x.pulp_domain_id
179+
):
180+
for package in domain_packages:
181+
set_domain(package.pulp_domain)
182+
with tempfile.TemporaryDirectory(dir=settings.WORKING_DIRECTORY) as temp_dir:
183+
# Get the main artifact for package
184+
main_artifact = [
185+
ca for ca in package.cartifacts if ca.relative_path.endswith(".whl")
186+
][0]
187+
188+
filename = package.filename
189+
metadata_digests = {"sha256": package.metadata_sha256}
190+
result = artifact_to_metadata_artifact(
191+
filename, main_artifact, metadata_digests, temp_dir, Artifact
181192
)
182-
packages_batch.clear()
183-
184-
if artifact_batch:
185-
Artifact.objects.bulk_create(artifact_batch, batch_size=BATCH_SIZE)
186-
ContentArtifact.objects.bulk_create(contentartifact_batch, batch_size=BATCH_SIZE)
193+
if result is None:
194+
# Unset metadata_sha256 when extraction or validation fails
195+
package.metadata_sha256 = None
196+
packages_batch.append(package)
197+
continue
198+
metadata_artifact, mismatched_sha256 = result
199+
if mismatched_sha256:
200+
# Fix the package if its metadata_sha256 differs from the actual value
201+
package.metadata_sha256 = mismatched_sha256
202+
packages_batch.append(package)
203+
204+
# Set the domain on the metadata artifact to match the package's domain
205+
metadata_artifact.pulp_domain = package.pulp_domain
206+
207+
art = artifact_batch.setdefault(metadata_artifact.sha256, metadata_artifact)
208+
contentartifact = ContentArtifact(
209+
artifact=art,
210+
content=package,
211+
relative_path=f"{filename}.metadata",
212+
)
213+
contentartifact_batch.append(contentartifact)
214+
215+
if len(contentartifact_batch) == BATCH_SIZE:
216+
batch_save_artifacts(domain_id)
217+
if len(packages_batch) == BATCH_SIZE:
218+
PythonPackageContent.objects.bulk_update(
219+
packages_batch, ["metadata_sha256"], batch_size=BATCH_SIZE
220+
)
221+
packages_batch.clear()
222+
223+
if artifact_batch or contentartifact_batch:
224+
batch_save_artifacts(domain_id)
187225
if packages_batch:
188226
PythonPackageContent.objects.bulk_update(
189227
packages_batch, ["metadata_sha256"], batch_size=BATCH_SIZE

0 commit comments

Comments
 (0)