Skip to content

Commit 449b71c

Browse files
committed
Refactor repair_metadata
1 parent 1c75b34 commit 449b71c

File tree

1 file changed

+54
-18
lines changed

1 file changed

+54
-18
lines changed

pulp_python/app/tasks/repair.py

Lines changed: 54 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
import logging
2-
import uuid
2+
from uuid import UUID
33
from gettext import gettext as _
44

55
from django.db.models.query import QuerySet
@@ -12,12 +12,15 @@
1212
log = logging.getLogger(__name__)
1313

1414

15-
def repair(repository_pk: uuid.UUID) -> None:
15+
# Number of packages handled per batch: used as the queryset iterator chunk size
# and as the batch length at which pending changes are flushed with bulk_update.
BULK_SIZE = 1000
16+
17+
18+
def repair(repository_pk: UUID) -> None:
1619
"""
1720
Repairs metadata of all packages for the specified repository.
1821
1922
Args:
20-
repository_pk (uuid.UUID): The primary key of the repository to repair.
23+
repository_pk (UUID): The primary key of the repository to repair.
2124
2225
Returns:
2326
None
@@ -48,7 +51,11 @@ def repair_metadata(content: QuerySet[PythonPackageContent]) -> int:
4851
int: The number of packages that were repaired.
4952
"""
5053
# TODO: Add on_demand content repair
51-
immediate_content = content.filter(contentartifact__artifact__isnull=False)
54+
immediate_content = (
55+
content.filter(contentartifact__artifact__isnull=False)
56+
.distinct()
57+
.prefetch_related("_artifacts")
58+
)
5259
domain = get_domain()
5360

5461
batch = []
@@ -63,27 +70,56 @@ def repair_metadata(content: QuerySet[PythonPackageContent]) -> int:
6370
progress_report.save()
6471
with progress_report:
6572
for package in progress_report.iter(
66-
immediate_content.prefetch_related("_artifacts").iterator(chunk_size=1000)
73+
immediate_content.iterator(chunk_size=BULK_SIZE)
6774
):
6875
new_data = artifact_to_python_content_data(
6976
package.filename, package._artifacts.get(), domain
7077
)
71-
changed = False
72-
for field, value in new_data.items():
73-
if getattr(package, field) != value:
74-
setattr(package, field, value)
75-
set_of_update_fields.add(field)
76-
changed = True
77-
if changed:
78-
batch.append(package)
79-
if len(batch) == 1000:
80-
total_repaired += len(batch)
81-
PythonPackageContent.objects.bulk_update(batch, set_of_update_fields)
82-
batch = []
83-
set_of_update_fields.clear()
78+
total_repaired = update_package_if_needed(
79+
package, new_data, batch, set_of_update_fields, total_repaired
80+
)
8481

8582
if batch:
8683
total_repaired += len(batch)
8784
PythonPackageContent.objects.bulk_update(batch, set_of_update_fields)
8885

8986
return total_repaired
87+
88+
89+
def update_package_if_needed(
    package: PythonPackageContent,
    new_data: dict,
    batch: list[PythonPackageContent],
    set_of_update_fields: set[str],
    total_repaired: int,
) -> int:
    """
    Sync a package with freshly computed metadata and flush the batch when it is full.

    Every field in "new_data" whose value differs from the one currently stored on
    the package is overwritten on the package and its name is recorded. A package
    with at least one overwritten field is queued in "batch". Once the batch holds
    exactly BULK_SIZE packages it is written with a single bulk_update call, after
    which "batch" and "set_of_update_fields" are emptied. Both collections are
    mutated in-place so the caller keeps sharing them across calls.

    Args:
        package: Package to compare and, if necessary, modify.
        new_data: Mapping of field name to the value the package should have.
        batch: Packages modified so far and not yet written to the database.
        set_of_update_fields: Names of the fields modified on the queued packages.
        total_repaired: Number of packages already written before this call.

    Returns:
        The number of packages written so far: "total_repaired" unchanged, or
        increased by BULK_SIZE when this call flushed the batch.
    """
    dirty = False
    for name in new_data:
        fresh = new_data[name]
        if getattr(package, name) != fresh:
            setattr(package, name, fresh)
            set_of_update_fields.add(name)
            dirty = True

    if dirty:
        batch.append(package)

    # Nothing to flush yet: the remainder is written by the caller after its loop.
    if len(batch) != BULK_SIZE:
        return total_repaired

    PythonPackageContent.objects.bulk_update(batch, set_of_update_fields)
    # Clear in-place (not rebind) so the caller's references see the empty state.
    batch.clear()
    set_of_update_fields.clear()
    return total_repaired + BULK_SIZE

0 commit comments

Comments
 (0)