Skip to content

Commit b273572

Browse files
committed
Refactor repair_metadata
1 parent 1c75b34 commit b273572

File tree

1 file changed

+55
-21
lines changed

1 file changed

+55
-21
lines changed

pulp_python/app/tasks/repair.py

Lines changed: 55 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -1,23 +1,25 @@
11
import logging
2-
import uuid
32
from gettext import gettext as _
3+
from uuid import UUID
44

55
from django.db.models.query import QuerySet
6-
from pulpcore.plugin.models import ProgressReport
7-
from pulpcore.plugin.util import get_domain
8-
96
from pulp_python.app.models import PythonPackageContent, PythonRepository
107
from pulp_python.app.utils import artifact_to_python_content_data
8+
from pulpcore.plugin.models import ProgressReport
9+
from pulpcore.plugin.util import get_domain
1110

1211
log = logging.getLogger(__name__)
1312

1413

15-
def repair(repository_pk: uuid.UUID) -> None:
14+
BULK_SIZE = 1000
15+
16+
17+
def repair(repository_pk: UUID) -> None:
1618
"""
1719
Repairs metadata of all packages for the specified repository.
1820
1921
Args:
20-
repository_pk (uuid.UUID): The primary key of the repository to repair.
22+
repository_pk (UUID): The primary key of the repository to repair.
2123
2224
Returns:
2325
None
@@ -48,7 +50,11 @@ def repair_metadata(content: QuerySet[PythonPackageContent]) -> int:
4850
int: The number of packages that were repaired.
4951
"""
5052
# TODO: Add on_demand content repair
51-
immediate_content = content.filter(contentartifact__artifact__isnull=False)
53+
immediate_content = (
54+
content.filter(contentartifact__artifact__isnull=False)
55+
.distinct()
56+
.prefetch_related("_artifacts")
57+
)
5258
domain = get_domain()
5359

5460
batch = []
@@ -63,27 +69,55 @@ def repair_metadata(content: QuerySet[PythonPackageContent]) -> int:
6369
progress_report.save()
6470
with progress_report:
6571
for package in progress_report.iter(
66-
immediate_content.prefetch_related("_artifacts").iterator(chunk_size=1000)
72+
immediate_content.iterator(chunk_size=BULK_SIZE)
6773
):
6874
new_data = artifact_to_python_content_data(
6975
package.filename, package._artifacts.get(), domain
7076
)
71-
changed = False
72-
for field, value in new_data.items():
73-
if getattr(package, field) != value:
74-
setattr(package, field, value)
75-
set_of_update_fields.add(field)
76-
changed = True
77-
if changed:
78-
batch.append(package)
79-
if len(batch) == 1000:
80-
total_repaired += len(batch)
81-
PythonPackageContent.objects.bulk_update(batch, set_of_update_fields)
82-
batch = []
83-
set_of_update_fields.clear()
77+
total_repaired += update_package_if_needed(
78+
package, new_data, batch, set_of_update_fields
79+
)
8480

8581
if batch:
8682
total_repaired += len(batch)
8783
PythonPackageContent.objects.bulk_update(batch, set_of_update_fields)
8884

8985
return total_repaired
86+
87+
88+
def update_package_if_needed(
    package: PythonPackageContent,
    new_data: dict,
    batch: list[PythonPackageContent],
    set_of_update_fields: set[str],
) -> int:
    """
    Bring a package in line with freshly computed metadata and queue it for saving.

    Each field in "new_data" whose value differs from the package's current
    attribute is overwritten on the package and recorded in
    "set_of_update_fields". A package with at least one differing field is
    appended to "batch". When "batch" holds exactly BULK_SIZE packages it is
    written with one bulk_update call, and then both "batch" and
    "set_of_update_fields" are emptied in place.

    Args:
        package: Package to check and update.
        new_data: Mapping of field name to the value the package should have.
        batch: Packages waiting to be saved (mutated in place).
        set_of_update_fields: Names of fields modified in the pending batch
            (mutated in place).

    Returns:
        BULK_SIZE if this call flushed a full batch, otherwise 0. Packages
        still pending in "batch" are not counted; the caller accounts for them.
    """
    is_dirty = False
    for field_name, fresh_value in new_data.items():
        if getattr(package, field_name) != fresh_value:
            setattr(package, field_name, fresh_value)
            set_of_update_fields.add(field_name)
            is_dirty = True

    if is_dirty:
        batch.append(package)

    # Nothing to flush until the batch is exactly full.
    if len(batch) != BULK_SIZE:
        return 0

    PythonPackageContent.objects.bulk_update(batch, set_of_update_fields)
    # Clear in place: the caller holds references to these same containers.
    batch.clear()
    set_of_update_fields.clear()
    return BULK_SIZE

0 commit comments

Comments
 (0)