Skip to content

Commit c908b2b

Browse files
committed
Add new repair_metadata endpoint
closes #805
1 parent d2ae90b commit c908b2b

File tree

5 files changed

+175
-2
lines changed

5 files changed

+175
-2
lines changed

CHANGES/805.feature

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Added a new `repair_metadata` endpoint to `Repository` for fixing packages' metadata.

pulp_python/app/tasks/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,6 @@
33
"""
44

55
from .publish import publish # noqa:F401
6+
from .repair import repair # noqa:F401
67
from .sync import sync # noqa:F401
78
from .upload import upload, upload_group # noqa:F401

pulp_python/app/tasks/repair.py

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
import logging
2+
import uuid
3+
from gettext import gettext as _
4+
5+
from django.db.models.query import QuerySet
6+
from pulpcore.plugin.constants import TASK_STATES
7+
from pulpcore.plugin.models import ProgressReport
8+
from pulpcore.plugin.util import get_domain
9+
10+
from pulp_python.app.models import PythonPackageContent, PythonRepository
11+
from pulp_python.app.utils import artifact_to_python_content_data
12+
13+
log = logging.getLogger(__name__)
14+
15+
16+
def repair(repository_pk: uuid.UUID) -> None:
    """
    Repair the metadata of all packages in the latest version of a repository.

    Looks up the repository, collects the Python package content of its latest
    version, delegates the actual work to `repair_metadata` and tracks the
    progress in a `ProgressReport` with the code "repair.metadata".

    Args:
        repository_pk (uuid.UUID): The primary key of the repository to repair.

    Returns:
        None
    """
    repository = PythonRepository.objects.get(pk=repository_pk)

    log.info(
        _(
            "Repairing packages' metadata for the latest version of repository {}."
        ).format(repository.name)
    )
    content_set = repository.latest_version().content.values_list("pk", flat=True)
    content = PythonPackageContent.objects.filter(pk__in=content_set)

    progress_report = ProgressReport(
        message=_("Repairing packages' metadata"),
        code="repair.metadata",
        state=TASK_STATES.RUNNING,
        # Count in the database; len() would evaluate the queryset and pull
        # every primary key into memory only to measure its size.
        total=content_set.count(),
        done=0,
    )
    # Persist the report before any work starts so the partial
    # save(update_fields=...) calls that follow operate on a stored report.
    progress_report.save()

    num_repaired = repair_metadata(content, progress_report)
    progress_report.state = TASK_STATES.COMPLETED
    progress_report.save(update_fields=["state"])

    log.info(_("{} packages' metadata repaired.").format(num_repaired))
48+
49+
50+
def repair_metadata(
    content: QuerySet[PythonPackageContent], progress_report: ProgressReport
) -> int:
    """
    Recompute package metadata from the stored artifacts and persist any differences.

    Only content that has an artifact is processed. Changed packages are written
    back with `bulk_update` in batches of 1000, and the progress report's `done`
    counter is saved after every 100 processed packages and once more at the end
    if the last packages did not land on a multiple of 100.

    Args:
        content (QuerySet[PythonPackageContent]): The queryset of items to repair.
        progress_report (ProgressReport): The progress report instance to update.

    Returns:
        int: The number of packages that were repaired.
    """
    # TODO: Add on_demand content repair
    with_artifact = content.filter(contentartifact__artifact__isnull=False)
    domain = get_domain()

    pending = []
    dirty_fields = set()
    repaired_count = 0

    # Stays 0 when there is nothing to iterate over.
    processed = 0
    packages = with_artifact.prefetch_related("_artifacts").iterator(chunk_size=1000)
    for processed, package in enumerate(packages, start=1):
        fresh = artifact_to_python_content_data(
            package.filename, package._artifacts.get(), domain
        )
        # Keep only the fields whose stored value differs from the artifact's.
        stale = {
            name: value
            for name, value in fresh.items()
            if getattr(package, name) != value
        }
        if stale:
            for name, value in stale.items():
                setattr(package, name, value)
            dirty_fields.update(stale)
            pending.append(package)
            if len(pending) == 1000:
                # Flush a full batch; the field set is the union over the batch.
                repaired_count += len(pending)
                PythonPackageContent.objects.bulk_update(pending, dirty_fields)
                pending = []
                dirty_fields.clear()
        if processed % 100 == 0:
            progress_report.done = processed
            progress_report.save(update_fields=["done"])

    # Write whatever is left over from the last, partially filled batch.
    if pending:
        repaired_count += len(pending)
        PythonPackageContent.objects.bulk_update(pending, dirty_fields)
    # Record the final count unless the loop just saved it.
    if processed % 100:
        progress_report.done = processed
        progress_report.save(update_fields=["done"])

    return repaired_count

pulp_python/app/viewsets.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ class PythonRepositoryViewSet(
8383
],
8484
},
8585
{
86-
"action": ["modify"],
86+
"action": ["modify", "repair_metadata"],
8787
"principal": "authenticated",
8888
"effect": "allow",
8989
"condition": [
@@ -122,6 +122,25 @@ class PythonRepositoryViewSet(
122122
"python.pythonrepository_viewer": ["python.view_pythonrepository"],
123123
}
124124

125+
@extend_schema(
    summary="Repair metadata",
    responses={202: AsyncOperationResponseSerializer},
)
@action(detail=True, methods=["post"], serializer_class=None)
def repair_metadata(self, request, pk):
    """
    Trigger an asynchronous task to repair Python metadata. This task will repair metadata
    of all packages for the specified `Repository`, without creating a new `RepositoryVersion`.
    """
    repo = self.get_object()
    # The task receives the primary key as a string and holds the repository
    # as an exclusive resource.
    task_kwargs = {"repository_pk": str(repo.pk)}
    task = dispatch(tasks.repair, exclusive_resources=[repo], kwargs=task_kwargs)
    return core_viewsets.OperationPostponedResponse(task, request)
143+
125144
@extend_schema(
126145
summary="Sync from remote",
127146
responses={202: AsyncOperationResponseSerializer}

pulp_python/tests/functional/api/test_repair.py

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
import pytest
22
import subprocess
3+
from urllib.parse import urljoin
34

4-
from pulp_python.tests.functional.constants import PYTHON_EGG_FILENAME
5+
from pulp_python.tests.functional.constants import (
6+
PYTHON_EGG_FILENAME,
7+
PYTHON_FIXTURES_URL,
8+
)
59

610

711
@pytest.fixture
@@ -76,3 +80,47 @@ def test_metadata_repair_command(
7680
assert content.packagetype == "sdist"
7781
assert content.requires_python == "" # technically null
7882
assert content.author == "Austin Macdonald"
83+
84+
85+
def test_metadata_repair_endpoint(
    create_content_direct,
    download_python_file,
    monitor_task,
    move_to_repository,
    python_bindings,
    python_repo,
):
    """
    Test repairing of package metadata via `Repositories.repair_metadata` endpoint.

    Content is created directly with deliberately wrong metadata, added to a
    repository, and then expected to carry the values read from the package
    file once the repair task has finished.
    """
    python_egg_filename = "scipy-1.1.0.tar.gz"
    python_egg_url = urljoin(
        urljoin(PYTHON_FIXTURES_URL, "packages/"), python_egg_filename
    )
    python_file = download_python_file(python_egg_filename, python_egg_url)

    data = {
        "name": "scipy",
        # Wrong metadata
        "author": "ME",
        "packagetype": "bdist",
        "requires_python": ">=3.8",
        "version": "0.2",
    }
    content = create_content_direct(python_file, python_egg_filename, data)
    # Make sure the wrong values were really stored before repairing them.
    for field, wrong_value in data.items():
        assert getattr(content, field) == wrong_value
    move_to_repository(python_repo.pulp_href, [content.pulp_href])

    response = python_bindings.RepositoriesPythonApi.repair_metadata(
        python_repo.pulp_href
    )
    monitor_task(response.task)

    # Re-read the content; the repair updates it in place.
    content = python_bindings.ContentPackagesApi.read(content.pulp_href)
    assert content.version == "1.1.0"
    assert content.packagetype == "sdist"
    assert content.requires_python == ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*"
    assert content.author == ""

0 commit comments

Comments
 (0)