Skip to content

Commit 2ad0ff1

Browse files
committed
Add new repair_metadata endpoint
closes #805
1 parent d2ae90b commit 2ad0ff1

File tree

5 files changed

+160
-2
lines changed

5 files changed

+160
-2
lines changed

CHANGES/805.feature

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Added new `repair_metadata` endpoint to `Repository` for fixing packages' metadata.

pulp_python/app/tasks/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,6 @@
33
"""
44

55
from .publish import publish # noqa:F401
6+
from .repair import repair # noqa:F401
67
from .sync import sync # noqa:F401
78
from .upload import upload, upload_group # noqa:F401

pulp_python/app/tasks/repair.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
import os
2+
from gettext import gettext as _
3+
import logging
4+
5+
import uuid
6+
from django.conf import settings
7+
8+
from pulp_python.app.models import PythonPackageContent, PythonRepository
9+
from pulp_python.app.utils import artifact_to_python_content_data
10+
11+
log = logging.getLogger(__name__)
12+
13+
14+
def repair(repository_pk: uuid.UUID) -> None:
    """
    Repair the metadata of every package in the latest version of a repository.

    Looks the repository up by primary key, collects the content of its latest
    version, hands the matching ``PythonPackageContent`` rows to
    ``repair_metadata`` and logs how many packages were processed and repaired.

    Args:
        repository_pk (uuid.UUID): The primary key of the repository to repair.

    Returns:
        None
    """
    repo = PythonRepository.objects.get(pk=repository_pk)

    start_msg = _("Repairing packages' metadata for the latest version of repository {}.")
    log.info(start_msg.format(repo.name))

    # Primary keys of everything contained in the latest repository version.
    content_pks = repo.latest_version().content.values_list("pk", flat=True)
    packages = PythonPackageContent.objects.filter(pk__in=content_pks)
    repaired_count = repair_metadata(packages)

    done_msg = _("{} packages processed, {} package metadata repaired.")
    log.info(done_msg.format(len(content_pks), repaired_count))
39+
40+
41+
# TODO: include ContentQuerySet in pulpcore/pulpcore/app/models/__init__.py to enable type hint
42+
def repair_metadata(content) -> int:
    """
    Repairs metadata for a queryset of PythonPackageContent objects.

    Only content that has an artifact is processed. For each such package the
    metadata is recomputed with ``artifact_to_python_content_data`` and every
    field whose stored value differs is overwritten on the instance. Changed
    packages are written back in batches with ``bulk_update``.

    Args:
        content (ContentQuerySet): The queryset of PythonPackageContent items to repair.

    Returns:
        int: The number of packages that were repaired.
    """
    # Used both as the DB fetch chunk size and as the bulk_update batch size.
    batch_size = 1000

    # TODO: Add on_demand content repair
    # NOTE(review): presumably the metadata extraction needs to run inside the
    # configured working directory - confirm against artifact_to_python_content_data.
    os.chdir(settings.WORKING_DIRECTORY)
    content = content.select_related("pulp_domain")
    # Skip content without a downloaded artifact; there is nothing to inspect.
    immediate_content = content.filter(contentartifact__artifact__isnull=False)

    batch = []
    # Union of all field names changed on any package in the current batch.
    set_of_update_fields = set()
    total_repaired = 0

    for package in immediate_content.prefetch_related("_artifacts").iterator(
        chunk_size=batch_size
    ):
        new_data = artifact_to_python_content_data(
            package.filename, package._artifacts.get(), package.pulp_domain
        )
        changed = False
        for field, value in new_data.items():
            if getattr(package, field) != value:
                setattr(package, field, value)
                set_of_update_fields.add(field)
                changed = True
        if changed:
            batch.append(package)
        if len(batch) == batch_size:
            total_repaired += len(batch)
            PythonPackageContent.objects.bulk_update(batch, set_of_update_fields)
            # Start a fresh batch; the field set only describes the flushed one.
            batch = []
            set_of_update_fields.clear()

    # Flush the final, partially filled batch.
    if batch:
        total_repaired += len(batch)
        PythonPackageContent.objects.bulk_update(batch, set_of_update_fields)

    return total_repaired

pulp_python/app/viewsets.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ class PythonRepositoryViewSet(
8383
],
8484
},
8585
{
86-
"action": ["modify"],
86+
"action": ["modify", "repair_metadata"],
8787
"principal": "authenticated",
8888
"effect": "allow",
8989
"condition": [
@@ -122,6 +122,26 @@ class PythonRepositoryViewSet(
122122
"python.pythonrepository_viewer": ["python.view_pythonrepository"],
123123
}
124124

125+
@extend_schema(
    summary="Repair metadata",
    responses={202: AsyncOperationResponseSerializer},
)
@action(detail=True, methods=["post"], serializer_class=None)
def repair_metadata(self, request, pk):
    """
    <!-- User-facing documentation, rendered as html-->
    Trigger an asynchronous task to repair Python metadata. This task will repair metadata
    of all packages for the specified `Repository`, without creating a new `RepositoryVersion`.
    """
    repo = self.get_object()

    # The task receives the primary key as a string and holds the repository
    # as an exclusive resource while it runs.
    task_kwargs = {"repository_pk": str(repo.pk)}
    task = dispatch(
        tasks.repair,
        exclusive_resources=[repo],
        kwargs=task_kwargs,
    )
    return core_viewsets.OperationPostponedResponse(task, request)
144+
125145
@extend_schema(
126146
summary="Sync from remote",
127147
responses={202: AsyncOperationResponseSerializer}

pulp_python/tests/functional/api/test_repair.py

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
import pytest
22
import subprocess
3+
from urllib.parse import urljoin
34

4-
from pulp_python.tests.functional.constants import PYTHON_EGG_FILENAME
5+
from pulp_python.tests.functional.constants import (
6+
PYTHON_EGG_FILENAME,
7+
PYTHON_FIXTURES_URL,
8+
)
59

610

711
@pytest.fixture
@@ -76,3 +80,47 @@ def test_metadata_repair_command(
7680
assert content.packagetype == "sdist"
7781
assert content.requires_python == "" # technically null
7882
assert content.author == "Austin Macdonald"
83+
84+
85+
def test_metadata_repair_endpoint(
    create_content_direct,
    download_python_file,
    monitor_task,
    move_to_repository,
    python_bindings,
    python_repo,
):
    """
    Test repairing of package metadata via `Repositories.repair_metadata` endpoint.
    """
    filename = "scipy-1.1.0.tar.gz"
    packages_url = urljoin(PYTHON_FIXTURES_URL, "packages/")
    python_file = download_python_file(filename, urljoin(packages_url, filename))

    initial_metadata = {
        "name": "scipy",
        # Wrong metadata
        "author": "ME",
        "packagetype": "bdist",
        "requires_python": ">=3.8",
        "version": "0.2",
    }
    content = create_content_direct(python_file, filename, initial_metadata)
    # Make sure the content really was stored with the values given above.
    for field, wrong_value in initial_metadata.items():
        if field != "python_version":
            assert getattr(content, field) == wrong_value
    move_to_repository(python_repo.pulp_href, [content.pulp_href])

    # Trigger the repair task and wait for it to finish.
    repair_response = python_bindings.RepositoriesPythonApi.repair_metadata(
        python_repo.pulp_href
    )
    monitor_task(repair_response.task)

    # The stored metadata must now match what the sdist itself declares.
    content = python_bindings.ContentPackagesApi.read(content.pulp_href)
    assert content.version == "1.1.0"
    assert content.packagetype == "sdist"
    assert content.requires_python == ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*"
    assert content.author == ""

0 commit comments

Comments
 (0)