Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES/805.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added a new `repair_metadata` endpoint to `Repository` for repairing package metadata.
1 change: 1 addition & 0 deletions pulp_python/app/tasks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@
"""

from .publish import publish # noqa:F401
from .repair import repair # noqa:F401
from .sync import sync # noqa:F401
from .upload import upload, upload_group # noqa:F401
89 changes: 89 additions & 0 deletions pulp_python/app/tasks/repair.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import logging
import uuid
from gettext import gettext as _

from django.db.models.query import QuerySet
from pulpcore.plugin.models import ProgressReport
from pulpcore.plugin.util import get_domain

from pulp_python.app.models import PythonPackageContent, PythonRepository
from pulp_python.app.utils import artifact_to_python_content_data

log = logging.getLogger(__name__)


def repair(repository_pk: uuid.UUID) -> None:
    """
    Repair the metadata of every package in the latest version of a repository.

    Args:
        repository_pk (uuid.UUID): Primary key of the repository to repair.

    Returns:
        None
    """
    repo = PythonRepository.objects.get(pk=repository_pk)

    start_message = _(
        "Repairing packages' metadata for the latest version of repository {}."
    )
    log.info(start_message.format(repo.name))

    # Select the PythonPackageContent rows whose primary keys appear in the
    # content of the repository's latest version.
    latest_pks = repo.latest_version().content.values_list("pk", flat=True)
    packages = PythonPackageContent.objects.filter(pk__in=latest_pks)

    repaired_count = repair_metadata(packages)
    done_message = _("{} packages' metadata repaired.")
    log.info(done_message.format(repaired_count))


def repair_metadata(content: QuerySet[PythonPackageContent]) -> int:
    """
    Repair metadata for a queryset of PythonPackageContent objects
    and update the progress report.

    For every package that has an artifact, fresh metadata is computed with
    ``artifact_to_python_content_data``. Fields whose stored value differs from
    the fresh value are overwritten on the instance, and the changed packages
    are persisted in batches with ``bulk_update``.

    Args:
        content (QuerySet[PythonPackageContent]): The queryset of items to repair.

    Returns:
        int: The number of packages that were repaired.
    """
    # Used both as the iterator chunk size and as the bulk_update flush threshold.
    batch_size = 1000

    # TODO: Add on_demand content repair
    immediate_content = content.filter(contentartifact__artifact__isnull=False)
    domain = get_domain()

    batch = []
    update_fields = set()
    total_repaired = 0

    progress_report = ProgressReport(
        message="Repairing packages' metadata",
        code="repair.metadata",
        total=immediate_content.count(),
    )
    progress_report.save()
    with progress_report:
        packages = immediate_content.prefetch_related("_artifacts").iterator(
            chunk_size=batch_size
        )
        for package in progress_report.iter(packages):
            # QuerySet.get() always builds a fresh query, so calling it on the
            # related manager would ignore the rows loaded by prefetch_related
            # and cost one extra query per package. Read the prefetched rows
            # through .all() instead.
            artifacts = package._artifacts.all()
            if len(artifacts) == 1:
                artifact = artifacts[0]
            else:
                # Unexpected cardinality: defer to .get() so that it raises
                # DoesNotExist / MultipleObjectsReturned exactly as before.
                artifact = package._artifacts.get()

            new_data = artifact_to_python_content_data(
                package.filename, artifact, domain
            )

            changed = False
            for field, value in new_data.items():
                if getattr(package, field) != value:
                    setattr(package, field, value)
                    update_fields.add(field)
                    changed = True
            if not changed:
                continue

            batch.append(package)
            if len(batch) >= batch_size:
                total_repaired += _flush_repaired(batch, update_fields)

        # Persist whatever is left over from the last, partially filled batch.
        total_repaired += _flush_repaired(batch, update_fields)

    return total_repaired


def _flush_repaired(batch: list, update_fields: set) -> int:
    """
    Persist the pending packages with one ``bulk_update`` and empty both accumulators.

    Args:
        batch (list): Modified PythonPackageContent instances awaiting persistence.
        update_fields (set): Names of the fields modified on at least one instance in ``batch``.

    Returns:
        int: The number of packages written (0 when ``batch`` was empty).
    """
    flushed = len(batch)
    if flushed:
        PythonPackageContent.objects.bulk_update(batch, update_fields)
        batch.clear()
        update_fields.clear()
    return flushed
21 changes: 20 additions & 1 deletion pulp_python/app/viewsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ class PythonRepositoryViewSet(
],
},
{
"action": ["modify"],
"action": ["modify", "repair_metadata"],
"principal": "authenticated",
"effect": "allow",
"condition": [
Expand Down Expand Up @@ -122,6 +122,25 @@ class PythonRepositoryViewSet(
"python.pythonrepository_viewer": ["python.view_pythonrepository"],
}

@extend_schema(
    summary="Repair metadata",
    responses={202: AsyncOperationResponseSerializer},
)
@action(detail=True, methods=["post"], serializer_class=None)
def repair_metadata(self, request, pk):
    """
    Trigger an asynchronous task to repair Python metadata. This task will repair metadata
    of all packages for the specified `Repository`, without creating a new `RepositoryVersion`.
    """
    repo = self.get_object()

    # The repository is declared as an exclusive resource of the task, and its
    # primary key is handed over as a string in the task kwargs.
    task = dispatch(
        tasks.repair,
        kwargs={"repository_pk": str(repo.pk)},
        exclusive_resources=[repo],
    )
    return core_viewsets.OperationPostponedResponse(task, request)

@extend_schema(
summary="Sync from remote",
responses={202: AsyncOperationResponseSerializer}
Expand Down
50 changes: 49 additions & 1 deletion pulp_python/tests/functional/api/test_repair.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
import pytest
import subprocess
from urllib.parse import urljoin

from pulp_python.tests.functional.constants import PYTHON_EGG_FILENAME
from pulp_python.tests.functional.constants import (
PYTHON_EGG_FILENAME,
PYTHON_FIXTURES_URL,
)


@pytest.fixture
Expand Down Expand Up @@ -76,3 +80,47 @@ def test_metadata_repair_command(
assert content.packagetype == "sdist"
assert content.requires_python == "" # technically null
assert content.author == "Austin Macdonald"


def test_metadata_repair_endpoint(
    create_content_direct,
    download_python_file,
    monitor_task,
    move_to_repository,
    python_bindings,
    python_repo,
):
    """
    Test repairing of package metadata via `Repositories.repair_metadata` endpoint.

    A scipy sdist is created with deliberately wrong metadata and added to a
    repository. After the repair task finishes, the content is re-read and is
    expected to carry the corrected values.
    """
    filename = "scipy-1.1.0.tar.gz"
    packages_url = urljoin(PYTHON_FIXTURES_URL, "packages/")
    python_file = download_python_file(filename, urljoin(packages_url, filename))

    data = {
        "name": "scipy",
        # Wrong metadata
        "author": "ME",
        "packagetype": "bdist",
        "requires_python": ">=3.8",
        "version": "0.2",
    }
    content = create_content_direct(python_file, filename, data)
    # Sanity check: the content really was stored with the values given above.
    for field, wrong_value in data.items():
        assert getattr(content, field) == wrong_value
    move_to_repository(python_repo.pulp_href, [content.pulp_href])

    response = python_bindings.RepositoriesPythonApi.repair_metadata(
        python_repo.pulp_href
    )
    monitor_task(response.task)

    content = python_bindings.ContentPackagesApi.read(content.pulp_href)
    assert content.version == "1.1.0"
    assert content.packagetype == "sdist"
    assert content.requires_python == ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*"
    assert content.author == ""