def _get_single_artifact(package):
    """
    Return the only artifact attached to ``package``.

    ``QuerySet.get()`` always issues a new query, so calling it on a prefetched relation
    throws the prefetched rows away. ``.all()`` is answered from the prefetch cache, so the
    common case (exactly one artifact) costs no extra query. Any other cardinality falls back
    to ``.get()`` so that the exceptions raised are the same ones ``.get()`` would raise.
    """
    artifacts = package._artifacts.all()
    if len(artifacts) == 1:
        return artifacts[0]
    return package._artifacts.get()


def _apply_metadata(package, new_data) -> set:
    """Copy differing values from ``new_data`` onto ``package``; return the changed field names."""
    changed_fields = set()
    for field, value in new_data.items():
        if getattr(package, field) != value:
            setattr(package, field, value)
            changed_fields.add(field)
    return changed_fields


def _flush_batch(batch: list, update_fields: set) -> int:
    """Write the pending packages with one ``bulk_update`` and reset both accumulators."""
    flushed = len(batch)
    if flushed:
        PythonPackageContent.objects.bulk_update(batch, update_fields)
        batch.clear()
        update_fields.clear()
    return flushed


def repair(repository_pk: uuid.UUID) -> None:
    """
    Repairs metadata of all packages in the latest version of the specified repository.

    Packages are updated in place; this function does not create a repository version.

    Args:
        repository_pk (uuid.UUID): The primary key of the repository to repair. It is only
            used for a ``pk`` lookup.

    Returns:
        None

    Raises:
        PythonRepository.DoesNotExist: If no repository has the given primary key.
    """
    repository = PythonRepository.objects.get(pk=repository_pk)

    log.info(
        _(
            "Repairing packages' metadata for the latest version of repository {}."
        ).format(repository.name)
    )
    content_set = repository.latest_version().content.values_list("pk", flat=True)
    content = PythonPackageContent.objects.filter(pk__in=content_set)

    num_repaired = repair_metadata(content)
    log.info(_("{} packages' metadata repaired.").format(num_repaired))


def repair_metadata(content: QuerySet[PythonPackageContent], batch_size: int = 1000) -> int:
    """
    Repairs metadata for a queryset of PythonPackageContent objects
    and updates the progress report.

    Metadata is recomputed from each package's artifact and compared field by field with
    the stored values; only packages with at least one differing field are written back.

    Args:
        content (QuerySet[PythonPackageContent]): The queryset of items to repair.
        batch_size (int): Number of rows fetched per iterator chunk and the number of changed
            packages accumulated before each ``bulk_update``. Must be positive.

    Returns:
        int: The number of packages that were repaired.
    """
    # TODO: Add on_demand content repair
    immediate_content = content.filter(contentartifact__artifact__isnull=False)
    domain = get_domain()

    batch = []
    # Union of the fields changed on any package in the current batch. Unchanged packages in
    # the batch already hold correct values for those fields, so writing them is harmless.
    update_fields = set()
    total_repaired = 0

    progress_report = ProgressReport(
        message="Repairing packages' metadata",
        code="repair.metadata",
        total=immediate_content.count(),
    )
    progress_report.save()
    with progress_report:
        packages = immediate_content.prefetch_related("_artifacts").iterator(
            chunk_size=batch_size
        )
        for package in progress_report.iter(packages):
            new_data = artifact_to_python_content_data(
                package.filename, _get_single_artifact(package), domain
            )
            changed_fields = _apply_metadata(package, new_data)
            if not changed_fields:
                continue
            update_fields |= changed_fields
            batch.append(package)
            if len(batch) >= batch_size:
                total_repaired += _flush_batch(batch, update_fields)

        # Write whatever is left over from the last, partially filled batch.
        total_repaired += _flush_batch(batch, update_fields)

    return total_repaired
@extend_schema(
    summary="Repair metadata",
    responses={202: AsyncOperationResponseSerializer},
)
@action(detail=True, methods=["post"], serializer_class=None)
def repair_metadata(self, request, pk):
    """
    Trigger an asynchronous task to repair Python metadata. This task will repair metadata
    of all packages for the specified `Repository`, without creating a new `RepositoryVersion`.
    """
    repo = self.get_object()
    # The primary key is handed to the task in string form.
    task_kwargs = {"repository_pk": str(repo.pk)}
    # The repository is listed as an exclusive resource of the dispatched task.
    task = dispatch(tasks.repair, exclusive_resources=[repo], kwargs=task_kwargs)
    return core_viewsets.OperationPostponedResponse(task, request)
def test_metadata_repair_endpoint(
    create_content_direct,
    download_python_file,
    monitor_task,
    move_to_repository,
    python_bindings,
    python_repo,
):
    """
    Test repairing of package metadata via `Repositories.repair_metadata` endpoint.

    A content unit is created directly with deliberately wrong metadata, added to a
    repository, and then the endpoint is expected to replace the wrong values.
    """
    sdist_filename = "scipy-1.1.0.tar.gz"
    sdist_url = urljoin(urljoin(PYTHON_FIXTURES_URL, "packages/"), sdist_filename)
    python_file = download_python_file(sdist_filename, sdist_url)

    data = {
        "name": "scipy",
        # Wrong metadata
        "author": "ME",
        "packagetype": "bdist",
        "requires_python": ">=3.8",
        "version": "0.2",
    }
    content = create_content_direct(python_file, sdist_filename, data)
    # Precondition: the content really was stored with the values given above.
    # Every key in `data` is checked; none of them needs to be skipped.
    for field, wrong_value in data.items():
        assert getattr(content, field) == wrong_value
    move_to_repository(python_repo.pulp_href, [content.pulp_href])

    response = python_bindings.RepositoriesPythonApi.repair_metadata(
        python_repo.pulp_href
    )
    monitor_task(response.task)

    # Re-read the content unit and compare against the values asserted for this fixture.
    content = python_bindings.ContentPackagesApi.read(content.pulp_href)
    assert content.version == "1.1.0"
    assert content.packagetype == "sdist"
    assert content.requires_python == ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*"
    assert content.author == ""