diff --git a/CHANGES/+attestations.feature b/CHANGES/+attestations.feature
new file mode 100644
index 00000000..cb59ba18
--- /dev/null
+++ b/CHANGES/+attestations.feature
@@ -0,0 +1 @@
+Added the ability to upload PEP 740 Provenance files to repositories.
diff --git a/pulp_python/app/migrations/0018_packageprovenance.py b/pulp_python/app/migrations/0018_packageprovenance.py
new file mode 100644
index 00000000..2c172f70
--- /dev/null
+++ b/pulp_python/app/migrations/0018_packageprovenance.py
@@ -0,0 +1,54 @@
+# Generated by Django 4.2.26 on 2025-11-10 09:11
+
+from django.db import migrations, models
+import django.db.models.deletion
+import pulpcore.app.util
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("python", "0017_pythonpackagecontent_size"),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name="PackageProvenance",
+            fields=[
+                (
+                    "content_ptr",
+                    models.OneToOneField(
+                        auto_created=True,
+                        on_delete=django.db.models.deletion.CASCADE,
+                        parent_link=True,
+                        primary_key=True,
+                        serialize=False,
+                        to="core.content",
+                    ),
+                ),
+                ("provenance", models.JSONField()),
+                ("sha256", models.CharField(max_length=64)),
+                (
+                    "_pulp_domain",
+                    models.ForeignKey(
+                        default=pulpcore.app.util.get_domain_pk,
+                        on_delete=django.db.models.deletion.PROTECT,
+                        to="core.domain",
+                    ),
+                ),
+                (
+                    "package",
+                    models.ForeignKey(
+                        on_delete=django.db.models.deletion.CASCADE,
+                        related_name="provenances",
+                        to="python.pythonpackagecontent",
+                    ),
+                ),
+            ],
+            options={
+                "default_related_name": "%(app_label)s_%(model_name)s",
+                "unique_together": {("sha256", "_pulp_domain")},
+            },
+            bases=("core.content",),
+        ),
+    ]
diff --git a/pulp_python/app/models.py b/pulp_python/app/models.py
index d554297f..4361489d 100644
--- a/pulp_python/app/models.py
+++ b/pulp_python/app/models.py
@@ -1,3 +1,5 @@
+import hashlib
+import json
 from logging import getLogger
 
 from aiohttp.web import json_response
@@ -5,6 +7,10 @@
 from django.core.exceptions import ObjectDoesNotExist
 from django.db import models
 from django.conf import settings
+from django_lifecycle import (
+    BEFORE_SAVE,
+    hook,
+)
 from pulpcore.plugin.models import (
     AutoAddObjPermsMixin,
     Content,
@@ -15,6 +21,7 @@
 )
 from pulpcore.plugin.responses import ArtifactResponse
 
+from pypi_attestations import Provenance
 from pathlib import PurePath
 from .utils import (
     artifact_to_python_content_data,
@@ -235,6 +242,43 @@ class Meta:
         ]
 
 
+class PackageProvenance(Content):
+    """
+    A PEP 740 provenance object attached to one Python package.
+    """
+
+    TYPE = "provenance"
+    repo_key_fields = ("package_id",)  # presumably one per package per repo; confirm
+
+    package = models.ForeignKey(
+        PythonPackageContent, on_delete=models.CASCADE, related_name="provenances"
+    )
+    provenance = models.JSONField(null=False)
+    sha256 = models.CharField(max_length=64, null=False)
+
+    _pulp_domain = models.ForeignKey("core.Domain", default=get_domain_pk, on_delete=models.PROTECT)
+
+    @staticmethod
+    def calculate_sha256(provenance):
+        """Return the hex sha256 of the provenance dumped as key-sorted JSON."""
+        provenance_json = json.dumps(provenance, sort_keys=True).encode("utf-8")
+        hasher = hashlib.sha256(provenance_json)
+        return hasher.hexdigest()
+
+    @hook(BEFORE_SAVE)
+    def set_sha256_hook(self):
+        """Set sha256 from the current provenance before the model is saved."""
+        self.sha256 = self.calculate_sha256(self.provenance)
+
+    @property
+    def as_model(self):
+        return Provenance.model_validate(self.provenance)  # stored JSON -> Provenance object
+
+    class Meta:
+        default_related_name = "%(app_label)s_%(model_name)s"
+        unique_together = ("sha256", "_pulp_domain")
+
+
 class PythonPublication(Publication, AutoAddObjPermsMixin):
     """
     A Publication for PythonContent.
@@ -295,7 +339,7 @@ class PythonRepository(Repository, AutoAddObjPermsMixin):
     """
 
     TYPE = "python"
-    CONTENT_TYPES = [PythonPackageContent]
+    CONTENT_TYPES = [PythonPackageContent, PackageProvenance]
     REMOTE_TYPES = [PythonRemote]
     PULL_THROUGH_SUPPORTED = True
 
diff --git a/pulp_python/app/serializers.py b/pulp_python/app/serializers.py
index 6a0e974e..a16310fe 100644
--- a/pulp_python/app/serializers.py
+++ b/pulp_python/app/serializers.py
@@ -5,6 +5,8 @@
 from django.db.utils import IntegrityError
 from packaging.requirements import Requirement
 from rest_framework import serializers
+from pydantic import ValidationError
+from pypi_attestations import Distribution, Provenance, VerificationError
 
 from pulpcore.plugin import models as core_models
 from pulpcore.plugin import serializers as core_serializers
@@ -464,6 +466,65 @@ class Meta:
         model = python_models.PythonPackageContent
 
 
+class PackageProvenanceSerializer(core_serializers.NoArtifactContentUploadSerializer):
+    """
+    A Serializer for uploading and rendering PEP 740 PackageProvenance content.
+    """
+
+    package = core_serializers.DetailRelatedField(
+        help_text=_("The package that the provenance is for."),
+        view_name_pattern=r"content(-.*/.*)-detail",
+        queryset=python_models.PythonPackageContent.objects.all(),
+    )
+    provenance = serializers.JSONField(read_only=True, default=dict)
+    sha256 = serializers.CharField(read_only=True)
+    verify = serializers.BooleanField(
+        default=True,
+        write_only=True,
+        help_text=_("Verify each attestation in the provenance."),
+    )
+
+    def deferred_validate(self, data):
+        """
+        Parse the uploaded provenance and, when ``verify`` is set, check it against the package.
+        """
+        data = super().deferred_validate(data)
+        try:
+            provenance = Provenance.model_validate_json(data["file"].read())
+            data["provenance"] = provenance.model_dump(mode="json")
+        except ValidationError as e:
+            raise serializers.ValidationError(
+                _("The uploaded provenance is not valid: {}").format(e)  # format after translating
+            )
+        if data.pop("verify"):
+            dist = Distribution(name=data["package"].filename, digest=data["package"].sha256)
+            try:
+                for attestation_bundle in provenance.attestation_bundles:
+                    publisher = attestation_bundle.publisher
+                    policy = publisher._as_policy()  # private API; pypi-attestations is pinned
+                    for attestation in attestation_bundle.attestations:
+                        attestation.verify(policy, dist)
+            except VerificationError as e:
+                raise serializers.ValidationError(_("Provenance verification failed: {}").format(e))
+        return data
+
+    def retrieve(self, validated_data):
+        sha256 = python_models.PackageProvenance.calculate_sha256(validated_data["provenance"])
+        content = python_models.PackageProvenance.objects.filter(
+            sha256=sha256, _pulp_domain=get_domain()
+        ).first()
+        return content  # None when no provenance with this digest exists in the domain
+
+    class Meta:
+        fields = core_serializers.NoArtifactContentUploadSerializer.Meta.fields + (
+            "package",
+            "provenance",
+            "sha256",
+            "verify",
+        )
+        model = python_models.PackageProvenance
+
+
 class MultipleChoiceArrayField(serializers.MultipleChoiceField):
     """
     A wrapper to make sure this DRF serializer works properly with ArrayFields.
diff --git a/pulp_python/app/viewsets.py b/pulp_python/app/viewsets.py
index e303ca44..f5d34f06 100644
--- a/pulp_python/app/viewsets.py
+++ b/pulp_python/app/viewsets.py
@@ -428,6 +428,39 @@ def upload(self, request):
         return Response(serializer.data, status=status.HTTP_201_CREATED, headers=headers)
 
 
+class PackageProvenanceViewSet(core_viewsets.NoArtifactContentUploadViewSet):
+    """
+    PackageProvenance represents a PEP 740 provenance object for a Python package.
+
+    Use ?minimal=true to get a human readable representation of the provenance.
+    """
+
+    endpoint_name = "provenance"
+    queryset = python_models.PackageProvenance.objects.all()
+    serializer_class = python_serializers.PackageProvenanceSerializer
+
+    DEFAULT_ACCESS_POLICY = {
+        "statements": [
+            {
+                "action": ["list", "retrieve"],
+                "principal": "authenticated",
+                "effect": "allow",
+            },
+            {
+                "action": ["create"],
+                "principal": "authenticated",
+                "effect": "allow",
+                "condition": [
+                    "has_required_repo_perms_on_upload:python.modify_pythonrepository",
+                    "has_required_repo_perms_on_upload:python.view_pythonrepository",
+                    "has_upload_param_model_or_domain_or_obj_perms:core.change_upload",
+                ],
+            },
+        ],
+        "queryset_scoping": {"function": "scope_queryset"},
+    }
+
+
 class PythonRemoteViewSet(core_viewsets.RemoteViewSet, core_viewsets.RolesMixin):
     """
 
diff --git a/pulp_python/tests/functional/api/test_attestations.py b/pulp_python/tests/functional/api/test_attestations.py
new file mode 100644
index 00000000..4695e293
--- /dev/null
+++ b/pulp_python/tests/functional/api/test_attestations.py
@@ -0,0 +1,62 @@
+import pytest
+import requests
+
+from pypi_simple import PyPISimple
+
+from pulpcore.tests.functional.utils import PulpTaskError
+
+
+@pytest.mark.parallel
+def test_crd_provenance(python_bindings, python_content_factory, monitor_task):
+    """
+    Test creating and reading a provenance.
+    """
+    filename = "twine-6.2.0-py3-none-any.whl"
+    with PyPISimple() as client:
+        page = client.get_project_page("twine")
+        for package in page.packages:
+            if package.filename == filename:
+                content = python_content_factory(filename, url=package.url)
+                break
+    provenance = python_bindings.ContentProvenanceApi.create(
+        package=content.pulp_href,
+        file_url=package.provenance_url,
+    )
+    task = monitor_task(provenance.task)
+    provenance = python_bindings.ContentProvenanceApi.read(task.created_resources[0])
+    assert provenance.package == content.pulp_href
+    r = requests.get(package.provenance_url)
+    assert r.status_code == 200
+    assert r.json() == provenance.provenance
+
+
+@pytest.mark.parallel
+def test_verify_provenance(python_bindings, python_content_factory, monitor_task):
+    """
+    Test verifying a provenance.
+    """
+    filename = "twine-6.2.0.tar.gz"
+    with PyPISimple() as client:
+        page = client.get_project_page("twine")
+        for package in page.packages:
+            if package.filename == filename:
+                break
+    wrong_content = python_content_factory()  # shelf-reader-0.1.tar.gz
+    provenance = python_bindings.ContentProvenanceApi.create(
+        package=wrong_content.pulp_href,
+        file_url=package.provenance_url,
+    )
+    with pytest.raises(PulpTaskError) as e:
+        monitor_task(provenance.task)
+    assert e.value.task.state == "failed"
+    assert "twine-6.2.0.tar.gz != shelf-reader-0.1.tar.gz" in e.value.task.error["description"]
+
+    # Test creating a provenance without verifying
+    provenance = python_bindings.ContentProvenanceApi.create(
+        package=wrong_content.pulp_href,
+        file_url=package.provenance_url,
+        verify=False,
+    )
+    task = monitor_task(provenance.task)
+    provenance = python_bindings.ContentProvenanceApi.read(task.created_resources[0])
+    assert provenance.package == wrong_content.pulp_href
diff --git a/pyproject.toml b/pyproject.toml
index 3d069a6c..78de0879 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -29,7 +29,8 @@ dependencies = [
   "pulpcore>=3.85.3,<3.100",
   "pkginfo>=1.12.0,<1.13.0",
   "bandersnatch>=6.6.0,<6.7",
-  "pypi-simple>=1.5.0,<2.0",
+  "pypi-simple>=1.8.0,<2.0",
+  "pypi-attestations==0.0.28", # API is not stable
 ]
 
 [project.urls]