diff --git a/CHANGES/625.feature b/CHANGES/625.feature
new file mode 100644
index 00000000..38928e60
--- /dev/null
+++ b/CHANGES/625.feature
@@ -0,0 +1 @@
+Added JSON-based Simple API (PEP 691).
diff --git a/pulp_python/app/migrations/0016_pythonpackagecontent_metadata_sha256.py b/pulp_python/app/migrations/0016_pythonpackagecontent_metadata_sha256.py
new file mode 100644
index 00000000..0dc68d31
--- /dev/null
+++ b/pulp_python/app/migrations/0016_pythonpackagecontent_metadata_sha256.py
@@ -0,0 +1,18 @@
+# Generated by Django 4.2.25 on 2025-11-04 07:34
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ("python", "0015_alter_pythonpackagecontent_options"),
+ ]
+
+ operations = [
+ migrations.AddField(
+ model_name="pythonpackagecontent",
+ name="metadata_sha256",
+ field=models.CharField(max_length=64, null=True),
+ ),
+ ]
diff --git a/pulp_python/app/models.py b/pulp_python/app/models.py
index 3bd9d605..14059397 100644
--- a/pulp_python/app/models.py
+++ b/pulp_python/app/models.py
@@ -192,6 +192,8 @@ class PythonPackageContent(Content):
packagetype = models.TextField(choices=PACKAGE_TYPES)
python_version = models.TextField()
sha256 = models.CharField(db_index=True, max_length=64)
+ metadata_sha256 = models.CharField(max_length=64, null=True)
+ # yanked and yanked_reason are not implemented because they are mutable
# From pulpcore
PROTECTED_FROM_RECLAIM = False
diff --git a/pulp_python/app/pypi/views.py b/pulp_python/app/pypi/views.py
index bd8bc2af..c0b636bb 100644
--- a/pulp_python/app/pypi/views.py
+++ b/pulp_python/app/pypi/views.py
@@ -3,7 +3,9 @@
from aiohttp.client_exceptions import ClientError
from rest_framework.viewsets import ViewSet
+from rest_framework.renderers import BrowsableAPIRenderer, JSONRenderer, TemplateHTMLRenderer
from rest_framework.response import Response
+from rest_framework.exceptions import NotAcceptable
from django.core.exceptions import ObjectDoesNotExist
from django.shortcuts import redirect
from datetime import datetime, timezone, timedelta
@@ -43,7 +45,9 @@
)
from pulp_python.app.utils import (
write_simple_index,
+ write_simple_index_json,
write_simple_detail,
+ write_simple_detail_json,
python_content_to_json,
PYPI_LAST_SERIAL,
PYPI_SERIAL_CONSTANT,
@@ -57,6 +61,17 @@
ORIGIN_HOST = settings.CONTENT_ORIGIN if settings.CONTENT_ORIGIN else settings.PYPI_API_HOSTNAME
BASE_CONTENT_URL = urljoin(ORIGIN_HOST, settings.CONTENT_PATH_PREFIX)
+PYPI_SIMPLE_V1_HTML = "application/vnd.pypi.simple.v1+html"
+PYPI_SIMPLE_V1_JSON = "application/vnd.pypi.simple.v1+json"
+
+
+class PyPISimpleHTMLRenderer(TemplateHTMLRenderer):
+ media_type = PYPI_SIMPLE_V1_HTML
+
+
+class PyPISimpleJSONRenderer(JSONRenderer):
+ media_type = PYPI_SIMPLE_V1_JSON
+
class PyPIMixin:
"""Mixin to get index specific info."""
@@ -235,6 +250,25 @@ class SimpleView(PackageUploadMixin, ViewSet):
],
}
+ def perform_content_negotiation(self, request, force=False):
+ """
+ Uses standard content negotiation, defaulting to HTML if no acceptable renderer is found.
+ """
+ try:
+ return super().perform_content_negotiation(request, force)
+ except NotAcceptable:
+ return TemplateHTMLRenderer(), TemplateHTMLRenderer.media_type # text/html
+
+ def get_renderers(self):
+ """
+ Uses custom renderers for PyPI Simple API endpoints, defaulting to standard ones.
+ """
+ if self.action in ["list", "retrieve"]:
+ # Ordered by priority if multiple content types are present
+ return [TemplateHTMLRenderer(), PyPISimpleHTMLRenderer(), PyPISimpleJSONRenderer()]
+ else:
+ return [JSONRenderer(), BrowsableAPIRenderer()]
+
@extend_schema(summary="Get index simple page")
def list(self, request, path):
"""Gets the simple api html page for the index."""
@@ -242,9 +276,18 @@ def list(self, request, path):
if self.should_redirect(repo_version=repo_version):
return redirect(urljoin(self.base_content_url, f"{path}/simple/"))
names = content.order_by("name").values_list("name", flat=True).distinct().iterator()
- return StreamingHttpResponse(write_simple_index(names, streamed=True))
+ media_type = request.accepted_renderer.media_type
+ headers = {"X-PyPI-Last-Serial": str(PYPI_SERIAL_CONSTANT)}
+
+ if media_type == PYPI_SIMPLE_V1_JSON:
+ index_data = write_simple_index_json(names)
+ return Response(index_data, headers=headers)
+ else:
+ index_data = write_simple_index(names, streamed=True)
+ kwargs = {"content_type": media_type, "headers": headers}
+ return StreamingHttpResponse(index_data, **kwargs)
- def pull_through_package_simple(self, package, path, remote):
+ def pull_through_package_simple(self, package, path, remote, media_type):
"""Gets the package's simple page from remote."""
def parse_package(release_package):
@@ -252,7 +295,13 @@ def parse_package(release_package):
stripped_url = urlunsplit(chain(parsed[:3], ("", "")))
redirect_path = f"{path}/{release_package.filename}?redirect={stripped_url}"
d_url = urljoin(self.base_content_url, redirect_path)
- return release_package.filename, d_url, release_package.digests.get("sha256", "")
+ return {
+ "filename": release_package.filename,
+ "url": d_url,
+ "sha256": release_package.digests.get("sha256", ""),
+ "requires_python": release_package.requires_python,
+ "metadata_sha256": (release_package.metadata_digests or {}).get("sha256"),
+ }
rfilter = get_remote_package_filter(remote)
if not rfilter.filter_project(package):
@@ -269,28 +318,40 @@ def parse_package(release_package):
except TimeoutException:
return HttpResponse(f"{remote.url} timed out while fetching {package}.", status=504)
- if d.headers["content-type"] == "application/vnd.pypi.simple.v1+json":
+ if d.headers["content-type"] == PYPI_SIMPLE_V1_JSON:
page = ProjectPage.from_json_data(json.load(open(d.path, "rb")), base_url=url)
else:
page = ProjectPage.from_html(package, open(d.path, "rb").read(), base_url=url)
packages = [
parse_package(p) for p in page.packages if rfilter.filter_release(package, p.version)
]
- return HttpResponse(write_simple_detail(package, packages))
+ headers = {"X-PyPI-Last-Serial": str(PYPI_SERIAL_CONSTANT)}
+
+ if media_type == PYPI_SIMPLE_V1_JSON:
+ detail_data = write_simple_detail_json(package, packages)
+ return Response(detail_data, headers=headers)
+ else:
+ detail_data = write_simple_detail(package, packages)
+ kwargs = {"content_type": media_type, "headers": headers}
+ return HttpResponse(detail_data, **kwargs)
@extend_schema(operation_id="pypi_simple_package_read", summary="Get package simple page")
def retrieve(self, request, path, package):
- """Retrieves the simple api html page for a package."""
+ """Retrieves the simple api html/json page for a package."""
+ media_type = request.accepted_renderer.media_type
+
repo_ver, content = self.get_rvc()
# Should I redirect if the normalized name is different?
normalized = canonicalize_name(package)
if self.distribution.remote:
- return self.pull_through_package_simple(normalized, path, self.distribution.remote)
+ return self.pull_through_package_simple(
+ normalized, path, self.distribution.remote, media_type
+ )
if self.should_redirect(repo_version=repo_ver):
return redirect(urljoin(self.base_content_url, f"{path}/simple/{normalized}/"))
packages = (
content.filter(name__normalize=normalized)
- .values_list("filename", "sha256", "name")
+ .values_list("filename", "sha256", "name", "metadata_sha256", "requires_python")
.iterator()
)
try:
@@ -300,8 +361,26 @@ def retrieve(self, request, path, package):
else:
packages = chain([present], packages)
name = present[2]
- releases = ((f, urljoin(self.base_content_url, f"{path}/{f}"), d) for f, d, _ in packages)
- return StreamingHttpResponse(write_simple_detail(name, releases, streamed=True))
+ releases = (
+ {
+ "filename": filename,
+ "url": urljoin(self.base_content_url, f"{path}/{filename}"),
+ "sha256": sha256,
+ "metadata_sha256": metadata_sha256,
+ "requires_python": requires_python,
+ }
+ for filename, sha256, _, metadata_sha256, requires_python in packages
+ )
+ media_type = request.accepted_renderer.media_type
+ headers = {"X-PyPI-Last-Serial": str(PYPI_SERIAL_CONSTANT)}
+
+ if media_type == PYPI_SIMPLE_V1_JSON:
+ detail_data = write_simple_detail_json(name, releases)
+ return Response(detail_data, headers=headers)
+ else:
+ detail_data = write_simple_detail(name, releases, streamed=True)
+ kwargs = {"content_type": media_type, "headers": headers}
+ return StreamingHttpResponse(detail_data, **kwargs)
@extend_schema(
request=PackageUploadSerializer,
diff --git a/pulp_python/app/serializers.py b/pulp_python/app/serializers.py
index d8387adf..2ba95ba4 100644
--- a/pulp_python/app/serializers.py
+++ b/pulp_python/app/serializers.py
@@ -281,6 +281,11 @@ class PythonPackageContentSerializer(core_serializers.SingleArtifactContentUploa
default="",
help_text=_("The SHA256 digest of this package."),
)
+ metadata_sha256 = serializers.CharField(
+ required=False,
+ allow_null=True,
+ help_text=_("The SHA256 digest of the package's METADATA file."),
+ )
def deferred_validate(self, data):
"""
@@ -364,6 +369,7 @@ class Meta:
"packagetype",
"python_version",
"sha256",
+ "metadata_sha256",
)
model = python_models.PythonPackageContent
diff --git a/pulp_python/app/tasks/publish.py b/pulp_python/app/tasks/publish.py
index 3ab44501..39102eb5 100644
--- a/pulp_python/app/tasks/publish.py
+++ b/pulp_python/app/tasks/publish.py
@@ -101,7 +101,7 @@ def write_simple_api(publication):
relative_path = release["filename"]
path = f"../../{relative_path}"
checksum = release["sha256"]
- package_releases.append((relative_path, path, checksum))
+ package_releases.append({"filename": relative_path, "url": path, "sha256": checksum})
# Write the final project's page
write_project_page(
name=canonicalize_name(current_name),
diff --git a/pulp_python/app/utils.py b/pulp_python/app/utils.py
index 533caba8..365de503 100644
--- a/pulp_python/app/utils.py
+++ b/pulp_python/app/utils.py
@@ -1,7 +1,9 @@
+import hashlib
import pkginfo
import re
import shutil
import tempfile
+import zipfile
import json
from collections import defaultdict
from django.conf import settings
@@ -16,15 +18,17 @@
"""TODO This serial constant is temporary until Python repositories implements serials"""
PYPI_SERIAL_CONSTANT = 1000000000
+SIMPLE_API_VERSION = "1.0"
+
simple_index_template = """
Simple Index
-
+
{% for name, canonical_name in projects %}
- {{ name }}
+ {{ name }}
{% endfor %}
@@ -32,16 +36,16 @@
simple_detail_template = """
-
- Links for {{ project_name }}
-
-
-
+
+ Links for {{ project_name }}
+
+
+
Links for {{ project_name }}
- {% for name, path, sha256 in project_packages %}
- {{ name }}
+ {% for pkg in project_packages %}
+ {{ pkg.filename }}
{% endfor %}
-
+
"""
@@ -128,6 +132,7 @@ def parse_project_metadata(project):
# Release metadata
"packagetype": project.get("packagetype") or "",
"python_version": project.get("python_version") or "",
+ "metadata_sha256": project.get("metadata_sha256"),
}
@@ -154,10 +159,8 @@ def parse_metadata(project, version, distribution):
package["version"] = version
package["url"] = distribution.get("url") or ""
package["sha256"] = distribution.get("digests", {}).get("sha256") or ""
- package["python_version"] = distribution.get("python_version") or package.get("python_version")
- package["requires_python"] = distribution.get("requires_python") or package.get(
- "requires_python"
- ) # noqa: E501
+ package["python_version"] = distribution.get("python_version") or ""
+ package["requires_python"] = distribution.get("requires_python") or ""
return package
@@ -175,6 +178,7 @@ def get_project_metadata_from_file(filename):
packagetype = DIST_EXTENSIONS[extensions[pkg_type_index]]
metadata = DIST_TYPES[packagetype](filename)
+ metadata.metadata_sha256 = compute_metadata_sha256(filename)
metadata.packagetype = packagetype
if packagetype == "sdist":
metadata.python_version = "source"
@@ -187,6 +191,25 @@ def get_project_metadata_from_file(filename):
return metadata
+def compute_metadata_sha256(filename: str) -> str | None:
+ """
+ Compute SHA256 hash of the metadata file from a Python package.
+
+ Returns SHA256 hash or None if metadata cannot be extracted.
+ """
+ if not filename.endswith(".whl"):
+ return None
+ try:
+ with zipfile.ZipFile(filename, "r") as f:
+ for file_path in f.namelist():
+ if file_path.endswith(".dist-info/METADATA"):
+ metadata_content = f.read(file_path)
+ return hashlib.sha256(metadata_content).hexdigest()
+ except (zipfile.BadZipFile, KeyError, OSError):
+ pass
+ return None
+
+
def artifact_to_python_content_data(filename, artifact, domain=None):
"""
Takes the artifact/filename and returns the metadata needed to create a PythonPackageContent.
@@ -403,17 +426,65 @@ def find_artifact():
def write_simple_index(project_names, streamed=False):
"""Writes the simple index."""
simple = Template(simple_index_template)
- context = {"projects": ((x, canonicalize_name(x)) for x in project_names)}
+ context = {
+ "SIMPLE_API_VERSION": SIMPLE_API_VERSION,
+ "projects": ((x, canonicalize_name(x)) for x in project_names),
+ }
return simple.stream(**context) if streamed else simple.render(**context)
def write_simple_detail(project_name, project_packages, streamed=False):
"""Writes the simple detail page of a package."""
detail = Template(simple_detail_template)
- context = {"project_name": project_name, "project_packages": project_packages}
+ context = {
+ "SIMPLE_API_VERSION": SIMPLE_API_VERSION,
+ "project_name": project_name,
+ "project_packages": project_packages,
+ }
return detail.stream(**context) if streamed else detail.render(**context)
+def write_simple_index_json(project_names):
+ """Writes the simple index in JSON format."""
+ return {
+ "meta": {"api-version": SIMPLE_API_VERSION, "_last-serial": PYPI_SERIAL_CONSTANT},
+ "projects": [
+ {"name": name, "_last-serial": PYPI_SERIAL_CONSTANT} for name in project_names
+ ],
+ }
+
+
+def write_simple_detail_json(project_name, project_packages):
+ """Writes the simple detail page in JSON format."""
+ return {
+ "meta": {"api-version": SIMPLE_API_VERSION, "_last-serial": PYPI_SERIAL_CONSTANT},
+ "name": canonicalize_name(project_name),
+ "files": [
+ {
+ # v1.0, PEP 691
+ "filename": package["filename"],
+ "url": package["url"],
+ "hashes": {"sha256": package["sha256"]},
+ "requires-python": package["requires_python"] or None,
+ # data-dist-info-metadata is a deprecated alias for core-metadata
+ "data-dist-info-metadata": (
+ {"sha256": package["metadata_sha256"]} if package["metadata_sha256"] else False
+ ),
+ # yanked and yanked_reason are not implemented because they are mutable
+ # TODO in the future:
+ # size, upload-time (v1.1, PEP 700)
+ # core-metadata (PEP 714)
+ # provenance (v1.3, PEP 740)
+ }
+ for package in project_packages
+ ],
+ # TODO in the future:
+ # versions (v1.1, PEP 700)
+ # alternate-locations (v1.2, PEP 708)
+ # project-status (v1.4, PEP 792 - pypi and docs differ)
+ }
+
+
class PackageIncludeFilter:
"""A special class to help filter Package's based on a remote's include/exclude"""
diff --git a/pulp_python/tests/functional/api/test_full_mirror.py b/pulp_python/tests/functional/api/test_full_mirror.py
index b2e9b404..f03ae475 100644
--- a/pulp_python/tests/functional/api/test_full_mirror.py
+++ b/pulp_python/tests/functional/api/test_full_mirror.py
@@ -58,6 +58,24 @@ def test_pull_through_simple(python_remote_factory, python_distribution_factory,
assert PYTHON_XS_FIXTURE_CHECKSUMS[package.filename] == package.digests["sha256"]
+@pytest.mark.parallel
+@pytest.mark.parametrize("media_type", ["application/vnd.pypi.simple.v1+json", "text/html"])
+def test_pull_through_simple_media_types(
+ media_type, python_remote_factory, python_distribution_factory
+):
+ """Tests pull-through with different media types (JSON and HTML)."""
+ remote = python_remote_factory(url=PYPI_URL, includes=["shelf-reader"])
+ distro = python_distribution_factory(remote=remote.pulp_href)
+
+ url = f"{distro.base_url}simple/shelf-reader/"
+ headers = {"Accept": media_type}
+ response = requests.get(url, headers=headers)
+
+ assert response.status_code == 200
+ assert media_type in response.headers["Content-Type"]
+ assert "X-PyPI-Last-Serial" in response.headers
+
+
@pytest.mark.parallel
def test_pull_through_filter(python_remote_factory, python_distribution_factory):
"""Tests that pull-through respects the includes/excludes filter on the remote."""
@@ -66,7 +84,7 @@ def test_pull_through_filter(python_remote_factory, python_distribution_factory)
r = requests.get(f"{distro.base_url}simple/pulpcore/")
assert r.status_code == 404
- assert r.json() == {"detail": "pulpcore does not exist."}
+ assert r.text == "404 Not Found"
r = requests.get(f"{distro.base_url}simple/shelf-reader/")
assert r.status_code == 200
@@ -86,7 +104,7 @@ def test_pull_through_filter(python_remote_factory, python_distribution_factory)
r = requests.get(f"{distro.base_url}simple/django/")
assert r.status_code == 404
- assert r.json() == {"detail": "django does not exist."}
+ assert r.text == "404 Not Found"
r = requests.get(f"{distro.base_url}simple/pulpcore/")
assert r.status_code == 502
diff --git a/pulp_python/tests/functional/api/test_pypi_simple_json_api.py b/pulp_python/tests/functional/api/test_pypi_simple_json_api.py
new file mode 100644
index 00000000..b8c7aa3a
--- /dev/null
+++ b/pulp_python/tests/functional/api/test_pypi_simple_json_api.py
@@ -0,0 +1,126 @@
+from urllib.parse import urljoin
+
+import pytest
+import requests
+
+from pulp_python.tests.functional.constants import (
+ PYTHON_EGG_FILENAME,
+ PYTHON_EGG_URL,
+ PYTHON_SM_PROJECT_SPECIFIER,
+ PYTHON_WHEEL_FILENAME,
+ PYTHON_WHEEL_URL,
+)
+
+API_VERSION = "1.0"
+PYPI_SERIAL_CONSTANT = 1000000000
+
+PYPI_TEXT_HTML = "text/html"
+PYPI_SIMPLE_V1_HTML = "application/vnd.pypi.simple.v1+html"
+PYPI_SIMPLE_V1_JSON = "application/vnd.pypi.simple.v1+json"
+
+
+@pytest.mark.parallel
+def test_simple_json_index_api(
+ python_remote_factory, python_repo_with_sync, python_distribution_factory
+):
+ remote = python_remote_factory(includes=PYTHON_SM_PROJECT_SPECIFIER)
+ repo = python_repo_with_sync(remote)
+ distro = python_distribution_factory(repository=repo)
+
+ url = urljoin(distro.base_url, "simple/")
+ headers = {"Accept": PYPI_SIMPLE_V1_JSON}
+
+ response = requests.get(url, headers=headers)
+ assert response.headers["Content-Type"] == PYPI_SIMPLE_V1_JSON
+ assert response.headers["X-PyPI-Last-Serial"] == str(PYPI_SERIAL_CONSTANT)
+
+ data = response.json()
+ assert data["meta"] == {"api-version": API_VERSION, "_last-serial": PYPI_SERIAL_CONSTANT}
+ assert data["projects"]
+ for project in data["projects"]:
+ for i in ["_last-serial", "name"]:
+ assert i in project
+
+
+def test_simple_json_detail_api(
+ delete_orphans_pre,
+ monitor_task,
+ python_bindings,
+ python_content_factory,
+ python_distribution_factory,
+ python_repo_factory,
+):
+ content_1 = python_content_factory(PYTHON_WHEEL_FILENAME, url=PYTHON_WHEEL_URL)
+ content_2 = python_content_factory(PYTHON_EGG_FILENAME, url=PYTHON_EGG_URL)
+ body = {"add_content_units": [content_1.pulp_href, content_2.pulp_href]}
+
+ repo = python_repo_factory()
+ monitor_task(python_bindings.RepositoriesPythonApi.modify(repo.pulp_href, body).task)
+ distro = python_distribution_factory(repository=repo)
+
+ url = f'{urljoin(distro.base_url, "simple/")}shelf-reader'
+ headers = {"Accept": PYPI_SIMPLE_V1_JSON}
+
+ response = requests.get(url, headers=headers)
+ assert response.headers["Content-Type"] == PYPI_SIMPLE_V1_JSON
+ assert response.headers["X-PyPI-Last-Serial"] == str(PYPI_SERIAL_CONSTANT)
+
+ data = response.json()
+ assert data["meta"] == {"api-version": API_VERSION, "_last-serial": PYPI_SERIAL_CONSTANT}
+ assert data["name"] == "shelf-reader"
+ assert data["files"]
+
+ # Check data of a wheel
+ file_whl = next(
+ (i for i in data["files"] if i["filename"] == "shelf_reader-0.1-py2-none-any.whl"), None
+ )
+ assert file_whl is not None, "wheel file not found"
+ assert file_whl["url"]
+ assert file_whl["hashes"] == {
+ "sha256": "2eceb1643c10c5e4a65970baf63bde43b79cbdac7de81dae853ce47ab05197e9"
+ }
+ assert file_whl["requires-python"] is None
+ assert file_whl["data-dist-info-metadata"] == {
+ "sha256": "ed333f0db05d77e933a157b7225b403ada9a2f93318d77b41b662eba78bac350"
+ }
+
+ # Check data of a tarball
+ file_tar = next((i for i in data["files"] if i["filename"] == "shelf-reader-0.1.tar.gz"), None)
+ assert file_tar is not None, "tar file not found"
+ assert file_tar["url"]
+ assert file_tar["hashes"] == {
+ "sha256": "04cfd8bb4f843e35d51bfdef2035109bdea831b55a57c3e6a154d14be116398c"
+ }
+ assert file_tar["requires-python"] is None
+ assert file_tar["data-dist-info-metadata"] is False
+
+
+@pytest.mark.parallel
+@pytest.mark.parametrize(
+ "header, result",
+ [
+ (PYPI_TEXT_HTML, PYPI_TEXT_HTML),
+ (PYPI_SIMPLE_V1_HTML, PYPI_SIMPLE_V1_HTML),
+ (PYPI_SIMPLE_V1_JSON, PYPI_SIMPLE_V1_JSON),
+ # Follows defined ordering (html, pypi html, pypi json)
+ (f"{PYPI_SIMPLE_V1_JSON}, {PYPI_SIMPLE_V1_HTML}", PYPI_SIMPLE_V1_HTML),
+ # Everything else should be html
+ ("", PYPI_TEXT_HTML),
+ ("application/json", PYPI_TEXT_HTML),
+ ("sth/else", PYPI_TEXT_HTML),
+ ],
+)
+def test_simple_api_content_headers(
+ python_remote_factory, python_repo_with_sync, python_distribution_factory, header, result
+):
+ remote = python_remote_factory(includes=PYTHON_SM_PROJECT_SPECIFIER)
+ repo = python_repo_with_sync(remote)
+ distro = python_distribution_factory(repository=repo)
+
+ index_url = urljoin(distro.base_url, "simple/")
+ detail_url = f"{index_url}aiohttp"
+
+ for url in [index_url, detail_url]:
+ response = requests.get(url, headers={"Accept": header})
+ assert response.status_code == 200
+ assert result in response.headers["Content-Type"]