Skip to content

Commit 546e0ad

Browse files
committed
Add JSON-based Simple API
1 parent 12e605a commit 546e0ad

File tree

7 files changed

+254
-12
lines changed

7 files changed

+254
-12
lines changed
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Generated by Django 4.2.25 on 2025-11-04 07:34
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
    """Add a nullable ``metadata_sha256`` column to ``pythonpackagecontent``."""

    # Applied on top of the previous migration of the "python" app.
    dependencies = [("python", "0015_alter_pythonpackagecontent_options")]

    operations = [
        migrations.AddField(
            model_name="pythonpackagecontent",
            name="metadata_sha256",
            # Up to 64 characters; NULL is permitted.
            field=models.CharField(null=True, max_length=64),
        ),
    ]

pulp_python/app/models.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,8 @@ class PythonPackageContent(Content):
192192
packagetype = models.TextField(choices=PACKAGE_TYPES)
193193
python_version = models.TextField()
194194
sha256 = models.CharField(db_index=True, max_length=64)
195+
metadata_sha256 = models.CharField(max_length=64, null=True)
196+
# yanked and yanked_reason are not implemented because they are mutable
195197

196198
# From pulpcore
197199
PROTECTED_FROM_RECLAIM = False

pulp_python/app/pypi/views.py

Lines changed: 78 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33

44
from aiohttp.client_exceptions import ClientError
55
from rest_framework.viewsets import ViewSet
6+
from rest_framework.renderers import BrowsableAPIRenderer, JSONRenderer, TemplateHTMLRenderer
67
from rest_framework.response import Response
8+
from rest_framework.exceptions import NotAcceptable
79
from django.core.exceptions import ObjectDoesNotExist
810
from django.shortcuts import redirect
911
from datetime import datetime, timezone, timedelta
@@ -43,7 +45,9 @@
4345
)
4446
from pulp_python.app.utils import (
4547
write_simple_index,
48+
write_simple_index_json,
4649
write_simple_detail,
50+
write_simple_detail_json,
4751
python_content_to_json,
4852
PYPI_LAST_SERIAL,
4953
PYPI_SERIAL_CONSTANT,
@@ -57,6 +61,17 @@
5761
ORIGIN_HOST = settings.CONTENT_ORIGIN if settings.CONTENT_ORIGIN else settings.PYPI_API_HOSTNAME
5862
BASE_CONTENT_URL = urljoin(ORIGIN_HOST, settings.CONTENT_PATH_PREFIX)
5963

64+
# Media types of the PyPI Simple API v1, in its HTML and JSON serializations.
PYPI_SIMPLE_V1_HTML = "application/vnd.pypi.simple.v1+html"
PYPI_SIMPLE_V1_JSON = "application/vnd.pypi.simple.v1+json"
66+
67+
68+
class PyPISimpleHTMLRenderer(TemplateHTMLRenderer):
    """``TemplateHTMLRenderer`` variant registered under the PyPI Simple v1 HTML media type."""

    media_type = PYPI_SIMPLE_V1_HTML
70+
71+
72+
class PyPISimpleJSONRenderer(JSONRenderer):
    """``JSONRenderer`` variant registered under the PyPI Simple v1 JSON media type."""

    media_type = PYPI_SIMPLE_V1_JSON
74+
6075

6176
class PyPIMixin:
6277
"""Mixin to get index specific info."""
@@ -235,14 +250,42 @@ class SimpleView(PackageUploadMixin, ViewSet):
235250
],
236251
}
237252

253+
def perform_content_negotiation(self, request, force=False):
    """
    Negotiate the response renderer, falling back to plain HTML.

    The parent implementation does the actual negotiation. When it raises
    ``NotAcceptable``, a ``TemplateHTMLRenderer`` instance and that renderer's media type
    are returned instead of propagating the error.
    """
    try:
        negotiated = super().perform_content_negotiation(request, force)
    except NotAcceptable:
        # Nothing in the Accept header matched: serve text/html.
        fallback_renderer = TemplateHTMLRenderer()
        return fallback_renderer, TemplateHTMLRenderer.media_type
    return negotiated
261+
262+
def get_renderers(self):
    """
    Return the renderer instances for the current action.

    ``list`` and ``retrieve`` get the HTML and PyPI Simple renderers; every other action
    gets the JSON and browsable-API renderers.
    """
    if self.action not in ["list", "retrieve"]:
        return [JSONRenderer(), BrowsableAPIRenderer()]

    # Listed in priority order for requests that accept several content types.
    simple_renderer_classes = [
        TemplateHTMLRenderer,
        PyPISimpleHTMLRenderer,
        PyPISimpleJSONRenderer,
    ]
    return [renderer_class() for renderer_class in simple_renderer_classes]
271+
238272
@extend_schema(summary="Get index simple page")
def list(self, request, path):
    """Serve the simple index of project names, as JSON or as streamed HTML."""
    repo_version, content = self.get_rvc()
    if self.should_redirect(repo_version=repo_version):
        return redirect(urljoin(self.base_content_url, f"{path}/simple/"))

    project_names = (
        content.order_by("name").values_list("name", flat=True).distinct().iterator()
    )
    accepted_type = request.accepted_renderer.media_type

    if accepted_type != PYPI_SIMPLE_V1_JSON:
        # Either HTML flavour: stream the rendered template under the negotiated type.
        html_stream = write_simple_index(project_names, streamed=True)
        return StreamingHttpResponse(html_stream, content_type=accepted_type)

    payload = write_simple_index_json(project_names)
    return Response(payload, headers={"X-PyPI-Last-Serial": str(PYPI_SERIAL_CONSTANT)})
246289

247290
def pull_through_package_simple(self, package, path, remote):
248291
"""Gets the package's simple page from remote."""
@@ -252,7 +295,12 @@ def parse_package(release_package):
252295
stripped_url = urlunsplit(chain(parsed[:3], ("", "")))
253296
redirect_path = f"{path}/{release_package.filename}?redirect={stripped_url}"
254297
d_url = urljoin(self.base_content_url, redirect_path)
255-
return release_package.filename, d_url, release_package.digests.get("sha256", "")
298+
return {
299+
"filename": release_package.filename,
300+
"url": d_url,
301+
"sha256": release_package.digests.get("sha256", ""),
302+
# todo: more fields?
303+
}
256304

257305
rfilter = get_remote_package_filter(remote)
258306
if not rfilter.filter_project(package):
@@ -269,7 +317,7 @@ def parse_package(release_package):
269317
except TimeoutException:
270318
return HttpResponse(f"{remote.url} timed out while fetching {package}.", status=504)
271319

272-
if d.headers["content-type"] == "application/vnd.pypi.simple.v1+json":
320+
# Parse the downloaded simple page. The file is opened in a context manager so the
# handle is closed once parsing finishes instead of being left to the garbage collector.
with open(d.path, "rb") as page_file:
    if d.headers["content-type"] == PYPI_SIMPLE_V1_JSON:
        page = ProjectPage.from_json_data(json.load(page_file), base_url=url)
    else:
        page = ProjectPage.from_html(package, page_file.read(), base_url=url)
@@ -290,7 +338,13 @@ def retrieve(self, request, path, package):
290338
return redirect(urljoin(self.base_content_url, f"{path}/simple/{normalized}/"))
291339
packages = (
292340
content.filter(name__normalize=normalized)
293-
.values_list("filename", "sha256", "name")
341+
.values_list(
342+
"filename",
343+
"sha256",
344+
"name",
345+
"metadata_sha256",
346+
"requires_python",
347+
)
294348
.iterator()
295349
)
296350
try:
@@ -300,8 +354,26 @@ def retrieve(self, request, path, package):
300354
else:
301355
packages = chain([present], packages)
302356
name = present[2]
303-
releases = ((f, urljoin(self.base_content_url, f"{path}/{f}"), d) for f, d, _ in packages)
304-
return StreamingHttpResponse(write_simple_detail(name, releases, streamed=True))
357+
releases = (
358+
{
359+
"filename": f,
360+
"url": urljoin(self.base_content_url, f"{path}/{f}"),
361+
"sha256": s,
362+
"metadata_sha256": ms,
363+
"requires_python": rp,
364+
}
365+
for f, s, _, ms, rp in packages
366+
)
367+
media_type = request.accepted_renderer.media_type
368+
369+
if media_type == PYPI_SIMPLE_V1_JSON:
370+
detail_data = write_simple_detail_json(name, releases)
371+
headers = {"X-PyPI-Last-Serial": str(PYPI_SERIAL_CONSTANT)}
372+
return Response(detail_data, headers=headers)
373+
else:
374+
detail_data = write_simple_detail(name, releases, streamed=True)
375+
kwargs = {"content_type": media_type}
376+
return StreamingHttpResponse(detail_data, kwargs)
305377

306378
@extend_schema(
307379
request=PackageUploadSerializer,

pulp_python/app/tasks/publish.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,13 @@ def write_simple_api(publication):
101101
relative_path = release["filename"]
102102
path = f"../../{relative_path}"
103103
checksum = release["sha256"]
104-
package_releases.append((relative_path, path, checksum))
104+
package_releases.append(
105+
{
106+
"filename": relative_path,
107+
"url": path,
108+
"sha256": checksum,
109+
}
110+
)
105111
# Write the final project's page
106112
write_project_page(
107113
name=canonicalize_name(current_name),

pulp_python/app/utils.py

Lines changed: 50 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,13 @@
1616
"""TODO This serial constant is temporary until Python repositories implements serials"""
1717
PYPI_SERIAL_CONSTANT = 1000000000
1818

19+
# Simple API version string reported under "api-version" in the JSON responses.
SIMPLE_API_VERSION = "1.0"
20+
1921
simple_index_template = """<!DOCTYPE html>
2022
<html>
2123
<head>
2224
<title>Simple Index</title>
23-
<meta name="api-version" value="2" />
25+
{# Version is spelled out: this plain-string template is never formatted by Python. Keep in sync with SIMPLE_API_VERSION. #}
<meta name="pypi:repository-version" content="1.0">
2426
</head>
2527
<body>
2628
{% for name, canonical_name in projects %}
@@ -38,8 +40,8 @@
3840
</head>
3941
<body>
4042
<h1>Links for {{ project_name }}</h1>
41-
{% for name, path, sha256 in project_packages %}
42-
<a href="{{ path }}#sha256={{ sha256 }}" rel="internal">{{ name }}</a><br/>
43+
{% for pkg in project_packages %}
44+
<a href="{{ pkg.url }}#sha256={{ pkg.sha256 }}" rel="internal">{{ pkg.filename }}</a><br/>
4345
{% endfor %}
4446
</body>
4547
</html>
@@ -128,6 +130,7 @@ def parse_project_metadata(project):
128130
# Release metadata
129131
"packagetype": project.get("packagetype") or "",
130132
"python_version": project.get("python_version") or "",
133+
"metadata_sha256": "", # TODO
131134
}
132135

133136

@@ -158,6 +161,9 @@ def parse_metadata(project, version, distribution):
158161
package["requires_python"] = distribution.get("requires_python") or package.get(
159162
"requires_python"
160163
) # noqa: E501
164+
package["metadata_sha256"] = distribution.get("data-dist-info-metadata", {}).get(
165+
"sha256"
166+
) or package.get("metadata_sha256")
161167

162168
return package
163169

@@ -414,6 +420,47 @@ def write_simple_detail(project_name, project_packages, streamed=False):
414420
return detail.stream(**context) if streamed else detail.render(**context)
415421

416422

423+
def write_simple_index_json(project_names):
    """Build the JSON-serializable body of the simple index.

    The top-level ``meta`` and every project entry carry the same serial constant.
    """
    meta = {"api-version": SIMPLE_API_VERSION, "_last-serial": PYPI_SERIAL_CONSTANT}
    projects = []
    for project_name in project_names:
        projects.append({"name": project_name, "_last-serial": PYPI_SERIAL_CONSTANT})
    return {"meta": meta, "projects": projects}
431+
432+
433+
def write_simple_detail_json(project_name, project_packages):
    """Build the JSON-serializable body of a project's simple detail page.

    Args:
        project_name: Project name; it is canonicalized for the ``name`` field.
        project_packages: Iterable of mappings with ``filename``, ``url`` and ``sha256``
            keys. ``requires_python`` and ``metadata_sha256`` are optional: when missing
            or empty they are reported as ``None`` and ``False`` respectively.

    Returns:
        A dict with ``meta``, ``name`` and ``files`` entries.
    """

    def file_entry(package):
        # Not every producer of release mappings supplies these two keys, so read them
        # leniently instead of raising KeyError.
        requires_python = package.get("requires_python") or None
        metadata_sha256 = package.get("metadata_sha256")
        return {
            # v1.0, PEP 691
            "filename": package["filename"],
            "url": package["url"],
            "hashes": {"sha256": package["sha256"]},
            # PEP 691 spells this key with a hyphen.
            "requires-python": requires_python,
            # Underscore spelling retained for callers that already read it.
            "requires_python": requires_python,
            # data-dist-info-metadata is deprecated alias for core-metadata
            "data-dist-info-metadata": (
                {"sha256": metadata_sha256} if metadata_sha256 else False
            ),
            # yanked and yanked_reason are not implemented because they are mutable
            # TODO in the future:
            # size, upload-time (v1.1, PEP 700)
            # core-metadata (PEP 714)
            # provenance (v1.3, PEP 740)
        }

    return {
        "meta": {"api-version": SIMPLE_API_VERSION, "_last-serial": PYPI_SERIAL_CONSTANT},
        "name": canonicalize_name(project_name),
        "files": [file_entry(package) for package in project_packages],
        # TODO in the future:
        # versions (v1.1, PEP 700)
        # alternate-locations (v1.2, PEP 708)
        # project-status (v1.4, PEP 792 - pypi and docs differ)
    }
462+
463+
417464
class PackageIncludeFilter:
418465
"""A special class to help filter Package's based on a remote's include/exclude"""
419466

pulp_python/tests/functional/api/test_full_mirror.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ def test_pull_through_filter(python_remote_factory, python_distribution_factory)
6666

6767
r = requests.get(f"{distro.base_url}simple/pulpcore/")
6868
assert r.status_code == 404
69-
assert r.json() == {"detail": "pulpcore does not exist."}
69+
assert r.text == "404 Not Found"
7070

7171
r = requests.get(f"{distro.base_url}simple/shelf-reader/")
7272
assert r.status_code == 200
@@ -86,7 +86,7 @@ def test_pull_through_filter(python_remote_factory, python_distribution_factory)
8686

8787
r = requests.get(f"{distro.base_url}simple/django/")
8888
assert r.status_code == 404
89-
assert r.json() == {"detail": "django does not exist."}
89+
assert r.text == "404 Not Found"
9090

9191
r = requests.get(f"{distro.base_url}simple/pulpcore/")
9292
assert r.status_code == 502
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
from urllib.parse import urljoin
2+
3+
import pytest
4+
import requests
5+
6+
from pulp_python.tests.functional.constants import PYTHON_SM_PROJECT_SPECIFIER
7+
8+
# Values the Simple API responses are expected to report.
API_VERSION = "1.0"
PYPI_SERIAL_CONSTANT = 1000000000

# Content types exercised by the tests below.
PYPI_TEXT_HTML = "text/html"
PYPI_SIMPLE_V1_HTML = "application/vnd.pypi.simple.v1+html"
PYPI_SIMPLE_V1_JSON = "application/vnd.pypi.simple.v1+json"
14+
15+
16+
@pytest.mark.parallel
def test_simple_json_index_api(
    python_remote_factory, python_repo_with_sync, python_distribution_factory
):
    """The index answers a PyPI JSON ``Accept`` header with the v1 JSON document."""
    remote = python_remote_factory(includes=PYTHON_SM_PROJECT_SPECIFIER)
    distro = python_distribution_factory(repository=python_repo_with_sync(remote))

    response = requests.get(
        urljoin(distro.base_url, "simple/"), headers={"Accept": PYPI_SIMPLE_V1_JSON}
    )
    assert response.headers["Content-Type"] == PYPI_SIMPLE_V1_JSON
    assert response.headers["X-PyPI-Last-Serial"] == str(PYPI_SERIAL_CONSTANT)

    body = response.json()
    assert body["meta"] == {"api-version": API_VERSION, "_last-serial": PYPI_SERIAL_CONSTANT}
    assert body["projects"]
    # Every listed project must expose both fields.
    for project in body["projects"]:
        assert "_last-serial" in project
        assert "name" in project
37+
38+
39+
@pytest.mark.parallel
def test_simple_json_detail_api(
    python_remote_factory, python_repo_with_sync, python_distribution_factory
):
    """The project page answers a PyPI JSON ``Accept`` header with the v1 JSON document."""
    remote = python_remote_factory(includes=PYTHON_SM_PROJECT_SPECIFIER)
    distro = python_distribution_factory(repository=python_repo_with_sync(remote))

    simple_root = urljoin(distro.base_url, "simple/")
    response = requests.get(f"{simple_root}aiohttp", headers={"Accept": PYPI_SIMPLE_V1_JSON})
    assert response.headers["Content-Type"] == PYPI_SIMPLE_V1_JSON
    assert response.headers["X-PyPI-Last-Serial"] == str(PYPI_SERIAL_CONSTANT)

    body = response.json()
    assert body["meta"] == {"api-version": API_VERSION, "_last-serial": PYPI_SERIAL_CONSTANT}
    assert body["name"] == "aiohttp"
    assert body["files"]

    expected_keys = (
        "filename",
        "url",
        "hashes",
        "data-dist-info-metadata",
        "requires_python",
    )
    for file_entry in body["files"]:
        for key in expected_keys:
            assert key in file_entry
67+
68+
69+
@pytest.mark.parallel
@pytest.mark.parametrize(
    "header, result",
    [
        (PYPI_TEXT_HTML, PYPI_TEXT_HTML),
        (PYPI_SIMPLE_V1_HTML, PYPI_SIMPLE_V1_HTML),
        (PYPI_SIMPLE_V1_JSON, PYPI_SIMPLE_V1_JSON),
        # Follows defined ordering (html, pypi html, pypi json)
        (f"{PYPI_SIMPLE_V1_JSON}, {PYPI_SIMPLE_V1_HTML}", PYPI_SIMPLE_V1_HTML),
        # Everything else should be html
        ("", PYPI_TEXT_HTML),
        ("application/json", PYPI_TEXT_HTML),
        ("sth/else", PYPI_TEXT_HTML),
    ],
)
def test_simple_api_content_headers(
    python_remote_factory, python_repo_with_sync, python_distribution_factory, header, result
):
    """Index and project pages both report the expected content type for each Accept header."""
    remote = python_remote_factory(includes=PYTHON_SM_PROJECT_SPECIFIER)
    distro = python_distribution_factory(repository=python_repo_with_sync(remote))

    index_url = urljoin(distro.base_url, "simple/")
    for url in (index_url, f"{index_url}aiohttp"):
        response = requests.get(url, headers={"Accept": header})
        assert response.status_code == 200
        assert result in response.headers["Content-Type"]

0 commit comments

Comments
 (0)