Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES/1004.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fixed pull-through caching not checking the repository if the package was not present on the remote.
77 changes: 31 additions & 46 deletions pulp_python/app/pypi/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from django.db.utils import DatabaseError
from django.http.response import (
Http404,
HttpResponseNotFound,
HttpResponseForbidden,
HttpResponseBadRequest,
StreamingHttpResponse,
Expand Down Expand Up @@ -287,7 +288,7 @@ def list(self, request, path):
kwargs = {"content_type": media_type, "headers": headers}
return StreamingHttpResponse(index_data, **kwargs)

def pull_through_package_simple(self, package, path, remote, media_type):
def pull_through_package_simple(self, package, path, remote):
"""Gets the package's simple page from remote."""

def parse_package(release_package):
Expand All @@ -305,35 +306,27 @@ def parse_package(release_package):

rfilter = get_remote_package_filter(remote)
if not rfilter.filter_project(package):
raise Http404(f"{package} does not exist.")
return {}

url = remote.get_remote_artifact_url(f"simple/{package}/")
remote.headers = remote.headers or []
remote.headers.append({"Accept": ACCEPT_JSON_PREFERRED})
downloader = remote.get_downloader(url=url, max_retries=1)
try:
d = downloader.fetch()
except ClientError:
return HttpResponse(f"Failed to fetch {package} from {remote.url}.", status=502)
except TimeoutException:
return HttpResponse(f"{remote.url} timed out while fetching {package}.", status=504)
except (ClientError, TimeoutException):
log.info(f"Failed to fetch {package} simple page from {remote.url}")
return {}

if d.headers["content-type"] == PYPI_SIMPLE_V1_JSON:
page = ProjectPage.from_json_data(json.load(open(d.path, "rb")), base_url=url)
else:
page = ProjectPage.from_html(package, open(d.path, "rb").read(), base_url=url)
packages = [
parse_package(p) for p in page.packages if rfilter.filter_release(package, p.version)
]
headers = {"X-PyPI-Last-Serial": str(PYPI_SERIAL_CONSTANT)}

if media_type == PYPI_SIMPLE_V1_JSON:
detail_data = write_simple_detail_json(package, packages)
return Response(detail_data, headers=headers)
else:
detail_data = write_simple_detail(package, packages)
kwargs = {"content_type": media_type, "headers": headers}
return HttpResponse(detail_data, **kwargs)
return {
p.filename: parse_package(p)
for p in page.packages
if rfilter.filter_release(package, p.version)
}

@extend_schema(operation_id="pypi_simple_package_read", summary="Get package simple page")
def retrieve(self, request, path, package):
Expand All @@ -343,44 +336,36 @@ def retrieve(self, request, path, package):
repo_ver, content = self.get_rvc()
# Should I redirect if the normalized name is different?
normalized = canonicalize_name(package)
releases = {}
if self.distribution.remote:
return self.pull_through_package_simple(
normalized, path, self.distribution.remote, media_type
)
if self.should_redirect(repo_version=repo_ver):
releases = self.pull_through_package_simple(normalized, path, self.distribution.remote)
elif self.should_redirect(repo_version=repo_ver):
return redirect(urljoin(self.base_content_url, f"{path}/simple/{normalized}/"))
packages = (
content.filter(name__normalize=normalized)
.values_list("filename", "sha256", "name", "metadata_sha256", "requires_python")
.iterator()
)
try:
present = next(packages)
except StopIteration:
raise Http404(f"{normalized} does not exist.")
else:
packages = chain([present], packages)
name = present[2]
releases = (
{
"filename": filename,
"url": urljoin(self.base_content_url, f"{path}/{filename}"),
"sha256": sha256,
"metadata_sha256": metadata_sha256,
"requires_python": requires_python,
if content:
packages = content.filter(name__normalize=normalized).values(
"filename", "sha256", "metadata_sha256", "requires_python"
)
local_releases = {
p["filename"]: {
**p,
"url": urljoin(self.base_content_url, f"{path}/{p['filename']}"),
}
for p in packages
}
for filename, sha256, _, metadata_sha256, requires_python in packages
)
releases.update(local_releases)
if not releases:
return HttpResponseNotFound(f"{normalized} does not exist.")

media_type = request.accepted_renderer.media_type
headers = {"X-PyPI-Last-Serial": str(PYPI_SERIAL_CONSTANT)}

if media_type == PYPI_SIMPLE_V1_JSON:
detail_data = write_simple_detail_json(name, releases)
detail_data = write_simple_detail_json(normalized, releases.values())
return Response(detail_data, headers=headers)
else:
detail_data = write_simple_detail(name, releases, streamed=True)
detail_data = write_simple_detail(normalized, releases.values())
kwargs = {"content_type": media_type, "headers": headers}
return StreamingHttpResponse(detail_data, **kwargs)
return HttpResponse(detail_data, **kwargs)

@extend_schema(
request=PackageUploadSerializer,
Expand Down
34 changes: 30 additions & 4 deletions pulp_python/tests/functional/api/test_full_mirror.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def test_pull_through_filter(python_remote_factory, python_distribution_factory)

r = requests.get(f"{distro.base_url}simple/pulpcore/")
assert r.status_code == 404
assert r.text == "404 Not Found"
assert r.text == "pulpcore does not exist."

r = requests.get(f"{distro.base_url}simple/shelf-reader/")
assert r.status_code == 200
Expand All @@ -104,11 +104,11 @@ def test_pull_through_filter(python_remote_factory, python_distribution_factory)

r = requests.get(f"{distro.base_url}simple/django/")
assert r.status_code == 404
assert r.text == "404 Not Found"
assert r.text == "django does not exist."

r = requests.get(f"{distro.base_url}simple/pulpcore/")
assert r.status_code == 502
assert r.text == f"Failed to fetch pulpcore from {remote.url}."
assert r.status_code == 404
assert r.text == "pulpcore does not exist."

r = requests.get(f"{distro.base_url}simple/shelf-reader/")
assert r.status_code == 200
Expand Down Expand Up @@ -156,3 +156,29 @@ def test_pull_through_with_repo(
assert r.status_code == 200
tasks = pulpcore_bindings.TasksApi.list(reserved_resources=repo.prn)
assert tasks.count == 3


@pytest.mark.parallel
def test_pull_through_local_only(
    python_remote_factory, python_distribution_factory, python_repo_with_sync
):
    """Check that pull-through falls back to the repository when the remote lacks a package.

    The repository is synced from a remote that includes only ``pulpcore``, while the
    distribution is served with a second remote created with an empty ``includes`` list.
    Three simple pages are then requested: one expected to come from the repository only,
    one expected to carry pull-through links, and one expected to be missing everywhere.
    """
    pypi_remote = python_remote_factory(url=PYPI_URL, includes=["pulpcore"])
    repo = python_repo_with_sync(remote=pypi_remote)
    # The second remote uses the factory's default URL; the fixtures it points at do not
    # have pulpcore, so that package can only be answered from the synced repository.
    fixtures_remote = python_remote_factory(includes=[])
    distro = python_distribution_factory(
        repository=repo.pulp_href, remote=fixtures_remote.pulp_href
    )

    # Only in the repository: the page is served and none of its links are redirects.
    response = requests.get(f"{distro.base_url}simple/pulpcore/")
    assert response.status_code == 200
    assert "?redirect=" not in response.text

    # Presumably found on the pull-through remote: its links carry the redirect query.
    response = requests.get(f"{distro.base_url}simple/shelf-reader/")
    assert response.status_code == 200
    assert "?redirect=" in response.text

    # In neither place: a 404 whose message uses the normalized package name.
    response = requests.get(f"{distro.base_url}simple/pulp_python/")
    assert response.status_code == 404
    assert response.text == "pulp-python does not exist."