From 26cdeff88397940c014eeae2b5b9d9ad594848b5 Mon Sep 17 00:00:00 2001 From: Gerrod Ubben Date: Fri, 7 Nov 2025 16:29:10 -0500 Subject: [PATCH] Fix pull-through failing to check repository when package was not in remote fixes: #1004 --- CHANGES/1004.bugfix | 1 + pulp_python/app/pypi/views.py | 77 ++++++++----------- .../tests/functional/api/test_full_mirror.py | 34 +++++++- 3 files changed, 62 insertions(+), 50 deletions(-) create mode 100644 CHANGES/1004.bugfix diff --git a/CHANGES/1004.bugfix b/CHANGES/1004.bugfix new file mode 100644 index 00000000..8d1132fe --- /dev/null +++ b/CHANGES/1004.bugfix @@ -0,0 +1 @@ +Fixed pull-through caching not checking the repository if package was not present on remote. diff --git a/pulp_python/app/pypi/views.py b/pulp_python/app/pypi/views.py index c0b636bb..dc4660cd 100644 --- a/pulp_python/app/pypi/views.py +++ b/pulp_python/app/pypi/views.py @@ -15,6 +15,7 @@ from django.db.utils import DatabaseError from django.http.response import ( Http404, + HttpResponseNotFound, HttpResponseForbidden, HttpResponseBadRequest, StreamingHttpResponse, @@ -287,7 +288,7 @@ def list(self, request, path): kwargs = {"content_type": media_type, "headers": headers} return StreamingHttpResponse(index_data, **kwargs) - def pull_through_package_simple(self, package, path, remote, media_type): + def pull_through_package_simple(self, package, path, remote): """Gets the package's simple page from remote.""" def parse_package(release_package): @@ -305,7 +306,7 @@ def parse_package(release_package): rfilter = get_remote_package_filter(remote) if not rfilter.filter_project(package): - raise Http404(f"{package} does not exist.") + return {} url = remote.get_remote_artifact_url(f"simple/{package}/") remote.headers = remote.headers or [] @@ -313,27 +314,19 @@ def parse_package(release_package): downloader = remote.get_downloader(url=url, max_retries=1) try: d = downloader.fetch() - except ClientError: - return HttpResponse(f"Failed to fetch {package} from {remote.url}.", status=502) - except TimeoutException: - return HttpResponse(f"{remote.url} timed out while fetching {package}.", status=504) + except (ClientError, TimeoutException): + log.info(f"Failed to fetch {package} simple page from {remote.url}") + return {} if d.headers["content-type"] == PYPI_SIMPLE_V1_JSON: page = ProjectPage.from_json_data(json.load(open(d.path, "rb")), base_url=url) else: page = ProjectPage.from_html(package, open(d.path, "rb").read(), base_url=url) - packages = [ - parse_package(p) for p in page.packages if rfilter.filter_release(package, p.version) - ] - headers = {"X-PyPI-Last-Serial": str(PYPI_SERIAL_CONSTANT)} - - if media_type == PYPI_SIMPLE_V1_JSON: - detail_data = write_simple_detail_json(package, packages) - return Response(detail_data, headers=headers) - else: - detail_data = write_simple_detail(package, packages) - kwargs = {"content_type": media_type, "headers": headers} - return HttpResponse(detail_data, **kwargs) + return { + p.filename: parse_package(p) + for p in page.packages + if rfilter.filter_release(package, p.version) + } @extend_schema(operation_id="pypi_simple_package_read", summary="Get package simple page") def retrieve(self, request, path, package): @@ -343,44 +336,36 @@ def retrieve(self, request, path, package): repo_ver, content = self.get_rvc() # Should I redirect if the normalized name is different? normalized = canonicalize_name(package) + releases = {} if self.distribution.remote: - return self.pull_through_package_simple( - normalized, path, self.distribution.remote, media_type - ) - if self.should_redirect(repo_version=repo_ver): + releases = self.pull_through_package_simple(normalized, path, self.distribution.remote) + elif self.should_redirect(repo_version=repo_ver): return redirect(urljoin(self.base_content_url, f"{path}/simple/{normalized}/")) - packages = ( - content.filter(name__normalize=normalized) - .values_list("filename", "sha256", "name", "metadata_sha256", "requires_python") - .iterator() - ) - try: - present = next(packages) - except StopIteration: - raise Http404(f"{normalized} does not exist.") - else: - packages = chain([present], packages) - name = present[2] - releases = ( - { - "filename": filename, - "url": urljoin(self.base_content_url, f"{path}/{filename}"), - "sha256": sha256, - "metadata_sha256": metadata_sha256, - "requires_python": requires_python, + if content: + packages = content.filter(name__normalize=normalized).values( + "filename", "sha256", "metadata_sha256", "requires_python" + ) + local_releases = { + p["filename"]: { + **p, + "url": urljoin(self.base_content_url, f"{path}/{p['filename']}"), + } + for p in packages } - for filename, sha256, _, metadata_sha256, requires_python in packages - ) + releases.update(local_releases) + if not releases: + return HttpResponseNotFound(f"{normalized} does not exist.") + media_type = request.accepted_renderer.media_type headers = {"X-PyPI-Last-Serial": str(PYPI_SERIAL_CONSTANT)} if media_type == PYPI_SIMPLE_V1_JSON: - detail_data = write_simple_detail_json(name, releases) + detail_data = write_simple_detail_json(normalized, releases.values()) return Response(detail_data, headers=headers) else: - detail_data = write_simple_detail(name, releases, streamed=True) + detail_data = write_simple_detail(normalized, releases.values()) kwargs = {"content_type": media_type, "headers": headers} - return StreamingHttpResponse(detail_data, **kwargs) + return HttpResponse(detail_data, **kwargs) @extend_schema( request=PackageUploadSerializer, diff --git a/pulp_python/tests/functional/api/test_full_mirror.py b/pulp_python/tests/functional/api/test_full_mirror.py index f03ae475..f9850ba4 100644 --- a/pulp_python/tests/functional/api/test_full_mirror.py +++ b/pulp_python/tests/functional/api/test_full_mirror.py @@ -84,7 +84,7 @@ def test_pull_through_filter(python_remote_factory, python_distribution_factory) r = requests.get(f"{distro.base_url}simple/pulpcore/") assert r.status_code == 404 - assert r.text == "404 Not Found" + assert r.text == "pulpcore does not exist." r = requests.get(f"{distro.base_url}simple/shelf-reader/") assert r.status_code == 200 @@ -104,11 +104,11 @@ def test_pull_through_filter(python_remote_factory, python_distribution_factory) r = requests.get(f"{distro.base_url}simple/django/") assert r.status_code == 404 - assert r.text == "404 Not Found" + assert r.text == "django does not exist." r = requests.get(f"{distro.base_url}simple/pulpcore/") - assert r.status_code == 502 - assert r.text == f"Failed to fetch pulpcore from {remote.url}." + assert r.status_code == 404 + assert r.text == "pulpcore does not exist." r = requests.get(f"{distro.base_url}simple/shelf-reader/") assert r.status_code == 200 @@ -156,3 +156,29 @@ def test_pull_through_with_repo( assert r.status_code == 200 tasks = pulpcore_bindings.TasksApi.list(reserved_resources=repo.prn) assert tasks.count == 3 + + +@pytest.mark.parallel +def test_pull_through_local_only( + python_remote_factory, python_distribution_factory, python_repo_with_sync +): + """Tests that pull-through checks the repository if the package is not present on the remote.""" + remote = python_remote_factory(url=PYPI_URL, includes=["pulpcore"]) + repo = python_repo_with_sync(remote=remote) + remote2 = python_remote_factory(includes=[]) # Fixtures does not have pulpcore + distro = python_distribution_factory(repository=repo.pulp_href, remote=remote2.pulp_href) + + url = f"{distro.base_url}simple/pulpcore/" + r = requests.get(url) + assert r.status_code == 200 + assert "?redirect=" not in r.text + + url = f"{distro.base_url}simple/shelf-reader/" + r = requests.get(url) + assert r.status_code == 200 + assert "?redirect=" in r.text + + url = f"{distro.base_url}simple/pulp_python/" + r = requests.get(url) + assert r.status_code == 404 + assert r.text == "pulp-python does not exist."