|
| 1 | +import json |
1 | 2 | import logging |
2 | | -import requests |
3 | 3 |
|
| 4 | +from aiohttp.client_exceptions import ClientError |
4 | 5 | from rest_framework.viewsets import ViewSet |
5 | 6 | from rest_framework.response import Response |
6 | 7 | from django.core.exceptions import ObjectDoesNotExist |
|
15 | 16 | Http404, |
16 | 17 | HttpResponseForbidden, |
17 | 18 | HttpResponseBadRequest, |
18 | | - StreamingHttpResponse |
| 19 | + StreamingHttpResponse, |
| 20 | + HttpResponse, |
19 | 21 | ) |
20 | 22 | from drf_spectacular.utils import extend_schema |
21 | 23 | from dynaconf import settings |
22 | 24 | from itertools import chain |
23 | 25 | from packaging.utils import canonicalize_name |
24 | 26 | from urllib.parse import urljoin, urlparse, urlunsplit |
25 | 27 | from pathlib import PurePath |
26 | | -from pypi_simple import parse_links_stream_response |
| 28 | +from pypi_simple import ACCEPT_JSON_PREFERRED, ProjectPage |
27 | 29 |
|
28 | 30 | from pulpcore.plugin.viewsets import OperationPostponedResponse |
29 | 31 | from pulpcore.plugin.tasking import dispatch |
30 | 32 | from pulpcore.plugin.util import get_domain |
| 33 | +from pulpcore.plugin.exceptions import TimeoutException |
31 | 34 | from pulp_python.app.models import ( |
32 | 35 | PythonDistribution, |
33 | 36 | PythonPackageContent, |
|
37 | 40 | SummarySerializer, |
38 | 41 | PackageMetadataSerializer, |
39 | 42 | PackageUploadSerializer, |
40 | | - PackageUploadTaskSerializer |
| 43 | + PackageUploadTaskSerializer, |
41 | 44 | ) |
42 | 45 | from pulp_python.app.utils import ( |
43 | 46 | write_simple_index, |
44 | 47 | write_simple_detail, |
45 | 48 | python_content_to_json, |
46 | 49 | PYPI_LAST_SERIAL, |
47 | 50 | PYPI_SERIAL_CONSTANT, |
| 51 | + get_remote_package_filter, |
48 | 52 | ) |
49 | 53 |
|
50 | 54 | from pulp_python.app import tasks |
@@ -233,27 +237,36 @@ def list(self, request, path): |
233 | 237 |
|
def pull_through_package_simple(self, package, path, remote):
    """Build the package's simple page from the remote's index.

    Args:
        package: Name of the project whose simple page is requested.
        path: Distribution base path, used as the prefix of each rewritten
            download link.
        remote: Remote that supplies the download URL, request headers and
            downloader for the upstream index.

    Returns:
        HttpResponse: the rendered simple detail page on success, a 502
        response when the upstream fetch fails with a client error, or a
        504 response when the upstream fetch times out.

    Raises:
        Http404: if the remote's package filter rejects ``package``.
    """
    def parse_package(release_package):
        """Return (filename, redirecting download URL, sha256) for one file."""
        parsed = urlparse(release_package.url)
        # Keep only scheme, netloc and path of the upstream file URL.
        stripped_url = urlunsplit(chain(parsed[:3], ("", "")))
        redirect_path = f'{path}/{release_package.filename}?redirect={stripped_url}'
        d_url = urljoin(self.base_content_url, redirect_path)
        return release_package.filename, d_url, release_package.digests.get("sha256", "")

    rfilter = get_remote_package_filter(remote)
    if not rfilter.filter_project(package):
        raise Http404(f"{package} does not exist.")

    url = remote.get_remote_artifact_url(f'simple/{package}/')
    # Build a new list rather than appending in place, so the list object
    # previously held by the remote is not mutated.
    remote.headers = [*(remote.headers or []), {"Accept": ACCEPT_JSON_PREFERRED}]
    downloader = remote.get_downloader(url=url, max_retries=1)
    try:
        d = downloader.fetch()
    except ClientError:
        # HttpResponse is not an exception: it must be returned, not raised.
        return HttpResponse(f"Failed to fetch {package} from {remote.url}.", status=502)
    except TimeoutException:
        return HttpResponse(f"{remote.url} timed out while fetching {package}.", status=504)

    # Compare only the media type: ignore parameters such as "; charset=utf-8",
    # ignore case, and tolerate a missing header by falling back to HTML.
    content_type = (d.headers.get("content-type") or "").partition(";")[0].strip().lower()
    with open(d.path, "rb") as downloaded:
        if content_type == "application/vnd.pypi.simple.v1+json":
            page = ProjectPage.from_json_data(json.load(downloaded), base_url=remote.url)
        else:
            page = ProjectPage.from_html(package, downloaded.read(), base_url=remote.url)

    packages = [
        parse_package(p) for p in page.packages if rfilter.filter_release(package, p.version)
    ]
    return HttpResponse(write_simple_detail(package, packages))
257 | 270 |
|
258 | 271 | @extend_schema(operation_id="pypi_simple_package_read", summary="Get package simple page") |
259 | 272 | def retrieve(self, request, path, package): |
|
0 commit comments