Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 22 additions & 3 deletions component_catalog/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@
from django.db import transaction
from django.forms.widgets import HiddenInput
from django.http import FileResponse
from django.http.response import StreamingHttpResponse

import django_filters
import requests
from packageurl.contrib import url2purl
from packageurl.contrib.django.filters import PackageURLFilter
from rest_framework import serializers
Expand Down Expand Up @@ -879,6 +881,11 @@ class ScanDataUnavailable(APIException):
default_detail = "Scan data is not available"


class ScanFetchError(APIException):
    """API error returned when the scan data request to ScanCode.io fails."""

    status_code = status.HTTP_400_BAD_REQUEST
    default_detail = "Could not fetch scan data"


class PackageViewSet(
SendAboutFilesMixin,
AboutCodeFilesActionMixin,
Expand Down Expand Up @@ -956,17 +963,29 @@ def scan_info(self, request, uuid):

@action(detail=True, name="Scan results")
def scan_results(self, request, uuid):
    """
    Stream scan results directly from ScanCode.io back to the client.

    The response body is not loaded in memory but proxied chunk by chunk,
    making it suitable for large scan result payloads.

    Raise a ``ScanFetchError`` (HTTP 400) when the request to ScanCode.io
    fails with a ``requests.RequestException``.
    """
    package = self.get_object()
    dataspace = request.user.dataspace
    scancodeio = ScanCodeIO(dataspace)
    project_info = self._get_scancodeio_project_info(scancodeio, package)

    project_uuid = project_info.get("uuid")
    scan_results_url = scancodeio.get_scan_action_url(project_uuid, "results")

    try:
        scan_response = scancodeio.stream_scan_data(scan_results_url)
    except requests.RequestException as error:
        # Keep the original exception as the cause to ease debugging.
        raise ScanFetchError() from error

    def proxy_chunks():
        """Yield the upstream body by chunks, then release the connection."""
        try:
            yield from scan_response.iter_content(chunk_size=8192)
        finally:
            # Runs when the body is exhausted or when Django closes the
            # response early (e.g. the client disconnected mid-stream).
            scan_response.close()

    return StreamingHttpResponse(
        proxy_chunks(),
        content_type=scan_response.headers.get("Content-Type", "application/json"),
    )

@action(detail=True, name="Scan summary")
def scan_summary(self, request, uuid):
Expand Down
22 changes: 18 additions & 4 deletions component_catalog/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from django.test import override_settings
from django.urls import reverse

import requests
from rest_framework import status
from rest_framework.exceptions import ErrorDetail
from rest_framework.test import APIClient
Expand Down Expand Up @@ -1523,19 +1524,32 @@ def test_api_package_viewset_scan_info_action(self, mock_is_available, mock_get_
self.assertEqual(project_info, response.data)

@mock.patch("dejacode_toolkit.scancodeio.ScanCodeIO.get_project_info")
@mock.patch("dejacode_toolkit.scancodeio.ScanCodeIO.stream_scan_data")
@mock.patch("dejacode_toolkit.scancodeio.ScanCodeIO.is_available")
def test_api_package_viewset_scan_results_action(
    self, mock_is_available, mock_stream_scan_data, mock_get_project_info
):
    """Check the error and the streaming success paths of the scan results action."""
    self.client.login(username=self.base_user.username, password="secret")
    action_url = reverse("api_v2:package-scan-results", args=[self.package1.uuid])
    mock_is_available.return_value = True
    mock_get_project_info.return_value = {"uuid": "abcdef"}

    # A failure while requesting the scan data is reported as a 400 API error.
    mock_stream_scan_data.side_effect = requests.RequestException
    response = self.client.get(action_url)
    self.assertEqual(400, response.status_code)
    error = {"detail": ErrorDetail(string="Could not fetch scan data", code="error")}
    self.assertEqual(error, response.data)

    # On success, the upstream chunks and Content-Type are proxied to the client.
    mock_response = mock.Mock()
    mock_response.iter_content.return_value = iter([b'{"results": ""}'])
    mock_response.headers = {"Content-Type": "application/json"}
    mock_stream_scan_data.side_effect = None
    mock_stream_scan_data.return_value = mock_response

    response = self.client.get(action_url)
    self.assertEqual(200, response.status_code)
    # A streaming response exposes its body through ``streaming_content``.
    self.assertEqual(b'{"results": ""}', b"".join(response.streaming_content))
    self.assertEqual("application/json", response.headers["Content-Type"])

@mock.patch("dejacode_toolkit.scancodeio.ScanCodeIO.get_project_info")
@mock.patch("dejacode_toolkit.scancodeio.ScanCodeIO.fetch_scan_data")
Expand Down
3 changes: 3 additions & 0 deletions dejacode/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -696,6 +696,9 @@ def get_fake_redis_connection(config, use_strict_redis):
# during the Django 6.x release cycle.
URLIZE_ASSUME_HTTPS = env.bool("DEJACODE_URLIZE_ASSUME_HTTPS", default=True)

# Timeout, in seconds, for HTTP requests to integration services. Defaults to 5.
DEJACODE_INTEGRATION_REQUESTS_TIMEOUT = env.int("DEJACODE_INTEGRATION_REQUESTS_TIMEOUT", default=5)

if IS_TESTS:
# Silence the django-axes logging during tests
LOGGING["loggers"].update({"axes": {"handlers": ["null"]}})
Expand Down
7 changes: 5 additions & 2 deletions dejacode_toolkit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,13 @@ def get_settings(var_name, default=None):
return getenv(var_name) or getattr(settings, var_name, default)


REQUESTS_TIMEOUT = get_settings("DEJACODE_INTEGRATION_REQUESTS_TIMEOUT", default=5)


def is_service_available(label, session, url, raise_exceptions):
"""Check if a configured integration service is available."""
try:
response = session.head(url, timeout=5)
response = session.head(url, timeout=REQUESTS_TIMEOUT)
response.raise_for_status()
except requests.exceptions.RequestException as request_exception:
logger.debug(f"{label} is_available() error: {request_exception}")
Expand All @@ -40,7 +43,7 @@ class BaseService:
settings_prefix = None
url_field_name = None
api_key_field_name = None
default_timeout = 5
default_timeout = REQUESTS_TIMEOUT

def __init__(self, dataspace):
if not dataspace:
Expand Down
10 changes: 9 additions & 1 deletion dejacode_toolkit/scancodeio.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,16 @@ def fetch_scan_data(self, data_url):
return self.request_get(url=data_url)

def stream_scan_data(self, data_url):
    """
    Stream scan data from the given URL and return the open response.

    With stream=True, only headers are fetched initially, so raise_for_status()
    can fail fast on errors before any body content is downloaded.

    The caller is responsible for consuming or closing the returned response.
    Raise the ``requests`` exception on connection failure, timeout, or on an
    HTTP error status.
    """
    logger.debug(f"{self.label}: stream scan data data_url={data_url}")

    # requests applies no timeout unless one is provided, which could block
    # the caller forever on an unresponsive server.
    # NOTE(review): ``default_timeout`` is declared on BaseService; presumably
    # this class inherits from it. Falls back to no timeout if not defined.
    timeout = getattr(self, "default_timeout", None)

    response = self.session.get(url=data_url, stream=True, timeout=timeout)
    try:
        response.raise_for_status()
    except Exception:
        # The body will not be read on error: release the connection now.
        response.close()
        raise
    return response

def delete_scan(self, detail_url):
logger.debug(f"{self.label}: delete scan detail_url={detail_url}")
Expand Down
2 changes: 1 addition & 1 deletion dejacode_toolkit/vulnerablecode.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def get_vulnerable_cpes(self, components):
if not cpes:
return []

search_results = self.bulk_search_by_cpes(cpes, timeout=5)
search_results = self.bulk_search_by_cpes(cpes)
if not search_results:
return []

Expand Down