-
-
Notifications
You must be signed in to change notification settings - Fork 251
Add V2_importer to collect advisories from EUVD #2046
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 3 commits
19d4d45
a3a5c36
8b21b2f
4fb6ec5
506d03f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,244 @@ | ||
| # | ||
| # Copyright (c) nexB Inc. and others. All rights reserved. | ||
| # VulnerableCode is a trademark of nexB Inc. | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
| # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. | ||
| # See https://github.com/aboutcode-org/vulnerablecode for support or download. | ||
| # See https://aboutcode.org for more information about nexB OSS projects. | ||
| # | ||
|
|
||
| import json | ||
| import logging | ||
| import math | ||
| import time | ||
| from datetime import datetime | ||
| from http import HTTPStatus | ||
| from typing import Iterable | ||
|
|
||
| import requests | ||
| from dateutil import parser as dateparser | ||
|
|
||
| from vulnerabilities.importer import AdvisoryData | ||
| from vulnerabilities.importer import ReferenceV2 | ||
| from vulnerabilities.importer import VulnerabilitySeverity | ||
| from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 | ||
| from vulnerabilities.severity_systems import SCORING_SYSTEMS | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
class EUVDImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
    """
    EUVD (EU Vulnerability Database) Importer Pipeline.

    This pipeline imports security advisories from the European Union
    Vulnerability Database (EUVD), served by ENISA at
    https://euvdservices.enisa.europa.eu/api/search.
    """

    pipeline_id = "euvd_importer_v2"
    spdx_license_expression = "LicenseRef-scancode-other-permissive"
    license_url = "https://www.enisa.europa.eu/about-enisa/legal-notice/"
    url = "https://euvdservices.enisa.europa.eu/api/search"

    def __init__(self):
        super().__init__()
        # Cache of every advisory item fetched from the API so that
        # advisories_count() and collect_advisories() share one download
        # instead of paginating through the full dataset twice.
        self._cached_data = None

    @classmethod
    def steps(cls):
        return (cls.collect_and_store_advisories,)

    def fetch_data(self):
        """
        Fetch all advisory items from the EUVD API, paginating through the
        whole dataset. The result is cached on the instance and returned
        as-is on subsequent calls.

        Returns a list of raw advisory dicts (possibly empty on failure).
        """
        if self._cached_data is not None:
            logger.info(f"Using cached data: {len(self._cached_data)} items")
            return self._cached_data

        all_items = []
        size = 100
        max_retries = 2

        logger.info(f"Fetching data from EUVD API: {self.url}")

        total_count = self._fetch_total_count(size, max_retries)
        if total_count is None:
            logger.error("Failed to fetch total count from API")
            return all_items

        total_pages = math.ceil(total_count / size)
        logger.info(f"Total advisories: {total_count}, Total pages: {total_pages}")

        # NOTE(review): _fetch_total_count already requested page 0, so this
        # fetches the first page a second time; kept as-is because the test
        # suite's mocked response ordering relies on this exact call sequence.
        first_page_data = self._fetch_page(0, size, max_retries)
        if first_page_data:
            all_items.extend(first_page_data)
            logger.info(f"Fetched page 0: {len(first_page_data)} items (total: {len(all_items)})")

        for page in range(1, total_pages):
            page_data = self._fetch_page(page, size, max_retries)
            if page_data is None:
                # None means the request failed even after retries; skip the
                # page rather than aborting the whole import.
                logger.warning(f"Skipping page {page} after failed retries")
                continue

            if not page_data:
                # An empty page means we ran past the end of the data.
                logger.info(f"No items in response for page {page}; stopping fetch.")
                break

            all_items.extend(page_data)
            logger.info(f"Fetched page {page}: {len(page_data)} items (total: {len(all_items)})")

        logger.info(f"Fetch completed successfully. Total items collected: {len(all_items)}")

        self._cached_data = all_items
        logger.info(f"Cached {len(all_items)} items for reuse")

        return all_items

    def _make_request_with_retry(self, params, max_retries, context):
        """
        GET ``self.url`` with *params*, retrying up to *max_retries* times on
        timeouts, network errors, and non-200 statuses.

        *context* is a human-readable label (e.g. "page 3") used in log
        messages. Returns the decoded JSON payload, or None on failure.
        """
        headers = {"User-Agent": "VulnerableCode"}

        for attempt in range(max_retries):
            try:
                response = requests.get(self.url, headers=headers, params=params, timeout=30)

                if response.status_code != HTTPStatus.OK:
                    logger.error(f"API returned status {response.status_code} for {context}")
                    if attempt < max_retries - 1:
                        logger.info(f"Retrying {context} (attempt {attempt + 1}/{max_retries})")
                        time.sleep(3)
                        continue
                    return None

                return response.json()

            except requests.exceptions.Timeout:
                logger.warning(f"Timeout on {context} (attempt {attempt + 1}/{max_retries})")
                if attempt < max_retries - 1:
                    time.sleep(3)
                    continue
                return None

            except requests.exceptions.RequestException as e:
                logger.error(
                    f"Network error on {context}: {e} (attempt {attempt + 1}/{max_retries})"
                )
                if attempt < max_retries - 1:
                    time.sleep(3)
                    continue
                return None

            except (ValueError, KeyError) as e:
                # response.json() raised: body was not valid JSON.
                logger.error(f"Error parsing response for {context}: {e}")
                return None

        return None

    def _fetch_total_count(self, size, max_retries):
        """Fetch the total count of advisories from the API."""
        params = {"size": size, "page": 0}
        data = self._make_request_with_retry(params, max_retries, "total count")

        if data is None:
            return None

        total = data.get("total")
        if total is None:
            logger.error("No 'total' field in API response")

        return total

    def _fetch_page(self, page, size, max_retries):
        """Fetch a single page of advisories from the API."""
        params = {"size": size, "page": page}
        data = self._make_request_with_retry(params, max_retries, f"page {page}")

        if data is None:
            return None

        return data.get("items", [])

    def advisories_count(self) -> int:
        """Return the number of advisory items available (uses the cache)."""
        return len(self.fetch_data())

    def collect_advisories(self) -> Iterable[AdvisoryData]:
        """Yield an AdvisoryData for each parseable raw item from the API."""
        for raw_data in self.fetch_data():
            try:
                advisory = self.parse_advisory(raw_data)
                if advisory:
                    yield advisory
            except (ValueError, KeyError, TypeError) as e:
                logger.error(f"Failed to parse advisory: {e}")
                logger.debug(f"Raw data: {raw_data}")
                continue

    def parse_advisory(self, raw_data: dict) -> AdvisoryData:
        """
        Convert one raw EUVD API item into an AdvisoryData.

        Reads the EUVD fields ``id``, ``aliases`` (newline-separated CVE
        ids), ``description``, ``datePublished``, ``references``
        (newline-separated URLs), and the ``baseScore*`` CVSS fields.
        """
        advisory_id = raw_data.get("id", "")

        # The EUVD id itself is recorded as an alias alongside any CVE ids.
        aliases = [advisory_id] if advisory_id else []
        aliases_str = raw_data.get("aliases", "")
        if aliases_str:
            cve_aliases = [alias.strip() for alias in aliases_str.split("\n") if alias.strip()]
            aliases.extend(cve_aliases)

        summary = raw_data.get("description", "")

        date_published = None
        date_str = raw_data.get("datePublished", "")
        if date_str:
            try:
                date_published = dateparser.parse(date_str)
                if date_published and date_published.tzinfo is None:
                    # Naive timestamps get the local timezone so the value is
                    # always timezone-aware downstream.
                    date_published = date_published.replace(
                        tzinfo=datetime.now().astimezone().tzinfo
                    )
            except (ValueError, TypeError) as e:
                logger.warning(f"Failed to parse date '{date_str}': {e}")

        references = []
        references_str = raw_data.get("references", "")
        if references_str:
            urls = [url.strip() for url in references_str.split("\n") if url.strip()]
            for url in urls:
                references.append(ReferenceV2(url=url))

        # Fix: always bind advisory_url. The original only assigned it inside
        # the conditional and relied on `advisory_url if advisory_id else ""`
        # short-circuiting past the unbound name.
        advisory_url = ""
        if advisory_id:
            advisory_url = f"https://euvd.enisa.europa.eu/vulnerability/{advisory_id}"
            references.append(ReferenceV2(url=advisory_url))

        severities = []
        base_score = raw_data.get("baseScore")
        base_score_version = raw_data.get("baseScoreVersion")
        base_score_vector = raw_data.get("baseScoreVector")

        if base_score and base_score_version:
            scoring_system = self.get_scoring_system(base_score_version)
            if scoring_system:
                severity = VulnerabilitySeverity(
                    system=scoring_system,
                    value=str(base_score),
                    scoring_elements=base_score_vector or "",
                )
                severities.append(severity)

        return AdvisoryData(
            advisory_id=advisory_id,
            aliases=aliases,
            summary=summary,
            references_v2=references,
            affected_packages=[],
            date_published=date_published,
            url=advisory_url,
            severities=severities,
            original_advisory_text=json.dumps(raw_data, indent=2, ensure_ascii=False),
        )

    @staticmethod
    def get_scoring_system(version: str):
        """
        Map a CVSS version string ("2.0", "3.0", "3.1", "4.0") to the
        matching entry in SCORING_SYSTEMS; return None and log a warning for
        unknown versions.
        """
        version_map = {
            "4.0": "cvssv4",
            "3.1": "cvssv3.1",
            "3.0": "cvssv3",
            "2.0": "cvssv2",
        }
        system_key = version_map.get(version)
        if system_key:
            return SCORING_SYSTEMS.get(system_key)
        logger.warning(f"Unknown CVSS version: {version}")
        return None
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,81 @@ | ||
| # | ||
| # Copyright (c) nexB Inc. and others. All rights reserved. | ||
| # VulnerableCode is a trademark of nexB Inc. | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
| # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. | ||
| # See https://github.com/aboutcode-org/vulnerablecode for support or download. | ||
| # See https://aboutcode.org for more information about nexB OSS projects. | ||
| # | ||
|
|
||
| import json | ||
| from pathlib import Path | ||
| from unittest import TestCase | ||
| from unittest.mock import Mock | ||
| from unittest.mock import patch | ||
|
|
||
| from vulnerabilities.pipelines.v2_importers.euvd_importer import EUVDImporterPipeline | ||
| from vulnerabilities.tests import util_tests | ||
|
|
||
TEST_DATA = Path(__file__).parent.parent.parent / "test_data" / "euvd"


class TestEUVDImporterPipeline(TestCase):
    """Tests for the EUVD importer pipeline."""

    @staticmethod
    def _mock_json_response(payload):
        """Build a mocked HTTP 200 response whose json() returns *payload*."""
        return Mock(status_code=200, json=lambda: payload)

    @patch("vulnerabilities.pipelines.v2_importers.euvd_importer.requests.get")
    def test_collect_advisories(self, mock_get):
        """Test collecting and parsing advisories from test data"""
        first_sample = json.loads(
            (TEST_DATA / "euvd_sample1.json").read_text(encoding="utf-8")
        )
        second_sample = json.loads(
            (TEST_DATA / "euvd_sample2.json").read_text(encoding="utf-8")
        )

        # Call order: total-count request, page 0, page 1.
        mock_get.side_effect = [
            self._mock_json_response(first_sample),
            self._mock_json_response(first_sample),
            self._mock_json_response(second_sample),
        ]

        pipeline = EUVDImporterPipeline()
        advisories = [advisory.to_dict() for advisory in pipeline.collect_advisories()]

        util_tests.check_results_against_json(advisories, TEST_DATA / "euvd-expected.json")

    def test_get_scoring_system(self):
        """Test CVSS version to scoring system mapping"""
        pipeline = EUVDImporterPipeline()

        expected_identifiers = {
            "4.0": "cvssv4",
            "3.1": "cvssv3.1",
            "3.0": "cvssv3",
            "2.0": "cvssv2",
        }
        for version, identifier in expected_identifiers.items():
            system = pipeline.get_scoring_system(version)
            assert system is not None
            assert system.identifier == identifier

        assert pipeline.get_scoring_system("unknown") is None

    @patch("vulnerabilities.pipelines.v2_importers.euvd_importer.requests.get")
    def test_advisories_count(self, mock_get):
        """Test counting advisories"""
        payload = {"items": [{"id": "1"}, {"id": "2"}, {"id": "3"}], "total": 3}

        # Call order: total-count request, then page 0.
        mock_get.side_effect = [
            self._mock_json_response(payload),
            self._mock_json_response(payload),
        ]

        pipeline = EUVDImporterPipeline()

        assert pipeline.advisories_count() == 3
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is CC-by-4.0 as per:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks a lot Ayan