
Commit d148c12

add tests for amazon-linux
Signed-off-by: ambuj <kulshreshthaak.12@gmail.com>
1 parent 87f1af6 commit d148c12

10 files changed: +1071 −25 lines


vulnerabilities/importers/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -8,6 +8,7 @@
 #
 
 from vulnerabilities.importers import alpine_linux
+from vulnerabilities.importers import amazon_linux
 from vulnerabilities.importers import apache_httpd
 from vulnerabilities.importers import apache_kafka
 from vulnerabilities.importers import apache_tomcat
@@ -75,6 +76,7 @@
     github_osv.GithubOSVImporter,
     epss.EPSSImporter,
     vulnrichment.VulnrichImporter,
+    amazon_linux.AmazonLinuxImporter,
 ]
 
 IMPORTERS_REGISTRY = {x.qualified_name: x for x in IMPORTERS_REGISTRY}
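
With the importer added to the list, it becomes reachable through IMPORTERS_REGISTRY keyed by its qualified name. A minimal lookup sketch, assuming the qualified name follows the same module-path pattern as the other registered importers:

# Minimal sketch: resolve the newly registered importer from the registry.
# The exact qualified_name string is an assumption based on how the other
# importers are keyed (module path plus class name).
from vulnerabilities.importers import IMPORTERS_REGISTRY

importer_class = IMPORTERS_REGISTRY.get(
    "vulnerabilities.importers.amazon_linux.AmazonLinuxImporter"
)
if importer_class:
    print(importer_class.importer_name)  # "Amazon Linux Importer"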

vulnerabilities/importers/amazon_linux.py

Lines changed: 76 additions & 21 deletions
@@ -13,35 +13,30 @@
 from typing import Any
 from typing import Iterable
 from typing import List
-from typing import Mapping
 from typing import Optional
-from urllib.parse import urljoin
 
 import pytz
 from bs4 import BeautifulSoup
 from packageurl import PackageURL
-from univers.version_range import RpmVersionRange
+from univers.versions import RpmVersion
 
 from vulnerabilities.importer import AdvisoryData
 from vulnerabilities.importer import AffectedPackage
 from vulnerabilities.importer import Importer
 from vulnerabilities.importer import Reference
 from vulnerabilities.importer import VulnerabilitySeverity
-from vulnerabilities.references import WireSharkReference
-from vulnerabilities.references import XsaReference
-from vulnerabilities.references import ZbxReference
+from vulnerabilities.rpm_utils import rpm_to_purl
 from vulnerabilities.severity_systems import SCORING_SYSTEMS
 from vulnerabilities.utils import fetch_response
 from vulnerabilities.utils import is_cve
 
 LOGGER = logging.getLogger(__name__)
 BASE_URL = "https://alas.aws.amazon.com/"
-other_url = "https://explore.alas.aws.amazon.com/{cve_id.json}"  # use this in the url in code to get details for the specific cve.
 
 
 class AmazonLinuxImporter(Importer):
-    spdx_license_expression = "CC BY 4.0"  # check if this is correct
-    license_url = " "  # todo
+    spdx_license_expression = "CC BY 4.0"
+    license_url = " "  # TODO
 
     importer_name = "Amazon Linux Importer"
 
@@ -107,6 +102,18 @@ def fetch_alas_id_and_advisory_links(page_url: str) -> dict[str, str]:
 
 def process_advisory_data(alas_id, alas_advisory_page_content, alas_url) -> Optional[AdvisoryData]:
 
+    """
+    Processes an Amazon Linux Security Advisory HTML page to extract relevant data and return it in a structured format.
+
+    Args:
+        alas_id (str): The unique identifier for the Amazon Linux Security Advisory (e.g., "ALAS-2024-2628").
+        alas_advisory_page_content (str): The HTML content of the advisory page.
+        alas_url (str): The URL of the advisory page.
+
+    Returns:
+        Optional[AdvisoryData]: An object containing the processed advisory data, or None if the necessary data couldn't be extracted.
+    """
+
     soup = BeautifulSoup(alas_advisory_page_content, "html.parser")
     aliases = []
     aliases.append(alas_id)
@@ -131,8 +138,18 @@ def process_advisory_data(alas_id, alas_advisory_page_content, alas_url) -> Optional[AdvisoryData]:
     # Extract Issue Overview (all points of issue overviews texts)
     issue_overview = []
     for p in soup.find("div", id="issue_overview").find_all("p"):
-        issue_overview.append(p.text.strip())
-    summary = create_summary(issue_overview)
+        # Replace <br> tags with a newline, then split the text
+        text_parts = p.decode_contents().split("<br/>")
+
+        # Clean and append each part
+        for part in text_parts:
+            clean_text = part.strip()
+            if clean_text:  # Avoid adding empty strings
+                issue_overview.append(clean_text)
+    # Filter out any blank entries from the list
+    issue_overview_filtered = [item for item in issue_overview if item]
+
+    summary = create_summary(issue_overview_filtered)
 
     # Extract Affected Packages (list of strings)
     processed_affected_packages = []
@@ -152,12 +169,33 @@ def process_advisory_data(alas_id, alas_advisory_page_content, alas_url) -> Optional[AdvisoryData]:
     else:
         new_packages_list = []
 
-    for package in affected_packages:
-        purl = PackageURL(type="rpm", namespace="alas.aws.amazon", name=package)
-        # fixed_version = get_fixed_versions(new_packages_list)
-        processed_affected_packages.append(
-            AffectedPackage(package=purl, affected_version_range=None, fixed_version=None)
-        )
+    exclude_items = ["i686:", "noarch:", "src:", "x86_64:", "aarch64:"]
+    filtered_new_packages_list = [
+        package for package in new_packages_list if package not in exclude_items
+    ]
+
+    # new packages are the fixed packages
+    for new_package in filtered_new_packages_list:
+        new_package_purl = rpm_to_purl(new_package, "alas.aws.amazon")
+        if new_package_purl:
+            try:
+                processed_affected_packages.append(
+                    AffectedPackage(
+                        package=PackageURL(
+                            type="rpm",
+                            namespace="alas.aws.amazon",
+                            name=new_package_purl.name,
+                            qualifiers=new_package_purl.qualifiers,
+                            subpath=new_package_purl.subpath,
+                        ),
+                        affected_version_range=None,
+                        fixed_version=RpmVersion(new_package_purl.version),
+                    )
+                )
+            except ValueError as e:
+                logging.error(
+                    f"Invalid RPM version '{new_package_purl.version}' for package '{new_package_purl.name}': {e}"
+                )
 
     cve_list = []
     for link in soup.find("div", id="references").find_all("a", href=True):
@@ -166,7 +204,8 @@ def process_advisory_data(alas_id, alas_advisory_page_content, alas_url) -> Optional[AdvisoryData]:
 
     references: List[Reference] = []
     for cve_id, cve_url in cve_list:
-        cve_json_url = f"https://explore.alas.aws.amazon.com/{cve_id}"
+        aliases.append(cve_id)
+        cve_json_url = f"https://explore.alas.aws.amazon.com/{cve_id}.json"
         response = fetch_response(cve_json_url)
 
         # Parse the JSON data
@@ -183,6 +222,20 @@ def process_advisory_data(alas_id, alas_advisory_page_content, alas_url) -> Optional[AdvisoryData]:
         )
         references.append(Reference(reference_id=cve_id, url=cve_url, severities=severity))
 
+    additional_references = []
+    # Find all <p> tags within the links-container div
+    links_container = soup.find("div", class_="links-container")
+    if links_container:
+        p_tags = links_container.find_all("p")
+        for p_tag in p_tags:
+            a_tag = p_tag.find("a")
+            if a_tag:
+                cve_id = a_tag.get_text(strip=True)  # Extract the CVE ID text
+                url = a_tag["href"]  # Extract the URL from href attribute
+                additional_references.append((cve_id, url))
+    for cve_id, ref_link in additional_references:
+        references.append(Reference(reference_id=cve_id, url=ref_link, severities=[]))
+
     url = alas_url
 
     return AdvisoryData(
@@ -198,8 +251,11 @@ def process_advisory_data(alas_id, alas_advisory_page_content, alas_url) -> Optional[AdvisoryData]:
 def get_date_published(release_date_string):
 
     # Parse the date and time
-    date_part = release_date_string[:16]
-    time_zone = release_date_string[17:]
+    if release_date_string:
+        date_part = release_date_string[:16]
+        time_zone = release_date_string[17:]
+    else:
+        return None
 
     # Convert to datetime object (naive)
     naive_date = datetime.strptime(date_part, "%Y-%m-%d %H:%M")
@@ -212,7 +268,6 @@ def get_date_published(release_date_string):
 
 def create_summary(summary_point: List):
     summary = ". ".join(summary_point)
-
     # Add a period at the end if the final sentence doesn't end with one
     if not summary.endswith("."):
        summary += "."
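
The core change above maps each "new package" string from the advisory to a fixed package. A standalone sketch of that mapping, assuming rpm_to_purl parses an RPM filename into a PackageURL with the name, version, qualifiers, and subpath fields used above; the input filename is a hypothetical example, not taken from a real advisory:

# Standalone sketch of the fixed-package mapping used in process_advisory_data.
from packageurl import PackageURL
from univers.versions import RpmVersion

from vulnerabilities.importer import AffectedPackage
from vulnerabilities.rpm_utils import rpm_to_purl

# Hypothetical "new package" entry as it might appear on an ALAS advisory page.
new_package = "kernel-4.14.352-267.564.amzn2.x86_64"

purl = rpm_to_purl(new_package, "alas.aws.amazon")
if purl:
    fixed_package = AffectedPackage(
        package=PackageURL(
            type="rpm",
            namespace="alas.aws.amazon",
            name=purl.name,
            qualifiers=purl.qualifiers,
            subpath=purl.subpath,
        ),
        # No affected range is recorded; only the fixed (patched) version.
        affected_version_range=None,
        fixed_version=RpmVersion(purl.version),
    )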

vulnerabilities/improvers/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -31,6 +31,7 @@
     vulnerability_status.VulnerabilityStatusImprover,
     vulnerability_kev.VulnerabilityKevImprover,
     flag_ghost_packages.FlagGhostPackagePipeline,
+    valid_versions.AmazonLinuxImprover,
 ]
 
 IMPROVERS_REGISTRY = {x.qualified_name: x for x in IMPROVERS_REGISTRY}

vulnerabilities/improvers/valid_versions.py

Lines changed: 6 additions & 0 deletions
@@ -25,6 +25,7 @@
 from vulnerabilities.importer import AffectedPackage
 from vulnerabilities.importer import Importer
 from vulnerabilities.importer import UnMergeablePackageError
+from vulnerabilities.importers.amazon_linux import AmazonLinuxImporter
 from vulnerabilities.importers.apache_httpd import ApacheHTTPDImporter
 from vulnerabilities.importers.apache_kafka import ApacheKafkaImporter
 from vulnerabilities.importers.apache_tomcat import ApacheTomcatImporter
@@ -472,3 +473,8 @@ class RubyImprover(ValidVersionImprover):
 class GithubOSVImprover(ValidVersionImprover):
     importer = GithubOSVImporter
     ignorable_versions = []
+
+
+class AmazonLinuxImprover(ValidVersionImprover):
+    importer = AmazonLinuxImporter
+    ignorable_versions = []

vulnerabilities/tests/test_amazon_linux.py

Lines changed: 26 additions & 4 deletions
@@ -6,6 +6,7 @@
 # See https://github.com/nexB/vulnerablecode for support or download.
 # See https://aboutcode.org for more information about nexB OSS projects.
 #
+
 import json
 import os
 from unittest import TestCase
@@ -26,8 +27,29 @@ def test_process_advisory_data1(self):
         ) as file:
             html_content = file.read()
         result = process_advisory_data(
-            "ALAS-2024-1943", html_content, "https://test-url.com/ALAS-2024-1943.html"
+            "ALAS-2024-1943", html_content, "https://alas.aws.amazon.com/ALAS-2024-1943.html"
+        ).to_dict()
+        expected_file = os.path.join(TEST_DATA, "amazon_linux_expected1.json")
+        util_tests.check_results_against_json(result, expected_file)
+
+    def test_process_advisory_data2(self):
+        with open(
+            os.path.join(TEST_DATA, "amazon_linux_advisory_test2.html"), "r", encoding="utf-8"
+        ) as file:
+            html_content = file.read()
+        result = process_advisory_data(
+            "ALAS-2024-2628", html_content, "https://alas.aws.amazon.com/AL2/ALAS-2024-2628.html"
+        ).to_dict()
+        expected_file = os.path.join(TEST_DATA, "amazon_linux_expected2.json")
+        util_tests.check_results_against_json(result, expected_file)
+
+    def test_process_advisory_data3(self):
+        with open(
+            os.path.join(TEST_DATA, "amazon_linux_advisory_test3.html"), "r", encoding="utf-8"
+        ) as file:
+            html_content = file.read()
+        result = process_advisory_data(
+            "ALAS-2024-676", html_content, "https://alas.aws.amazon.com/AL2023/ALAS-2024-676.html"
         ).to_dict()
-        # expected_file = os.path.join(TEST_DATA, "github_osv_expected_1.json")
-        print(f"Output is {result}")
-        # util_tests.check_results_against_json(result, expected_file)
+        expected_file = os.path.join(TEST_DATA, "amazon_linux_expected3.json")
+        util_tests.check_results_against_json(result, expected_file)
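
Each test compares the to_dict() output of process_advisory_data against a JSON fixture under TEST_DATA. A rough sketch of how such a fixture could be produced for the second test's inputs; whether the project actually refreshes fixtures this way, and the exact TEST_DATA path, are assumptions:

# Rough sketch: regenerate an expected-results fixture from the advisory dict.
# TEST_DATA is assumed to point at the fixtures directory used by the tests above.
import json
import os

from vulnerabilities.importers.amazon_linux import process_advisory_data

TEST_DATA = os.path.join(os.path.dirname(__file__), "test_data")

with open(
    os.path.join(TEST_DATA, "amazon_linux_advisory_test2.html"), encoding="utf-8"
) as file:
    html_content = file.read()

result = process_advisory_data(
    "ALAS-2024-2628", html_content, "https://alas.aws.amazon.com/AL2/ALAS-2024-2628.html"
).to_dict()

with open(os.path.join(TEST_DATA, "amazon_linux_expected2.json"), "w", encoding="utf-8") as out:
    json.dump(result, out, indent=2)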
