from typing import Any
from typing import Iterable
from typing import List
- from typing import Mapping
from typing import Optional
- from urllib.parse import urljoin

import pytz
from bs4 import BeautifulSoup
from packageurl import PackageURL
- from univers.version_range import RpmVersionRange
+ from univers.versions import RpmVersion

from vulnerabilities.importer import AdvisoryData
from vulnerabilities.importer import AffectedPackage
from vulnerabilities.importer import Importer
from vulnerabilities.importer import Reference
from vulnerabilities.importer import VulnerabilitySeverity
- from vulnerabilities.references import WireSharkReference
- from vulnerabilities.references import XsaReference
- from vulnerabilities.references import ZbxReference
+ from vulnerabilities.rpm_utils import rpm_to_purl
from vulnerabilities.severity_systems import SCORING_SYSTEMS
from vulnerabilities.utils import fetch_response
from vulnerabilities.utils import is_cve

LOGGER = logging.getLogger(__name__)
BASE_URL = "https://alas.aws.amazon.com/"
- other_url = "https://explore.alas.aws.amazon.com/{cve_id.json}"  # use this URL in the code to get details for a specific CVE


class AmazonLinuxImporter(Importer):
-     spdx_license_expression = "CC BY 4.0"  # check if this is correct
-     license_url = " "  # todo
+     spdx_license_expression = "CC BY 4.0"
+     license_url = " "  # TODO

    importer_name = "Amazon Linux Importer"

@@ -107,6 +102,18 @@ def fetch_alas_id_and_advisory_links(page_url: str) -> dict[str, str]:

def process_advisory_data(alas_id, alas_advisory_page_content, alas_url) -> Optional[AdvisoryData]:

+     """
+     Processes an Amazon Linux Security Advisory HTML page to extract relevant data and return it in a structured format.
+ 
+     Args:
+         alas_id (str): The unique identifier for the Amazon Linux Security Advisory (e.g., "ALAS-2024-2628").
+         alas_advisory_page_content (str): The HTML content of the advisory page.
+         alas_url (str): The URL of the advisory page.
+ 
+     Returns:
+         Optional[AdvisoryData]: An object containing the processed advisory data, or None if the necessary data couldn't be extracted.
+     """
+ 
    soup = BeautifulSoup(alas_advisory_page_content, "html.parser")
    aliases = []
    aliases.append(alas_id)
@@ -131,8 +138,18 @@ def process_advisory_data(alas_id, alas_advisory_page_content, alas_url) -> Optional[AdvisoryData]:
    # Extract the issue overview (the text of each overview point)
    issue_overview = []
    for p in soup.find("div", id="issue_overview").find_all("p"):
-         issue_overview.append(p.text.strip())
-     summary = create_summary(issue_overview)
+         # Split the paragraph's inner HTML on <br/> tags
+         text_parts = p.decode_contents().split("<br/>")
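+         # Illustrative: "First issue.<br/>Second issue." would split into
+         # ["First issue.", "Second issue."]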
+ 
+         # Clean and append each part
+         for part in text_parts:
+             clean_text = part.strip()
+             if clean_text:  # Avoid adding empty strings
+                 issue_overview.append(clean_text)
+     # Filter out any blank entries from the list
+     issue_overview_filtered = [item for item in issue_overview if item]
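+     # (Defensive second pass: the loop above already skips empty strings.)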
+ 
+     summary = create_summary(issue_overview_filtered)

    # Extract Affected Packages (list of strings)
    processed_affected_packages = []
@@ -152,12 +169,33 @@ def process_advisory_data(alas_id, alas_advisory_page_content, alas_url) -> Optional[AdvisoryData]:
    else:
        new_packages_list = []

-     for package in affected_packages:
-         purl = PackageURL(type="rpm", namespace="alas.aws.amazon", name=package)
-         # fixed_version = get_fixed_versions(new_packages_list)
-         processed_affected_packages.append(
-             AffectedPackage(package=purl, affected_version_range=None, fixed_version=None)
-         )
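+     # The "New Packages" listing appears to group entries under architecture
+     # headings such as "x86_64:"; filter those heading rows out.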
+     exclude_items = ["i686:", "noarch:", "src:", "x86_64:", "aarch64:"]
+     filtered_new_packages_list = [
+         package for package in new_packages_list if package not in exclude_items
+     ]
+ 
+     # New packages are the fixed packages
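+     # Illustrative (assumed entry shape): rpm_to_purl on an entry like
+     # "kernel-5.10.184-175.731.amzn2.x86_64" would yield a purl such as
+     # pkg:rpm/alas.aws.amazon/kernel@5.10.184-175.731.amzn2?arch=x86_64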
+     for new_package in filtered_new_packages_list:
+         new_package_purl = rpm_to_purl(new_package, "alas.aws.amazon")
+         if new_package_purl:
+             try:
+                 processed_affected_packages.append(
+                     AffectedPackage(
+                         package=PackageURL(
+                             type="rpm",
+                             namespace="alas.aws.amazon",
+                             name=new_package_purl.name,
+                             qualifiers=new_package_purl.qualifiers,
+                             subpath=new_package_purl.subpath,
+                         ),
+                         affected_version_range=None,
+                         fixed_version=RpmVersion(new_package_purl.version),
+                     )
+                 )
+             except ValueError as e:
+                 LOGGER.error(
+                     f"Invalid RPM version '{new_package_purl.version}' for package '{new_package_purl.name}': {e}"
+                 )

    cve_list = []
    for link in soup.find("div", id="references").find_all("a", href=True):
@@ -166,7 +204,8 @@ def process_advisory_data(alas_id, alas_advisory_page_content, alas_url) -> Optional[AdvisoryData]:

    references: List[Reference] = []
    for cve_id, cve_url in cve_list:
-         cve_json_url = f"https://explore.alas.aws.amazon.com/{cve_id}"
+         aliases.append(cve_id)
+         cve_json_url = f"https://explore.alas.aws.amazon.com/{cve_id}.json"
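+         # e.g. https://explore.alas.aws.amazon.com/CVE-2023-44487.json (illustrative CVE id)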
        response = fetch_response(cve_json_url)

        # Parse the JSON data
@@ -183,6 +222,20 @@ def process_advisory_data(alas_id, alas_advisory_page_content, alas_url) -> Optional[AdvisoryData]:
        )
        references.append(Reference(reference_id=cve_id, url=cve_url, severities=severity))

+     additional_references = []
+     # Find all <p> tags within the links-container div
+     links_container = soup.find("div", class_="links-container")
+     if links_container:
+         p_tags = links_container.find_all("p")
+         for p_tag in p_tags:
+             a_tag = p_tag.find("a")
+             if a_tag:
+                 cve_id = a_tag.get_text(strip=True)  # Extract the CVE ID text
+                 url = a_tag["href"]  # Extract the URL from the href attribute
+                 additional_references.append((cve_id, url))
+     for cve_id, ref_link in additional_references:
+         references.append(Reference(reference_id=cve_id, url=ref_link, severities=[]))
+ 
    url = alas_url

    return AdvisoryData(
@@ -198,8 +251,11 @@ def process_advisory_data(alas_id, alas_advisory_page_content, alas_url) -> Optional[AdvisoryData]:
def get_date_published(release_date_string):

    # Parse the date and time
-     date_part = release_date_string[:16]
-     time_zone = release_date_string[17:]
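+     # Assumed input shape, e.g. "2024-01-18 22:51 UTC":
+     # [:16] -> "2024-01-18 22:51", [17:] -> "UTC"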
+     if release_date_string:
+         date_part = release_date_string[:16]
+         time_zone = release_date_string[17:]
+     else:
+         return None

    # Convert to datetime object (naive)
    naive_date = datetime.strptime(date_part, "%Y-%m-%d %H:%M")
@@ -212,7 +268,6 @@ def get_date_published(release_date_string):

def create_summary(summary_point: List):
    summary = ". ".join(summary_point)
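    # Illustrative: ["Issue one", "Issue two"] -> "Issue one. Issue two" (final period added below)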
- 
    # Add a period at the end if the final sentence doesn't end with one
    if not summary.endswith("."):
        summary += "."