|
| 1 | +# |
| 2 | +# Copyright (c) nexB Inc. and others. All rights reserved. |
| 3 | +# VulnerableCode is a trademark of nexB Inc. |
| 4 | +# SPDX-License-Identifier: Apache-2.0 |
| 5 | +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. |
| 6 | +# See https://github.com/aboutcode-org/vulnerablecode for support or download. |
| 7 | +# See https://aboutcode.org for more information about nexB OSS projects. |
| 8 | +# |
| 9 | + |
| 10 | +from typing import NamedTuple |
| 11 | +from urllib.parse import urljoin |
| 12 | + |
| 13 | +import requests |
| 14 | +from bs4 import BeautifulSoup |
| 15 | +from packageurl import PackageURL |
| 16 | +from univers.version_constraint import VersionConstraint |
| 17 | +from univers.version_constraint import validate_comparators |
| 18 | +from univers.version_range import NginxVersionRange |
| 19 | +from univers.versions import InvalidVersion |
| 20 | + |
| 21 | +from vulnerabilities.importer import AdvisoryData |
| 22 | +from vulnerabilities.importer import AffectedPackageV2 |
| 23 | +from vulnerabilities.importer import PatchData |
| 24 | +from vulnerabilities.importer import ReferenceV2 |
| 25 | +from vulnerabilities.importer import VulnerabilitySeverity |
| 26 | +from vulnerabilities.importer import logger |
| 27 | +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 |
| 28 | +from vulnerabilities.severity_systems import GENERIC |
| 29 | + |
| 30 | + |
class NginxImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
    """
    Collect Nginx security advisories.

    All advisories are scraped from the single HTML page at ``url``.
    """

    pipeline_id = "nginx_importer_v2"

    spdx_license_expression = "BSD-2-Clause"
    license_url = "https://nginx.org/LICENSE"
    url = "https://nginx.org/en/security_advisories.html"

    @classmethod
    def steps(cls):
        """Return the pipeline steps, in execution order."""
        # NOTE(review): ``collect_and_store_advisories`` is not defined in this
        # class; presumably it is inherited from the base pipeline and drives
        # ``collect_advisories`` -- confirm against the base class.
        return (
            cls.fetch,
            cls.collect_and_store_advisories,
        )

    def fetch(self):
        """
        Download the advisories page and keep its HTML in ``self.advisory_data``.

        Raise a ``requests`` exception if the request times out or if the
        server answers with an HTTP error status.
        """
        self.log(f"Fetch `{self.url}`")
        # requests waits indefinitely by default: bound the wait so that a
        # stalled connection cannot hang the whole pipeline run.
        response = requests.get(self.url, timeout=30)
        # Fail loudly instead of parsing an HTTP error page as if it were the
        # advisories listing.
        response.raise_for_status()
        self.advisory_data = response.text

    def advisories_count(self):
        """
        Return the number of literal ``<li><p>`` occurrences in the fetched HTML.
        """
        return self.advisory_data.count("<li><p>")

    def collect_advisories(self):
        """
        Yield AdvisoryData from nginx security advisories HTML
        web page.
        """
        soup = BeautifulSoup(self.advisory_data, features="lxml")
        # Every ``<p>`` nested in an ``<li>`` is treated as one advisory.
        for vulnerability_info in soup.select("li p"):
            nginx_advisory = parse_advisory_data_from_paragraph(vulnerability_info)
            yield to_advisory_data(nginx_advisory)
| 64 | + |
| 65 | + |
class NginxAdvisory(NamedTuple):
    """
    Raw fields collected from a single advisory paragraph of the nginx
    security advisories page, before conversion to AdvisoryData.
    """

    # Identifier chosen for this advisory.
    advisory_id: str
    # Other identifiers found in the same paragraph.
    aliases: list
    # Leading text of the paragraph.
    summary: str
    # Severity objects built from the "Severity: ..." text.
    severities: list
    # Patch objects built from the patch download links.
    patches: list
    # Full "Not vulnerable: ..." text, or None when absent.
    not_vulnerable: str
    # Full "Vulnerable: ..." text, or None when absent.
    vulnerable: str
    # Reference objects built from identifiers and links.
    references: list

    def to_dict(self):
        """Return a mapping of field name to value, in field order."""
        return dict(zip(self._fields, self))
| 78 | + |
| 79 | + |
def to_advisory_data(nginx_adv: NginxAdvisory) -> AdvisoryData:
    """
    Return AdvisoryData from an NginxAdvisory tuple.

    The ``vulnerable`` and ``not_vulnerable`` texts look like
    "Vulnerable: 0.6.18-1.20.0" and "Not vulnerable: 1.21.0+, 1.20.1+": the
    part after the first colon is parsed as a native nginx version range.
    A range that cannot be parsed or validated is logged and dropped. No
    affected package is reported when no usable range is left.
    """
    qualifiers = {}
    # Either text may be None when the advisory paragraph lacks that line:
    # fall back to an empty string rather than failing on ``None.partition``.
    _, _, affected_versions = (nginx_adv.vulnerable or "").partition(":")
    affected_versions = affected_versions.strip()

    # Windows-only advisories are flagged in the version text itself: move
    # that marker to a purl qualifier so that only versions remain to parse.
    if "nginx/Windows" in affected_versions:
        qualifiers["os"] = "windows"
        affected_versions = affected_versions.replace("nginx/Windows", "")

    _, _, fixed_versions = (nginx_adv.not_vulnerable or "").partition(":")
    fixed_versions = fixed_versions.strip()

    purl = PackageURL(type="nginx", name="nginx", qualifiers=qualifiers)

    fixed_version_range = None
    try:
        fixed_version_range = NginxVersionRange.from_native(fixed_versions)
    except InvalidVersion as e:
        logger.error(f"InvalidVersionRange fixed_version_range: {fixed_versions} - error: {e}")

    affected_version_range = None
    try:
        affected_version_range = NginxVersionRange.from_native(affected_versions)
    except InvalidVersion as e:
        logger.error(
            f"InvalidVersionRange affected_version_range: {affected_versions} - error: {e}"
        )

    affected_packages = []
    if purl and (affected_version_range or fixed_version_range):
        if affected_version_range:
            try:
                validate_comparators(affected_version_range.constraints)
            except ValueError as e:
                # Log the offending range *before* discarding it.
                logger.error(
                    f"Invalid version_range affected_version_range:{affected_version_range} - error: {e}"
                )
                affected_version_range = None

        if fixed_version_range:
            try:
                fixed_version_constraints = VersionConstraint.simplify(
                    fixed_version_range.constraints
                )
                fixed_version_range = NginxVersionRange(constraints=fixed_version_constraints)
                validate_comparators(fixed_version_range.constraints)
            except ValueError as e:
                # Log the offending range *before* discarding it.
                logger.error(
                    f"Invalid version_range fixed_version_range:{fixed_version_range} - error: {e}"
                )
                fixed_version_range = None

        # Validation may have discarded both ranges: an affected package
        # without any version range carries no information.
        # NOTE(review): AffectedPackageV2 presumably rejects such an entry
        # anyway -- confirm against its definition.
        if affected_version_range is not None or fixed_version_range is not None:
            affected_packages.append(
                AffectedPackageV2(
                    package=purl,
                    affected_version_range=affected_version_range,
                    fixed_version_range=fixed_version_range,
                )
            )

    # NOTE(review): ``nginx_adv.severities`` is not forwarded here -- confirm
    # whether AdvisoryData is expected to receive it.
    return AdvisoryData(
        advisory_id=nginx_adv.advisory_id,
        aliases=nginx_adv.aliases,
        summary=nginx_adv.summary,
        affected_packages=affected_packages,
        references_v2=nginx_adv.references,
        patches=nginx_adv.patches,
        url="https://nginx.org/en/security_advisories.html",
    )
| 152 | + |
| 153 | + |
def parse_advisory_data_from_paragraph(vulnerability_info):
    """
    Return an NginxAdvisory built from the children of a
    ``vulnerability_info`` bs4 paragraph.

    Stripped of its html markup, an advisory paragraph reads like this:

    1-byte memory overwrite in resolver
    Severity: medium
    Advisory
    CVE-2021-23017
    Not vulnerable: 1.21.0+, 1.20.1+
    Vulnerable: 0.6.18-1.20.0
    The patch pgp

    The first child is kept as the summary. Each following child is
    classified from its text, or from its ``href`` attribute for links.
    The last identifier found (CVE-, CORE- or VU# prefixed) becomes the
    advisory id and the others stay as aliases.

    Raise an IndexError when the paragraph holds no such identifier.
    """
    id_prefixes = ("CVE-", "CORE-", "VU#")
    nginx_base_url = "https://nginx.org"

    found_ids = []
    severities = []
    patches = []
    references = []
    not_vulnerable = None
    vulnerable = None

    # The first child is the summary; the remaining ones are classified below.
    nodes = iter(vulnerability_info.children)
    summary = next(nodes, None)

    for node in nodes:
        text = node.text.strip()
        lowered = text.lower()

        if text.startswith(id_prefixes):
            found_ids.append(text)
            if text.startswith("CVE-"):
                # always keep the CVE as a reference too
                nvd_url = f"https://nvd.nist.gov/vuln/detail/{text}"
                references.append(ReferenceV2(reference_id=text, url=nvd_url))
            continue

        if "severity" in lowered:
            severity = build_severity(severity=text)
            if severity:
                severities.append(severity)
            continue

        # "not vulnerable" must be tested first as it also contains "vulnerable"
        if "not vulnerable" in lowered:
            not_vulnerable = text
            continue

        if "vulnerable" in lowered:
            vulnerable = text
            continue

        # Anything left is only of interest when it carries a link.
        if not hasattr(node, "attrs"):
            continue

        href = node.attrs.get("href")
        if not href:
            continue

        if "cve.mitre.org" in href:
            references.append(ReferenceV2(reference_id=text, url=href))
        elif "mailman.nginx.org" in href:
            references.append(ReferenceV2(url=href))
        elif "/download/patch" in href:
            patch_url = urljoin(nginx_base_url, href)
            patches.append(PatchData(patch_url=patch_url))
        else:
            references.append(ReferenceV2(url=urljoin(nginx_base_url, href)))

    # Remove the last identifier from the list to use it as the advisory id.
    advisory_id = found_ids.pop()
    return NginxAdvisory(
        advisory_id=advisory_id,
        aliases=found_ids,
        summary=summary,
        severities=severities,
        patches=patches,
        not_vulnerable=not_vulnerable,
        vulnerable=vulnerable,
        references=references,
    )
| 242 | + |
| 243 | + |
def build_severity(severity):
    """
    Return a VulnerabilitySeverity in the GENERIC system for a ``severity``
    text, or None when no value is left after cleanup.

    A leading "Severity:" label is removed and the remainder is stripped of
    surrounding whitespace.

    For example::
    >>> severity = "Severity: medium"
    >>> expected = VulnerabilitySeverity(system=GENERIC, value="medium")
    >>> assert build_severity(severity) == expected
    """
    label = "Severity:"
    value = severity[len(label) :] if severity.startswith(label) else severity
    value = value.strip()
    if not value:
        return None
    return VulnerabilitySeverity(system=GENERIC, value=value)
0 commit comments