Skip to content

Commit f86eae3

Browse files
committed
Add mozilla importer
Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com>
1 parent 7b73307 commit f86eae3

File tree

3 files changed

+319
-0
lines changed

3 files changed

+319
-0
lines changed

vulnerabilities/importers/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,11 @@
4848
)
4949
from vulnerabilities.pipelines.v2_importers import gitlab_importer as gitlab_importer_v2
5050
from vulnerabilities.pipelines.v2_importers import istio_importer as istio_importer_v2
51+
from vulnerabilities.pipelines.v2_importers import mozilla_importer as mozilla_importer_v2
5152
from vulnerabilities.pipelines.v2_importers import npm_importer as npm_importer_v2
5253
from vulnerabilities.pipelines.v2_importers import nvd_importer as nvd_importer_v2
5354
from vulnerabilities.pipelines.v2_importers import oss_fuzz as oss_fuzz_v2
55+
from vulnerabilities.pipelines.v2_importers import postgresql_importer as postgresql_importer_v2
5456
from vulnerabilities.pipelines.v2_importers import pypa_importer as pypa_importer_v2
5557
from vulnerabilities.pipelines.v2_importers import pysec_importer as pysec_importer_v2
5658
from vulnerabilities.pipelines.v2_importers import vulnrichment_importer as vulnrichment_importer_v2
@@ -71,6 +73,8 @@
7173
curl_importer_v2.CurlImporterPipeline,
7274
oss_fuzz_v2.OSSFuzzImporterPipeline,
7375
istio_importer_v2.IstioImporterPipeline,
76+
postgresql_importer_v2.PostgreSQLImporterPipeline,
77+
mozilla_importer_v2.MozillaImporterPipeline,
7478
nvd_importer.NVDImporterPipeline,
7579
github_importer.GitHubAPIImporterPipeline,
7680
gitlab_importer.GitLabImporterPipeline,
Lines changed: 229 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,229 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# VulnerableCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
import json
10+
import logging
11+
import re
12+
from pathlib import Path
13+
from typing import Iterable
14+
15+
import yaml
16+
from bs4 import BeautifulSoup
17+
from dateutil import parser as date_parser
18+
from fetchcode.vcs import fetch_via_vcs
19+
from markdown import markdown
20+
from packageurl import PackageURL
21+
from univers.versions import SemverVersion
22+
23+
from vulnerabilities.importer import AdvisoryData
24+
from vulnerabilities.importer import AffectedPackage
25+
from vulnerabilities.importer import ReferenceV2
26+
from vulnerabilities.importer import VulnerabilitySeverity
27+
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
28+
from vulnerabilities.severity_systems import GENERIC
29+
from vulnerabilities.utils import get_advisory_url
30+
from vulnerabilities.utils import is_cve
31+
from vulnerabilities.utils import split_markdown_front_matter
32+
33+
logger = logging.getLogger(__name__)
34+
35+
MFSA_FILENAME_RE = re.compile(r"mfsa(\d{4}-\d{2,3})\.(md|yml)$")
36+
37+
38+
class MozillaImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
    """
    Pipeline-based importer for Mozilla Foundation Security Advisories.

    The pipeline clones the advisory repository, parses every ``.yml`` and
    ``.md`` file below its ``announce`` directory with ``parse_advisory`` and
    finally removes the local clone again.
    """

    pipeline_id = "mozilla_importer_v2"
    repo_url = "git+https://github.com/mozilla/foundation-security-advisories"
    spdx_license_expression = "MPL-2.0"
    license_url = "https://github.com/mozilla/foundation-security-advisories/blob/master/LICENSE"

    @classmethod
    def steps(cls):
        """
        Return the ordered pipeline steps: clone, import, then clean up.
        """
        return (
            cls.clone,
            cls.collect_and_store_advisories,
            cls.clean_downloads,
        )

    def clone(self):
        """
        Fetch the advisory repository and keep the VCS response on ``self``.
        """
        self.log(f"Cloning `{self.repo_url}`")
        self.vcs_response = fetch_via_vcs(self.repo_url)

    def advisories_count(self) -> int:
        """
        Return the number of ``.yml`` and ``.md`` files under ``announce``.

        This counts files, not advisories: one YAML file may yield several
        per-CVE advisories and files with an unexpected name yield none.
        """
        base_path = Path(self.vcs_response.dest_dir)
        yml = list((base_path / "announce").glob("**/*.yml"))
        md = list((base_path / "announce").glob("**/*.md"))
        return len(yml) + len(md)

    def collect_advisories(self) -> Iterable[AdvisoryData]:
        """
        Yield the advisories parsed from every YAML/Markdown file under
        the ``announce`` directory of the cloned repository.
        """
        base_path = Path(self.vcs_response.dest_dir)
        advisory_dir = base_path / "announce"

        for file_path in advisory_dir.glob("**/*"):
            if file_path.suffix not in [".yml", ".md"]:
                continue
            yield from parse_advisory(file_path, base_path)

    def clean_downloads(self):
        """
        Delete the cloned repository, if there is one.
        """
        # ``vcs_response`` only exists once ``clone`` succeeded; this method is
        # also reached through ``on_failure`` when cloning itself failed.
        vcs_response = getattr(self, "vcs_response", None)
        if vcs_response:
            self.log("Removing cloned repository")
            vcs_response.delete()

    def on_failure(self):
        """
        Remove the clone when the pipeline aborts before the cleanup step.
        """
        self.clean_downloads()
73+
74+
75+
def parse_advisory(file_path: Path, base_path: Path) -> Iterable[AdvisoryData]:
    """
    Yield the advisories found in the single advisory file ``file_path``.

    ``base_path`` is the root of the cloned repository and is used to build
    the advisory URL. Files whose name does not match the MFSA naming pattern
    yield nothing. ``.md`` files are handled by ``parse_md_advisory`` and
    ``.yml`` files by ``parse_yml_advisory``; any other suffix yields nothing.
    """
    mfsa_id = mfsa_id_from_filename(file_path.name)
    if not mfsa_id:
        # A bare return is how a generator ends without yielding anything.
        return

    advisory_url = get_advisory_url(
        file=file_path,
        base_path=base_path,
        url="https://github.com/mozilla/foundation-security-advisories/blob/master/",
    )

    # Decode explicitly as UTF-8 so that parsing does not depend on the
    # locale of the machine running the import.
    with open(file_path, encoding="utf-8") as lines:
        if file_path.suffix == ".md":
            yield from parse_md_advisory(mfsa_id, lines, advisory_url)
        elif file_path.suffix == ".yml":
            yield from parse_yml_advisory(mfsa_id, lines, advisory_url)
91+
92+
93+
def parse_yml_advisory(mfsa_id, lines, advisory_url) -> Iterable[AdvisoryData]:
    """
    Yield advisories parsed from the YAML advisory stream ``lines``.

    When the document has no ``advisories`` section a single advisory named
    ``mfsa_id`` is yielded. Otherwise one advisory is yielded per entry whose
    key is a CVE id, named ``<mfsa_id>/<cve>`` with the CVE as its alias;
    entries with any other key are ignored. A document that is empty or is
    not a mapping is skipped with a warning.
    """
    data = yaml.safe_load(lines)
    if not isinstance(data, dict):
        # safe_load returns None for an empty document; there is nothing to
        # import and ``data.get`` below would otherwise raise.
        logger.warning(f"Skipping {mfsa_id}: advisory is empty or not a mapping")
        return

    affected_packages = list(parse_affected_packages(data.get("fixed_in") or []))
    reference = ReferenceV2(
        url=f"https://www.mozilla.org/en-US/security/advisories/{mfsa_id}",
    )
    severity = get_severity_from_impact(data.get("impact"), url=reference.url)

    # Parse the publication date once instead of once per yielded advisory.
    announced = data.get("announced")
    date_published = date_parser.parse(announced) if announced else None

    # ``or`` also covers keys that are present with a null value.
    mfsa_summary = BeautifulSoup(data.get("description") or "", features="lxml").get_text()
    advisories = data.get("advisories") or {}

    if not advisories:
        yield AdvisoryData(
            advisory_id=mfsa_id,
            aliases=[],
            summary=mfsa_summary,
            affected_packages=affected_packages,
            references_v2=[reference],
            severities=[severity],
            url=advisory_url,
            date_published=date_published,
            original_advisory_text=json.dumps(data, indent=2, ensure_ascii=False),
        )

    for cve, advisory in advisories.items():
        if not is_cve(cve):
            continue

        # A CVE key with a null body is treated as an entry without details.
        advisory = advisory or {}
        advisory_summary = BeautifulSoup(
            advisory.get("description") or "", features="lxml"
        ).get_text()
        advisory_severity = get_severity_from_impact(advisory.get("impact", ""), url=reference.url)

        yield AdvisoryData(
            advisory_id=f"{mfsa_id}/{cve}",
            aliases=[cve],
            summary=mfsa_summary + "\n" + advisory_summary,
            affected_packages=affected_packages,
            references_v2=[reference],
            url=advisory_url,
            severities=[advisory_severity],
            date_published=date_published,
            original_advisory_text=json.dumps(advisory, indent=2, ensure_ascii=False),
        )
141+
142+
143+
def parse_md_advisory(mfsa_id, lines, advisory_url) -> Iterable[AdvisoryData]:
    """
    Yield the single advisory described by the Markdown advisory ``lines``.

    The text is split with ``split_markdown_front_matter`` into a YAML part,
    which supplies ``fixed_in``, ``impact`` and ``announced``, and a Markdown
    part, from which the summary is extracted. A file whose YAML part is
    empty or is not a mapping is skipped with a warning.
    """
    yamltext, mdtext = split_markdown_front_matter(lines.read())
    data = yaml.safe_load(yamltext)
    if not isinstance(data, dict):
        # safe_load returns None for empty text; ``data.get`` would raise.
        logger.warning(f"Skipping {mfsa_id}: front matter is empty or not a mapping")
        return

    affected_packages = list(parse_affected_packages(data.get("fixed_in") or []))
    reference = ReferenceV2(
        url=f"https://www.mozilla.org/en-US/security/advisories/{mfsa_id}",
    )
    severity = get_severity_from_impact(data.get("impact"), url=reference.url)
    description = extract_description_from_html(mdtext)
    announced = data.get("announced")

    yield AdvisoryData(
        advisory_id=mfsa_id,
        aliases=[],
        summary=description,
        affected_packages=affected_packages,
        references_v2=[reference],
        severities=[severity],
        url=advisory_url,
        date_published=date_parser.parse(announced) if announced else None,
        original_advisory_text=json.dumps(data, indent=2, ensure_ascii=False),
    )
165+
166+
167+
def extract_description_from_html(md_text: str) -> str:
    """
    Return the text of the paragraphs that directly follow the
    "Description" level-3 heading of the Markdown text ``md_text``.

    The heading text is compared case-insensitively. Collection stops at the
    first following sibling that is not a paragraph. An empty string is
    returned when there is no such heading.
    """

    def is_description_heading(text):
        return bool(text) and text.lower() == "description"

    soup = BeautifulSoup(markdown(md_text), features="lxml")
    heading = soup.find("h3", string=is_description_heading)
    if heading is None:
        return ""

    paragraphs = []
    for node in heading.find_next_siblings():
        if node.name == "p":
            paragraphs.append(node.get_text())
        else:
            break

    return "\n".join(paragraphs).strip()
181+
182+
183+
def parse_affected_packages(pkgs: list) -> Iterable[AffectedPackage]:
    """
    Yield an AffectedPackage for each usable "<name> <version>" entry of ``pkgs``.

    The text after the last space is taken as the fixed version and the rest
    as the package name. Empty entries, versions containing exactly three
    dots and versions rejected by ``SemverVersion`` are skipped.
    """
    for entry in filter(None, pkgs):
        package_name, _, raw_version = entry.rpartition(" ")

        # Four-component versions are not valid SemVer.
        if raw_version.count(".") == 3:
            continue

        try:
            parsed_version = SemverVersion(raw_version)
        except Exception:
            logger.debug(f"Invalid version '{raw_version}' for package '{package_name}'")
            continue

        purl = PackageURL(type="mozilla", name=package_name)
        yield AffectedPackage(package=purl, fixed_version=parsed_version)
201+
202+
203+
def get_reference_and_severity(mfsa_id: str, impact: str) -> ReferenceV2:
    """
    Return the reference pointing at the mozilla.org page of ``mfsa_id``.

    Despite the function name no severity is produced: ``impact`` is accepted
    but not used. NOTE(review): no caller is visible in this module.
    """
    advisory_page = f"https://www.mozilla.org/en-US/security/advisories/{mfsa_id}"
    return ReferenceV2(url=advisory_page)
207+
208+
209+
def mfsa_id_from_filename(filename: str) -> str | None:
    """
    Return the advisory id ("mfsa" followed by the captured number part) for
    ``filename``, or None when ``MFSA_FILENAME_RE`` does not match it.
    """
    found = MFSA_FILENAME_RE.search(filename)
    if found is None:
        return None
    return f"mfsa{found.group(1)}"
212+
213+
214+
def get_severity_from_impact(impact: str, url=None) -> VulnerabilitySeverity:
    """
    Return a GENERIC severity derived from the free-text ``impact``.

    The text is searched case-insensitively for a known level keyword, most
    severe first; "moderate" is reported as "medium". A missing ``impact`` or
    one without any known keyword gives the value "none". ``url`` is stored
    on the returned severity unchanged.
    """
    normalized = (impact or "").lower()

    # Ordered from most to least severe so the strongest keyword wins.
    # "moderate" is matched as a substring like every other level, so values
    # such as "Moderate " or "moderate impact" are no longer reported as "none".
    keyword_levels = (
        ("critical", "critical"),
        ("high", "high"),
        ("moderate", "medium"),
        ("medium", "medium"),
        ("low", "low"),
    )

    severity_value = "none"
    for keyword, level in keyword_levels:
        if keyword in normalized:
            severity_value = level
            break

    return VulnerabilitySeverity(system=GENERIC, value=severity_value, url=url)
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# VulnerableCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
import json
10+
from pathlib import Path
11+
from textwrap import dedent
12+
13+
from vulnerabilities.pipelines.v2_importers.mozilla_importer import extract_description_from_html
14+
from vulnerabilities.pipelines.v2_importers.mozilla_importer import get_severity_from_impact
15+
from vulnerabilities.pipelines.v2_importers.mozilla_importer import mfsa_id_from_filename
16+
from vulnerabilities.pipelines.v2_importers.mozilla_importer import parse_affected_packages
17+
from vulnerabilities.pipelines.v2_importers.mozilla_importer import parse_md_advisory
18+
from vulnerabilities.pipelines.v2_importers.mozilla_importer import parse_yml_advisory
19+
20+
21+
def test_mfsa_id_from_filename():
    """
    Advisory file names map to their id; other names map to None.
    """
    expected_ids = {
        "mfsa2022-01.md": "mfsa2022-01",
        "mfsa2022-099.yml": "mfsa2022-099",
    }
    for filename, expected in expected_ids.items():
        assert mfsa_id_from_filename(filename) == expected

    assert mfsa_id_from_filename("notmfsa.txt") is None
25+
26+
27+
def test_get_severity_from_impact():
    """
    Impact strings are normalised to a severity value; unknown or missing
    impacts give "none".
    """
    cases = [
        ("Critical", "critical"),
        ("Moderate", "medium"),
        ("Low", "low"),
        ("Random Text", "none"),
        (None, "none"),
    ]
    for impact, expected in cases:
        assert get_severity_from_impact(impact).value == expected
33+
34+
35+
def test_extract_description_from_html():
    """
    Only the paragraphs between the Description heading and the next
    heading are returned, joined by newlines.
    """
    markdown_source = dedent(
        """
        ### Description

        This vulnerability affects Firefox.

        It could allow attackers to execute arbitrary code.

        ### Impact

        Critical
        """
    )
    description = extract_description_from_html(markdown_source)
    assert description == (
        "This vulnerability affects Firefox.\nIt could allow attackers to execute arbitrary code."
    )
53+
54+
55+
def test_parse_affected_packages_valid():
    """
    Two well-formed entries give two packages; the first one is checked
    for its name and its normalised fixed version.
    """
    parsed = list(parse_affected_packages(["firefox 89.0", "thunderbird 78.10"]))
    assert len(parsed) == 2

    first = parsed[0]
    assert first.package.name == "firefox"
    assert str(first.fixed_version) == "89.0.0"
61+
62+
63+
def test_parse_affected_packages_invalid():
    """
    A four-component version and an entry without a version yield nothing.
    """
    rejected_entries = ["firefox 89.0.0.1", "invalidpackage"]
    # invalid SemVer or malformed
    assert not list(parse_affected_packages(rejected_entries))
67+
68+
69+
def test_parse_yml_advisory(tmp_path: Path):
    """
    A YAML advisory with one CVE entry yields exactly one per-CVE advisory.
    """
    advisory = {
        "announced": "2022-01-01",
        "description": "<p>This is a test</p>",
        "impact": "High",
        "fixed_in": ["firefox 89.0"],
        "advisories": {
            "CVE-2022-1234": {"description": "<p>Memory safety issue</p>", "impact": "Critical"}
        },
    }
    file = tmp_path / "mfsa2022-01.yml"
    # JSON is a subset of YAML, so the dumped dict is a valid advisory file.
    file.write_text(json.dumps(advisory))

    # The parser is a generator: consume it while the file is still open,
    # and let the context manager close the handle afterwards.
    with file.open() as lines:
        results = list(parse_yml_advisory("mfsa2022-01", lines, advisory_url="https://example.com"))

    # The "advisories" section is non-empty, so only the per-CVE advisory is
    # produced and no MFSA-level one.
    assert len(results) == 1
    result = results[0]
    assert result.advisory_id == "mfsa2022-01/CVE-2022-1234"
    assert result.aliases == ["CVE-2022-1234"]
    assert isinstance(result.summary, str)

0 commit comments

Comments
 (0)