Add Mozilla importer (v2 pipeline) and register the PostgreSQL v2 importer

TG1999 · TG1999 · commit f86eae339ca8 · 2025-07-17T12:53:20.000+05:30
Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com>
diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py
@@ -48,9 +48,11 @@
 )
 from vulnerabilities.pipelines.v2_importers import gitlab_importer as gitlab_importer_v2
 from vulnerabilities.pipelines.v2_importers import istio_importer as istio_importer_v2
+from vulnerabilities.pipelines.v2_importers import mozilla_importer as mozilla_importer_v2
 from vulnerabilities.pipelines.v2_importers import npm_importer as npm_importer_v2
 from vulnerabilities.pipelines.v2_importers import nvd_importer as nvd_importer_v2
 from vulnerabilities.pipelines.v2_importers import oss_fuzz as oss_fuzz_v2
+from vulnerabilities.pipelines.v2_importers import postgresql_importer as postgresql_importer_v2
 from vulnerabilities.pipelines.v2_importers import pypa_importer as pypa_importer_v2
 from vulnerabilities.pipelines.v2_importers import pysec_importer as pysec_importer_v2
 from vulnerabilities.pipelines.v2_importers import vulnrichment_importer as vulnrichment_importer_v2
@@ -71,6 +73,8 @@
         curl_importer_v2.CurlImporterPipeline,
         oss_fuzz_v2.OSSFuzzImporterPipeline,
         istio_importer_v2.IstioImporterPipeline,
+        postgresql_importer_v2.PostgreSQLImporterPipeline,
+        mozilla_importer_v2.MozillaImporterPipeline,
         nvd_importer.NVDImporterPipeline,
         github_importer.GitHubAPIImporterPipeline,
         gitlab_importer.GitLabImporterPipeline,
diff --git a/vulnerabilities/pipelines/v2_importers/mozilla_importer.py b/vulnerabilities/pipelines/v2_importers/mozilla_importer.py
@@ -0,0 +1,229 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+import json
+import logging
+import re
+from pathlib import Path
+from typing import Iterable
+
+import yaml
+from bs4 import BeautifulSoup
+from dateutil import parser as date_parser
+from fetchcode.vcs import fetch_via_vcs
+from markdown import markdown
+from packageurl import PackageURL
+from univers.versions import SemverVersion
+
+from vulnerabilities.importer import AdvisoryData
+from vulnerabilities.importer import AffectedPackage
+from vulnerabilities.importer import ReferenceV2
+from vulnerabilities.importer import VulnerabilitySeverity
+from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
+from vulnerabilities.severity_systems import GENERIC
+from vulnerabilities.utils import get_advisory_url
+from vulnerabilities.utils import is_cve
+from vulnerabilities.utils import split_markdown_front_matter
+
+# Module-level logger, named after this module.
+logger = logging.getLogger(__name__)
+
+# Matches advisory file names ending in "mfsa<4 digits>-<2 or 3 digits>.md"
+# or ".yml". Group 1 captures the "<4 digits>-<2 or 3 digits>" part, which
+# mfsa_id_from_filename() prefixes with "mfsa" to build the advisory id.
+MFSA_FILENAME_RE = re.compile(r"mfsa(\d{4}-\d{2,3})\.(md|yml)$")
+
+
+class MozillaImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
+    """
+    Pipeline-based importer for Mozilla Foundation Security Advisories.
+
+    The advisories are read from the ``announce`` directory of a local clone
+    of ``repo_url``. Files with a ``.yml`` or ``.md`` suffix are parsed; the
+    clone is removed once the advisories have been collected.
+    """
+
+    pipeline_id = "mozilla_importer_v2"
+    repo_url = "git+https://github.com/mozilla/foundation-security-advisories"
+    spdx_license_expression = "MPL-2.0"
+    license_url = "https://github.com/mozilla/foundation-security-advisories/blob/master/LICENSE"
+
+    @classmethod
+    def steps(cls):
+        """
+        Return the pipeline steps, in execution order.
+        """
+        return (
+            cls.clone,
+            cls.collect_and_store_advisories,
+            # Remove the checkout so that repeated runs do not pile up clones.
+            cls.clean_downloads,
+        )
+
+    def clone(self):
+        """
+        Clone ``repo_url`` and keep the VCS response in ``self.vcs_response``.
+        """
+        self.log(f"Cloning `{self.repo_url}`")
+        self.vcs_response = fetch_via_vcs(self.repo_url)
+
+    def advisories_count(self) -> int:
+        """
+        Return the number of ``.yml`` and ``.md`` files under ``announce``.
+
+        This counts files, not advisories: a single YAML file can yield one
+        advisory per CVE it lists.
+        """
+        advisory_dir = Path(self.vcs_response.dest_dir) / "announce"
+        return sum(
+            len(list(advisory_dir.glob(pattern))) for pattern in ("**/*.yml", "**/*.md")
+        )
+
+    def collect_advisories(self) -> Iterable[AdvisoryData]:
+        """
+        Yield AdvisoryData for every ``.yml`` or ``.md`` file under ``announce``.
+        """
+        base_path = Path(self.vcs_response.dest_dir)
+        advisory_dir = base_path / "announce"
+
+        for file_path in advisory_dir.glob("**/*"):
+            if file_path.suffix not in (".yml", ".md"):
+                continue
+            yield from parse_advisory(file_path, base_path)
+
+    def clean_downloads(self):
+        """
+        Delete the cloned repository, if there is one.
+        """
+        # ``clone`` may have failed before setting the attribute.
+        vcs_response = getattr(self, "vcs_response", None)
+        if vcs_response:
+            self.log("Removing cloned repository")
+            # NOTE(review): relies on the fetchcode VCS response exposing
+            # ``delete()`` to remove its ``dest_dir`` - confirm.
+            vcs_response.delete()
+
+    def on_failure(self):
+        """
+        Remove the cloned repository when the pipeline fails.
+        """
+        # NOTE(review): assumes the base pipeline calls ``on_failure()`` when a
+        # step raises - confirm against VulnerableCodeBaseImporterPipelineV2.
+        self.clean_downloads()
+
+
+def parse_advisory(file_path: Path, base_path: Path) -> Iterable[AdvisoryData]:
+    """
+    Yield AdvisoryData parsed from the advisory file at ``file_path``.
+
+    ``base_path`` is the root of the cloned repository and is used to build
+    the advisory URL. Nothing is yielded when no MFSA id can be derived from
+    the file name, or when the suffix is neither ``.md`` nor ``.yml``.
+    """
+    mfsa_id = mfsa_id_from_filename(file_path.name)
+    if not mfsa_id:
+        # Bare return: this is a generator, a returned value would be dropped.
+        return
+
+    # Built only for files that are actually parsed.
+    advisory_url = get_advisory_url(
+        file=file_path,
+        base_path=base_path,
+        url="https://github.com/mozilla/foundation-security-advisories/blob/master/",
+    )
+
+    # Decode explicitly as UTF-8 so that parsing does not depend on the
+    # default encoding of the platform running the import.
+    with open(file_path, encoding="utf-8") as lines:
+        if file_path.suffix == ".md":
+            yield from parse_md_advisory(mfsa_id, lines, advisory_url)
+        elif file_path.suffix == ".yml":
+            yield from parse_yml_advisory(mfsa_id, lines, advisory_url)
+
+
+def parse_yml_advisory(mfsa_id, lines, advisory_url) -> Iterable[AdvisoryData]:
+    """
+    Yield AdvisoryData parsed from the YAML advisory read from ``lines``.
+
+    ``lines`` is an open text stream (or a string) holding the YAML document,
+    ``mfsa_id`` the advisory id and ``advisory_url`` the URL of the source file.
+
+    When the document has no ``advisories`` mapping, a single AdvisoryData with
+    ``mfsa_id`` as its id is yielded. Otherwise one AdvisoryData is yielded per
+    key of ``advisories`` that is a CVE id, using ``<mfsa_id>/<cve>`` as id and
+    the CVE as alias; other keys are skipped. Nothing is yielded when the
+    document is not a mapping.
+    """
+    data = yaml.safe_load(lines)
+    if not isinstance(data, dict):
+        # Empty file or unexpected top-level structure: nothing to import.
+        logger.debug("No YAML mapping found in advisory %s", mfsa_id)
+        return
+
+    affected_packages = list(parse_affected_packages(data.get("fixed_in") or []))
+    reference = ReferenceV2(
+        url=f"https://www.mozilla.org/en-US/security/advisories/{mfsa_id}",
+    )
+    severity = get_severity_from_impact(data.get("impact"), url=reference.url)
+
+    # ``or ""`` also covers a key that is present with a null value.
+    mfsa_summary = BeautifulSoup(data.get("description") or "", features="lxml").get_text()
+
+    # yaml.safe_load turns unquoted ISO dates into date objects, which dateutil
+    # does not accept; str() gives it text in every case. Parsed once, not per CVE.
+    announced = data.get("announced")
+    date_published = date_parser.parse(str(announced)) if announced else None
+
+    advisories = data.get("advisories") or {}
+
+    if not advisories:
+        yield AdvisoryData(
+            advisory_id=mfsa_id,
+            aliases=[],
+            summary=mfsa_summary,
+            affected_packages=affected_packages,
+            references_v2=[reference],
+            severities=[severity],
+            url=advisory_url,
+            date_published=date_published,
+            # default=str keeps date objects produced by YAML serializable.
+            original_advisory_text=json.dumps(data, indent=2, ensure_ascii=False, default=str),
+        )
+
+    for cve, advisory in advisories.items():
+        if not is_cve(cve):
+            continue
+
+        # A CVE key with no content is loaded as None.
+        advisory = advisory or {}
+        advisory_summary = BeautifulSoup(
+            advisory.get("description") or "", features="lxml"
+        ).get_text()
+        advisory_severity = get_severity_from_impact(advisory.get("impact"), url=reference.url)
+
+        yield AdvisoryData(
+            advisory_id=f"{mfsa_id}/{cve}",
+            aliases=[cve],
+            # Join only the non-empty parts to avoid a stray leading or trailing newline.
+            summary="\n".join(part for part in (mfsa_summary, advisory_summary) if part),
+            affected_packages=affected_packages,
+            references_v2=[reference],
+            url=advisory_url,
+            severities=[advisory_severity],
+            date_published=date_published,
+            original_advisory_text=json.dumps(
+                advisory, indent=2, ensure_ascii=False, default=str
+            ),
+        )
+
+
+def parse_md_advisory(mfsa_id, lines, advisory_url) -> Iterable[AdvisoryData]:
+    """
+    Yield one AdvisoryData parsed from the Markdown advisory read from ``lines``.
+
+    ``lines`` is an open text stream whose content is a YAML front matter
+    followed by a Markdown body. The front matter provides ``fixed_in``,
+    ``impact`` and ``announced``; the summary is taken from the "Description"
+    section of the body. Nothing is yielded when the front matter is missing
+    or is not a mapping.
+    """
+    yamltext, mdtext = split_markdown_front_matter(lines.read())
+    data = yaml.safe_load(yamltext)
+    if not isinstance(data, dict):
+        # safe_load returns None for empty text: there is no metadata to import.
+        logger.debug("No YAML front matter found in advisory %s", mfsa_id)
+        return
+
+    affected_packages = list(parse_affected_packages(data.get("fixed_in") or []))
+    reference = ReferenceV2(
+        url=f"https://www.mozilla.org/en-US/security/advisories/{mfsa_id}",
+    )
+    severity = get_severity_from_impact(data.get("impact"), url=reference.url)
+    description = extract_description_from_html(mdtext)
+
+    # yaml.safe_load turns unquoted ISO dates into date objects, which dateutil
+    # does not accept; str() gives it text in every case.
+    announced = data.get("announced")
+    date_published = date_parser.parse(str(announced)) if announced else None
+
+    yield AdvisoryData(
+        advisory_id=mfsa_id,
+        aliases=[],
+        summary=description,
+        affected_packages=affected_packages,
+        references_v2=[reference],
+        severities=[severity],
+        url=advisory_url,
+        date_published=date_published,
+        # default=str keeps date objects produced by YAML serializable.
+        original_advisory_text=json.dumps(data, indent=2, ensure_ascii=False, default=str),
+    )
+
+
+def extract_description_from_html(md_text: str) -> str:
+    """
+    Return the text of the "Description" section of the Markdown ``md_text``.
+
+    The Markdown is rendered to HTML, then the paragraphs that directly follow
+    the ``<h3>`` heading whose text is "description" (case-insensitive) are
+    joined with newlines. An empty string is returned without such a heading.
+    """
+    document = BeautifulSoup(markdown(md_text), features="lxml")
+    heading = document.find("h3", string=lambda s: s and s.lower() == "description")
+    if heading is None:
+        return ""
+
+    paragraphs = []
+    for node in heading.find_next_siblings():
+        if node.name == "p":
+            paragraphs.append(node.get_text())
+            continue
+        # The section ends at the first sibling that is not a paragraph.
+        break
+
+    joined = "\n".join(paragraphs)
+    return joined.strip()
+
+
+def parse_affected_packages(pkgs: list) -> Iterable[AffectedPackage]:
+    """
+    Yield an AffectedPackage for each "<name> <version>" entry of ``pkgs``.
+
+    The version is the last space-separated token of the entry and is used as
+    the fixed version; everything before it is the package name. Entries are
+    skipped when they are empty, are not strings, have no name, have a
+    four-part version or have a version that SemverVersion rejects.
+    """
+    for pkg in pkgs:
+        if not pkg or not isinstance(pkg, str):
+            continue
+
+        # Strip first: trailing whitespace would otherwise be taken as the
+        # separator and leave an empty version.
+        name, _, version = pkg.strip().rpartition(" ")
+        name = name.strip()
+        if not name:
+            # Without a separator the whole entry lands in ``version``; a
+            # PackageURL cannot be built with an empty name.
+            logger.debug("Missing package name in entry %r", pkg)
+            continue
+        if version.count(".") == 3:
+            continue  # invalid SemVer
+        try:
+            fixed_version = SemverVersion(version)
+        except Exception:
+            logger.debug("Invalid version %r for package %r", version, name)
+            continue
+
+        yield AffectedPackage(
+            package=PackageURL(type="mozilla", name=name),
+            fixed_version=fixed_version,
+        )
+
+
+def get_reference_and_severity(mfsa_id: str, impact: str) -> ReferenceV2:
+    """
+    Return a ReferenceV2 for the mozilla.org advisory page of ``mfsa_id``.
+
+    ``impact`` is accepted but not used: only the reference is returned.
+    """
+    advisory_page = f"https://www.mozilla.org/en-US/security/advisories/{mfsa_id}"
+    return ReferenceV2(url=advisory_page)
+
+
+def mfsa_id_from_filename(filename: str) -> str | None:
+    """
+    Return the MFSA id found in ``filename``, or None when there is none.
+
+    The id is "mfsa" followed by the first group captured by MFSA_FILENAME_RE.
+    """
+    found = MFSA_FILENAME_RE.search(filename)
+    if not found:
+        return None
+    return f"mfsa{found.group(1)}"
+
+
+def get_severity_from_impact(impact: str, url=None) -> VulnerabilitySeverity:
+    """
+    Return a GENERIC VulnerabilitySeverity derived from the ``impact`` text.
+
+    The value is the first of "critical", "high", "medium", "low" and "none"
+    that occurs in the lowercased ``impact``, with "moderate" counted as
+    "medium". "none" is used when ``impact`` is empty or names no known level.
+    ``url`` is passed through to the returned severity.
+    """
+    # Rewrite "moderate" wherever it occurs so that it is matched by substring
+    # like every other level, e.g. in "Moderate impact".
+    impact = (impact or "").lower().replace("moderate", "medium")
+    severities = ("critical", "high", "medium", "low", "none")
+    severity_value = next((level for level in severities if level in impact), "none")
+
+    return VulnerabilitySeverity(system=GENERIC, value=severity_value, url=url)
diff --git a/vulnerabilities/tests/pipelines/test_mozilla_importer_v2.py b/vulnerabilities/tests/pipelines/test_mozilla_importer_v2.py
@@ -0,0 +1,86 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+import json
+from pathlib import Path
+from textwrap import dedent
+
+from vulnerabilities.pipelines.v2_importers.mozilla_importer import extract_description_from_html
+from vulnerabilities.pipelines.v2_importers.mozilla_importer import get_severity_from_impact
+from vulnerabilities.pipelines.v2_importers.mozilla_importer import mfsa_id_from_filename
+from vulnerabilities.pipelines.v2_importers.mozilla_importer import parse_affected_packages
+from vulnerabilities.pipelines.v2_importers.mozilla_importer import parse_md_advisory
+from vulnerabilities.pipelines.v2_importers.mozilla_importer import parse_yml_advisory
+
+
+def test_mfsa_id_from_filename():
+    # File names mapped to the id expected from them; None means "no id".
+    expected_ids = {
+        "mfsa2022-01.md": "mfsa2022-01",
+        "mfsa2022-099.yml": "mfsa2022-099",
+        "notmfsa.txt": None,
+    }
+    for filename, expected in expected_ids.items():
+        assert mfsa_id_from_filename(filename) == expected
+
+
+def test_get_severity_from_impact():
+    # Pairs of (impact text, expected severity value).
+    cases = [
+        ("Critical", "critical"),
+        ("Moderate", "medium"),
+        ("Low", "low"),
+        ("Random Text", "none"),
+        (None, "none"),
+    ]
+    for impact, expected in cases:
+        severity = get_severity_from_impact(impact)
+        assert severity.value == expected
+
+
+def test_extract_description_from_html():
+    # Only the paragraphs between the two headings belong to the description.
+    markdown_source = dedent(
+        """
+        ### Description
+
+        This vulnerability affects Firefox.
+
+        It could allow attackers to execute arbitrary code.
+
+        ### Impact
+
+        Critical
+    """
+    )
+    paragraphs = [
+        "This vulnerability affects Firefox.",
+        "It could allow attackers to execute arbitrary code.",
+    ]
+    description = extract_description_from_html(markdown_source)
+    assert description == "\n".join(paragraphs)
+
+
+def test_parse_affected_packages_valid():
+    # Every entry is checked, not only the first one.
+    packages = ["firefox 89.0", "thunderbird 78.10"]
+    result = list(parse_affected_packages(packages))
+    assert len(result) == 2
+    assert [affected.package.type for affected in result] == ["mozilla", "mozilla"]
+    assert result[0].package.name == "firefox"
+    assert str(result[0].fixed_version) == "89.0.0"
+    assert result[1].package.name == "thunderbird"
+    assert str(result[1].fixed_version) == "78.10.0"
+
+
+def test_parse_affected_packages_invalid():
+    # A four-part version and an entry without a version are both rejected.
+    rejected_entries = ["firefox 89.0.0.1", "invalidpackage"]
+    parsed = list(parse_affected_packages(rejected_entries))
+    assert not parsed  # invalid SemVer or malformed
+
+
+def test_parse_yml_advisory(tmp_path: Path):
+    advisory = {
+        "announced": "2022-01-01",
+        "description": "<p>This is a test</p>",
+        "impact": "High",
+        "fixed_in": ["firefox 89.0"],
+        "advisories": {
+            "CVE-2022-1234": {"description": "<p>Memory safety issue</p>", "impact": "Critical"}
+        },
+    }
+    file = tmp_path / "mfsa2022-01.yml"
+    # JSON is valid YAML, so the dump can be read back by the YAML parser.
+    file.write_text(json.dumps(advisory))
+
+    # The context manager closes the handle once the generator is consumed.
+    with file.open() as lines:
+        results = list(
+            parse_yml_advisory("mfsa2022-01", lines, advisory_url="https://example.com")
+        )
+
+    # One CVE entry gives exactly one advisory, identified by "<mfsa id>/<cve>".
+    assert len(results) == 1
+    result = results[0]
+    assert result.advisory_id == "mfsa2022-01/CVE-2022-1234"
+    assert result.aliases == ["CVE-2022-1234"]
+    assert isinstance(result.summary, str)
+    assert "Memory safety issue" in result.summary