Skip to content

Commit 33c24d4

Browse files
committed
Initial Fireeye importer migration to Advisory V2
Signed-off-by: ziad hany <ziadhany2016@gmail.com>
1 parent 973ee5c commit 33c24d4

File tree

2 files changed

+192
-0
lines changed

2 files changed

+192
-0
lines changed

vulnerabilities/importers/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
from vulnerabilities.pipelines.v2_importers import (
4949
elixir_security_importer as elixir_security_importer_v2,
5050
)
51+
from vulnerabilities.pipelines.v2_importers import fireeye_importer_v2
5152
from vulnerabilities.pipelines.v2_importers import github_osv_importer as github_osv_importer_v2
5253
from vulnerabilities.pipelines.v2_importers import gitlab_importer as gitlab_importer_v2
5354
from vulnerabilities.pipelines.v2_importers import istio_importer as istio_importer_v2
@@ -91,6 +92,7 @@
9192
npm_importer.NpmImporterPipeline,
9293
nginx_importer.NginxImporterPipeline,
9394
pysec_importer.PyPIImporterPipeline,
95+
fireeye_importer_v2.FireeyeImporterPipeline,
9496
apache_tomcat.ApacheTomcatImporter,
9597
postgresql.PostgreSQLImporter,
9698
debian.DebianImporter,
Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,190 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# VulnerableCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
import logging
10+
import re
11+
from pathlib import Path
12+
from typing import Iterable
13+
from typing import List
14+
15+
from fetchcode.vcs import fetch_via_vcs
16+
17+
from vulnerabilities.importer import AdvisoryData
18+
from vulnerabilities.importer import ReferenceV2
19+
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
20+
from vulnerabilities.utils import build_description
21+
from vulnerabilities.utils import create_weaknesses_list
22+
from vulnerabilities.utils import cwe_regex
23+
from vulnerabilities.utils import dedupe
24+
25+
logger = logging.getLogger(__name__)
26+
27+
28+
class FireeyeImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
    """
    Importer pipeline that clones the Mandiant Vulnerability-Disclosures Git
    repository (see ``repo_url``) and yields one AdvisoryData per Markdown
    advisory file found in the clone.
    """

    spdx_license_expression = "CC-BY-SA-4.0 AND MIT"
    license_url = "https://github.com/mandiant/Vulnerability-Disclosures/blob/master/README.md"
    notice = """
    Copyright (c) Mandiant
    The following licenses/licensing apply to this Mandiant repository:
    1. CC BY-SA 4.0 - For CVE related information not including source code (such as PoCs)
    2. MIT - For source code contained within provided CVE information
    """
    repo_url = "git+https://github.com/mandiant/Vulnerability-Disclosures"
    pipeline_id = "fireeye_importer_v2"

    @classmethod
    def steps(cls):
        """Return the ordered pipeline steps: clone, collect/store, then clean up."""
        return (
            cls.clone,
            cls.collect_and_store_advisories,
            cls.clean_downloads,
        )

    def _advisory_files(self):
        """
        Yield the Path of every Markdown advisory file in the cloned repository.

        README files are skipped here so that ``advisories_count`` and
        ``collect_advisories`` always agree on the set of files.
        """
        for path in Path(self.vcs_response.dest_dir).glob("**/*"):
            if path.suffix not in (".md", ".MD"):
                continue
            if path.stem == "README":
                continue
            yield path

    def advisories_count(self):
        """Return the number of advisory files that ``collect_advisories`` will process."""
        return sum(1 for _ in self._advisory_files())

    def clone(self):
        """Clone ``repo_url`` and keep the VCS response in ``self.vcs_response``."""
        self.log(f"Cloning `{self.repo_url}`")
        self.vcs_response = fetch_via_vcs(self.repo_url)

    def collect_advisories(self) -> Iterable[AdvisoryData]:
        """
        Yield an AdvisoryData for each Markdown advisory file of the clone.

        Files that cannot be decoded as UTF-8 are logged and skipped.
        """
        base_path = Path(self.vcs_response.dest_dir)
        for file in self._advisory_files():
            # Only the read can raise a decoding error: keep the ``try`` body minimal.
            try:
                raw_data = file.read_text(encoding="utf-8-sig")
            except UnicodeError:
                logger.error(f"Invalid file {file}")
                continue
            yield parse_advisory_data(raw_data=raw_data, file=file, base_path=base_path)

    def clean_downloads(self):
        """Delete the cloned repository, if any."""
        # ``on_failure`` may run before ``clone`` has set ``vcs_response``
        # (for instance when the clone itself fails), hence the getattr.
        vcs_response = getattr(self, "vcs_response", None)
        if vcs_response:
            self.log("Removing cloned repository")
            vcs_response.delete()

    def on_failure(self):
        """Remove the clone when the pipeline fails."""
        self.clean_downloads()
79+
80+
81+
def parse_advisory_data(raw_data, file, base_path) -> AdvisoryData:
    """
    Parse the Markdown text of a single advisory file and return an AdvisoryData.

    ``raw_data`` is the text content of the advisory, ``file`` is the Path of
    the advisory file and ``base_path`` is the Path of the root of the cloned
    repository. ``file`` must be located under ``base_path``.

    The advisory id is the file name without its extension. The title header
    (first line) provides the database id used as an alias.
    """
    relative_path = file.relative_to(base_path).as_posix()
    advisory_url = (
        f"https://github.com/mandiant/Vulnerability-Disclosures/blob/master/{relative_path}"
    )
    raw_data = raw_data.replace("\n\n", "\n")
    md_list = raw_data.split("\n")
    md_dict = md_list_to_dict(md_list)

    # The first line is the title header, such as "# MNDT-2021-0012".
    # Sections are keyed by their full header line, including the "#" marks,
    # so the title section must be looked up with the unmodified line.
    title_line = md_list[0]
    database_id = title_line.lstrip("#").strip()
    summary = md_dict.get(title_line) or []
    description = md_dict.get("## Description") or []
    cve_ref = md_dict.get("## CVE Reference") or []
    references = md_dict.get("## References") or []
    cwe_data = md_dict.get("## Common Weakness Enumeration") or []
    # Other sections are available but not used yet: "## Impact" (could be
    # used to get a severity), "## Exploitability", "## Technical Details",
    # "## Resolution", "## Discovery Credits" and "## Disclosure Timeline".

    return AdvisoryData(
        # Use the advisory file name: ``base_path`` is the clone directory and
        # is the same for every advisory.
        advisory_id=file.stem,
        aliases=get_aliases(database_id, cve_ref),
        summary=build_description(" ".join(summary), " ".join(description)),
        references_v2=get_references(references),
        weaknesses=get_weaknesses(cwe_data),
        url=advisory_url,
    )
115+
116+
117+
def get_references(references):
    """
    Return a list of ReferenceV2 built from a list of reference lines in Markdown format.
    A leading "- " bullet is dropped and Markdown links are reduced to their URL.
    Empty results are ignored.
    >>> get_references(["- http://1-4a.com/cgi-bin/alienform/af.cgi"])
    [ReferenceV2(reference_id='', reference_type='', url='http://1-4a.com/cgi-bin/alienform/af.cgi')]
    >>> get_references(["- [Mitre CVE-2021-42712](https://www.cve.org/CVERecord?id=CVE-2021-42712)"])
    [ReferenceV2(reference_id='', reference_type='', url='https://www.cve.org/CVERecord?id=CVE-2021-42712')]
    """
    bullet = "- "
    links = [
        matcher_url(entry[len(bullet) :] if entry.startswith(bullet) else entry)
        for entry in references
    ]
    return [ReferenceV2(url=link) for link in links if link]
133+
134+
135+
def matcher_url(ref) -> str:
    """
    Return the URL of the first Markdown link ``[text](url)`` found in ``ref``.
    Return ``ref`` unchanged when it does not contain a Markdown link.
    >>> matcher_url("[Mitre CVE-2021-42712](https://www.cve.org/CVERecord?id=CVE-2021-42712)")
    'https://www.cve.org/CVERecord?id=CVE-2021-42712'
    >>> matcher_url("http://1-4a.com/cgi-bin/alienform/af.cgi")
    'http://1-4a.com/cgi-bin/alienform/af.cgi'
    """
    # Raw string: "\[" and "\s" are invalid escape sequences in a plain string.
    # The URL is matched with "\S+" because it cannot contain whitespace; this
    # keeps whitespace padding and any later link on the same line out of the
    # captured URL, while backtracking still allows parentheses inside the URL.
    markup_regex = r"\[([^\[]+)]\(\s*(https?://\S+)\s*\)"
    matched_markup = re.search(markup_regex, ref)
    if matched_markup:
        return matched_markup.group(2)
    return ref
145+
146+
147+
def get_aliases(database_id, cve_ref) -> List:
    """
    Return a list of unique aliases made of the CVEs of the ``cve_ref`` list
    followed by the ``database_id``. The ``cve_ref`` list is not modified.
    >>> get_aliases("MNDT-2021-0012", ["CVE-2021-44207"])
    ['CVE-2021-44207', 'MNDT-2021-0012']
    >>> cves = ["CVE-2021-44207"]
    >>> get_aliases("MNDT-2021-0012", cves)
    ['CVE-2021-44207', 'MNDT-2021-0012']
    >>> cves
    ['CVE-2021-44207']
    """
    # Build a new list rather than appending to the list owned by the caller.
    return dedupe([*cve_ref, database_id])
155+
156+
157+
def md_list_to_dict(md_list):
    """
    Return a dictionary built from ``md_list``, the list of lines of a Markdown file.
    Each key is a header line (a line starting with "#") and its value is the
    list of lines that follow this header, up to the next header.
    Lines found before the first header are stored under the empty string key.
    >>> md_list_to_dict(["# Header","hello" , "hello again" ,"# Header2"])
    {'# Header': ['hello', 'hello again'], '# Header2': []}
    >>> md_list_to_dict(["intro", "# Header"])
    {'': ['intro'], '# Header': []}
    """
    md_dict = {}
    md_key = ""
    for md_line in md_list:
        if md_line.startswith("#"):
            md_dict[md_line] = []
            md_key = md_line
        else:
            # setdefault: the "" key does not exist yet for lines that come
            # before the first header (or for an empty file).
            md_dict.setdefault(md_key, []).append(md_line)
    return md_dict
172+
173+
174+
def get_weaknesses(cwe_data):
    """
    Return the weaknesses list created from the CWE ids found in ``cwe_data``,
    a list of weakness summary lines.

    >>> get_weaknesses([
    ...    "CWE-379: Creation of Temporary File in Directory with Insecure Permissions",
    ...    "CWE-362: Concurrent Execution using Shared Resource with Improper Synchronization ('Race Condition')"
    ... ])
    [379, 362]
    """
    found_ids = [cwe_id for entry in cwe_data for cwe_id in re.findall(cwe_regex, entry)]
    return create_weaknesses_list(found_ids)

0 commit comments

Comments
 (0)