Skip to content

Commit 4171dbe

Browse files
authored
Migrate the NVD importer to use the 2.0 API schema (#2012)
* Update nvd test data
Update NVD Importer v2 to use 2.0 API schema
Migrate nvd importer v1 to use 2.0 API schema

Signed-off-by: ziad hany <ziadhany2016@gmail.com>

* Fix nvd importer to collect severities correctly
Add a test

Signed-off-by: ziad hany <ziadhany2016@gmail.com>

* Add a test for nvd importer v2

Signed-off-by: ziad hany <ziadhany2016@gmail.com>

---------

Signed-off-by: ziad hany <ziadhany2016@gmail.com>
1 parent ca6a6bf commit 4171dbe

File tree

12 files changed

+2738
-773
lines changed

12 files changed

+2738
-773
lines changed

vulnerabilities/pipelines/nvd_importer.py

Lines changed: 51 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import json
1212
import logging
1313
from datetime import date
14+
from datetime import timezone
1415
from traceback import format_exc as traceback_format_exc
1516
from typing import Iterable
1617

@@ -94,7 +95,7 @@ def advisories_count(self):
9495
return advisory_count
9596

9697
def collect_advisories(self) -> Iterable[AdvisoryData]:
97-
for _year, cve_data in fetch_cve_data_1_1(logger=self.log):
98+
for _year, cve_data in fetch_cve_data_2_0(logger=self.log):
9899
yield from to_advisories(cve_data=cve_data)
99100

100101

@@ -107,15 +108,15 @@ def fetch(url, logger=None):
107108
return json.loads(data)
108109

109110

110-
def fetch_cve_data_1_1(starting_year=2002, logger=None):
111+
def fetch_cve_data_2_0(starting_year=2002, logger=None):
111112
"""
112113
Yield tuples of (year, lists of CVE mappings) from the NVD, one for each
113114
year since ``starting_year`` defaulting to 2002.
114115
"""
115116
current_year = date.today().year
116117
# NVD json feeds start from 2002.
117118
for year in range(starting_year, current_year + 1):
118-
download_url = f"https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-{year}.json.gz"
119+
download_url = f"https://nvd.nist.gov/feeds/json/cve/2.0/nvdcve-2.0-{year}.json.gz"
119120
yield year, fetch(url=download_url, logger=logger)
120121

121122

@@ -134,20 +135,22 @@ class CveItem:
134135
cve_item = attr.attrib(default=attr.Factory(dict), type=dict)
135136

136137
@classmethod
137-
def to_advisories(cls, cve_data, skip_hardware=True):
138+
def to_advisories(cls, vulnerabilities, skip_hardware=True):
138139
"""
139140
Yield AdvisoryData objects from ``vulnerabilities`` data for the CVE JSON 2.0 feed.
140141
Skip hardware
141142
"""
142-
for cve_item in CveItem.from_cve_data(cve_data=cve_data, skip_hardware=skip_hardware):
143+
for cve_item in CveItem.from_cve_data(
144+
cve_data=vulnerabilities, skip_hardware=skip_hardware
145+
):
143146
yield cve_item.to_advisory()
144147

145148
@classmethod
146149
def from_cve_data(cls, cve_data, skip_hardware=True):
147150
"""
148151
Yield CVE items mapping from a cve_data list of CVE mappings from the NVD.
149152
"""
150-
for cve_item in cve_data.get("CVE_Items") or []:
153+
for cve_item in cve_data.get("vulnerabilities") or []:
151154
if not cve_item:
152155
continue
153156
if not isinstance(cve_item, dict):
@@ -159,20 +162,20 @@ def from_cve_data(cls, cve_data, skip_hardware=True):
159162

160163
@property
161164
def cve_id(self):
162-
return self.cve_item["cve"]["CVE_data_meta"]["ID"]
165+
return self.cve_item["cve"]["id"]
163166

164167
@property
165168
def summary(self):
166169
"""
167170
Return a descriptive summary.
168171
"""
169-
# In 99% of cases len(cve_item['cve']['description']['description_data']) == 1 , so
170-
# this usually returns cve_item['cve']['description']['description_data'][0]['value']
172+
# In 99% of cases len(cve_item['cve']['descriptions']) == 1 , so
173+
# this usually returns cve_item['cve']['descriptions'][0]['value']
171174
# In the remaining 1% cases this returns the longest summary.
172-
# FIXME: we should retun the full description WITH the summry as the first line instead
175+
# FIXME: we should return the full description WITH the summary as the first line instead
173176
summaries = []
174-
for desc in get_item(self.cve_item, "cve", "description", "description_data") or []:
175-
if desc.get("value"):
177+
for desc in get_item(self.cve_item, "cve", "descriptions") or []:
178+
if desc.get("value") and desc.get("lang") == "en":
176179
summaries.append(desc["value"])
177180
return max(summaries, key=len) if summaries else None
178181

@@ -183,11 +186,12 @@ def cpes(self):
183186
"""
184187
# FIXME: we completely ignore the configurations here
185188
cpes = []
186-
for node in get_item(self.cve_item, "configurations", "nodes") or []:
187-
for cpe_data in node.get("cpe_match") or []:
188-
cpe23_uri = cpe_data.get("cpe23Uri")
189-
if cpe23_uri and cpe23_uri not in cpes:
190-
cpes.append(cpe23_uri)
189+
for nodes in get_item(self.cve_item, "cve", "configurations") or []:
190+
for node in nodes.get("nodes") or []:
191+
for cpe_data in node.get("cpeMatch") or []:
192+
cpe23_uri = cpe_data.get("criteria")
193+
if cpe23_uri and cpe23_uri not in cpes:
194+
cpes.append(cpe23_uri)
191195
return cpes
192196

193197
@property
@@ -196,43 +200,32 @@ def severities(self):
196200
Return a list of VulnerabilitySeverity for this CVE.
197201
"""
198202
severities = []
199-
impact = self.cve_item.get("impact") or {}
200-
base_metric_v4 = impact.get("baseMetricV4") or {}
201-
if base_metric_v4:
202-
cvss_v4 = base_metric_v4.get("cvssV4") or {}
203-
vs = VulnerabilitySeverity(
204-
system=severity_systems.CVSSV4,
205-
value=str(cvss_v4.get("baseScore") or ""),
206-
scoring_elements=str(cvss_v4.get("vectorString") or ""),
207-
)
208-
severities.append(vs)
209-
210-
base_metric_v3 = impact.get("baseMetricV3") or {}
211-
if base_metric_v3:
212-
cvss_v3 = get_item(base_metric_v3, "cvssV3")
213-
version = cvss_v3.get("version")
214-
system = None
215-
if version == "3.1":
216-
system = severity_systems.CVSSV31
217-
else:
218-
system = severity_systems.CVSSV3
219-
vs = VulnerabilitySeverity(
220-
system=system,
221-
value=str(cvss_v3.get("baseScore") or ""),
222-
scoring_elements=str(cvss_v3.get("vectorString") or ""),
223-
)
224-
severities.append(vs)
225-
226-
base_metric_v2 = impact.get("baseMetricV2") or {}
227-
if base_metric_v2:
228-
cvss_v2 = base_metric_v2.get("cvssV2") or {}
229-
vs = VulnerabilitySeverity(
230-
system=severity_systems.CVSSV2,
231-
value=str(cvss_v2.get("baseScore") or ""),
232-
scoring_elements=str(cvss_v2.get("vectorString") or ""),
233-
)
234-
severities.append(vs)
203+
metrics = get_item(self.cve_item, "cve", "metrics") or {}
204+
url = f"https://nvd.nist.gov/vuln/detail/{self.cve_id}"
205+
metric_configs = [
206+
("cvssMetricV40", severity_systems.CVSSV4),
207+
("cvssMetricV31", severity_systems.CVSSV31),
208+
("cvssMetricV30", severity_systems.CVSSV3),
209+
("cvssMetricV2", severity_systems.CVSSV2),
210+
]
235211

212+
for key, default_system in metric_configs:
213+
items = metrics.get(key) or []
214+
215+
for item in items:
216+
cvss_data = item.get("cvssData") or {}
217+
system = default_system
218+
if key == "cvssMetricV31" and cvss_data.get("version") != "3.1":
219+
system = severity_systems.CVSSV3
220+
221+
severities.append(
222+
VulnerabilitySeverity(
223+
system=system,
224+
value=str(cvss_data.get("baseScore") or ""),
225+
scoring_elements=str(cvss_data.get("vectorString") or ""),
226+
url=url,
227+
)
228+
)
236229
return severities
237230

238231
@property
@@ -243,7 +236,7 @@ def reference_urls(self):
243236
# FIXME: we should also collect additional data from the references such as tags and ids
244237

245238
urls = []
246-
for reference in get_item(self.cve_item, "cve", "references", "reference_data") or []:
239+
for reference in get_item(self.cve_item, "cve", "references") or []:
247240
ref_url = reference.get("url")
248241
if ref_url and ref_url.startswith(("http", "ftp")) and ref_url not in urls:
249242
urls.append(ref_url)
@@ -294,9 +287,7 @@ def weaknesses(self):
294287
Return a list of CWE IDs like: [119, 189]
295288
"""
296289
weaknesses = []
297-
for weaknesses_item in (
298-
get_item(self.cve_item, "cve", "problemtype", "problemtype_data") or []
299-
):
290+
for weaknesses_item in get_item(self.cve_item, "cve", "weaknesses") or []:
300291
weaknesses_description = weaknesses_item.get("description") or []
301292
for weaknesses_value in weaknesses_description:
302293
cwe_id = (
@@ -315,7 +306,9 @@ def to_advisory(self):
315306
aliases=[self.cve_id],
316307
summary=self.summary,
317308
references=self.references,
318-
date_published=dateparser.parse(self.cve_item.get("publishedDate")),
309+
date_published=dateparser.parse(self.cve_item["cve"].get("published")).replace(
310+
tzinfo=timezone.utc
311+
),
319312
weaknesses=self.weaknesses,
320313
url=f"https://nvd.nist.gov/vuln/detail/{self.cve_id}",
321314
)

vulnerabilities/pipelines/v2_importers/nvd_importer.py

Lines changed: 44 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import json
1212
import logging
1313
from datetime import date
14+
from datetime import timezone
1415
from traceback import format_exc as traceback_format_exc
1516
from typing import Iterable
1617

@@ -93,7 +94,7 @@ def advisories_count(self):
9394
return advisory_count
9495

9596
def collect_advisories(self) -> Iterable[AdvisoryData]:
96-
for _year, cve_data in fetch_cve_data_1_1(logger=self.log):
97+
for _year, cve_data in fetch_cve_data_2_0(logger=self.log):
9798
yield from to_advisories(cve_data=cve_data)
9899

99100

@@ -111,15 +112,15 @@ def fetch(url, logger=None):
111112
return json.loads(data)
112113

113114

114-
def fetch_cve_data_1_1(starting_year=2002, logger=None):
115+
def fetch_cve_data_2_0(starting_year=2002, logger=None):
115116
"""
116117
Yield tuples of (year, lists of CVE mappings) from the NVD, one for each
117118
year since ``starting_year`` defaulting to 2002.
118119
"""
119120
current_year = date.today().year
120121
# NVD json feeds start from 2002.
121122
for year in range(starting_year, current_year + 1):
122-
download_url = f"https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-{year}.json.gz"
123+
download_url = f"https://nvd.nist.gov/feeds/json/cve/2.0/nvdcve-2.0-{year}.json.gz"
123124
yield year, fetch(url=download_url, logger=logger)
124125

125126

@@ -151,7 +152,7 @@ def from_cve_data(cls, cve_data, skip_hardware=True):
151152
"""
152153
Yield CVE items mapping from a cve_data list of CVE mappings from the NVD.
153154
"""
154-
for cve_item in cve_data.get("CVE_Items") or []:
155+
for cve_item in cve_data.get("vulnerabilities") or []:
155156
if not cve_item:
156157
continue
157158
if not isinstance(cve_item, dict):
@@ -163,7 +164,7 @@ def from_cve_data(cls, cve_data, skip_hardware=True):
163164

164165
@property
165166
def cve_id(self):
166-
return self.cve_item["cve"]["CVE_data_meta"]["ID"]
167+
return self.cve_item["cve"]["id"]
167168

168169
@property
169170
def summary(self):
@@ -175,8 +176,8 @@ def summary(self):
175176
# In the remaining 1% cases this returns the longest summary.
176177
# FIXME: we should return the full description WITH the summary as the first line instead
177178
summaries = []
178-
for desc in get_item(self.cve_item, "cve", "description", "description_data") or []:
179-
if desc.get("value"):
179+
for desc in get_item(self.cve_item, "cve", "descriptions") or []:
180+
if desc.get("value") and desc.get("lang") == "en":
180181
summaries.append(desc["value"])
181182
return max(summaries, key=len) if summaries else None
182183

@@ -187,11 +188,12 @@ def cpes(self):
187188
"""
188189
# FIXME: we completely ignore the configurations here
189190
cpes = []
190-
for node in get_item(self.cve_item, "configurations", "nodes") or []:
191-
for cpe_data in node.get("cpe_match") or []:
192-
cpe23_uri = cpe_data.get("cpe23Uri")
193-
if cpe23_uri and cpe23_uri not in cpes:
194-
cpes.append(cpe23_uri)
191+
for nodes in get_item(self.cve_item, "cve", "configurations") or []:
192+
for node in nodes.get("nodes") or []:
193+
for cpe_data in node.get("cpeMatch") or []:
194+
cpe23_uri = cpe_data.get("criteria")
195+
if cpe23_uri and cpe23_uri not in cpes:
196+
cpes.append(cpe23_uri)
195197
return cpes
196198

197199
@property
@@ -200,46 +202,32 @@ def severities(self):
200202
Return a list of VulnerabilitySeverity for this CVE.
201203
"""
202204
severities = []
203-
impact = self.cve_item.get("impact") or {}
204-
base_metric_v4 = impact.get("baseMetricV4") or {}
205-
if base_metric_v4:
206-
cvss_v4 = base_metric_v4.get("cvssV4") or {}
207-
vs = VulnerabilitySeverity(
208-
system=severity_systems.CVSSV4,
209-
value=str(cvss_v4.get("baseScore") or ""),
210-
scoring_elements=str(cvss_v4.get("vectorString") or ""),
211-
url=f"https://nvd.nist.gov/vuln/detail/{self.cve_id}",
212-
)
213-
severities.append(vs)
214-
215-
base_metric_v3 = impact.get("baseMetricV3") or {}
216-
if base_metric_v3:
217-
cvss_v3 = get_item(base_metric_v3, "cvssV3")
218-
version = cvss_v3.get("version")
219-
system = None
220-
if version == "3.1":
221-
system = severity_systems.CVSSV31
222-
else:
223-
system = severity_systems.CVSSV3
224-
vs = VulnerabilitySeverity(
225-
system=system,
226-
value=str(cvss_v3.get("baseScore") or ""),
227-
scoring_elements=str(cvss_v3.get("vectorString") or ""),
228-
url=f"https://nvd.nist.gov/vuln/detail/{self.cve_id}",
229-
)
230-
severities.append(vs)
231-
232-
base_metric_v2 = impact.get("baseMetricV2") or {}
233-
if base_metric_v2:
234-
cvss_v2 = base_metric_v2.get("cvssV2") or {}
235-
vs = VulnerabilitySeverity(
236-
system=severity_systems.CVSSV2,
237-
value=str(cvss_v2.get("baseScore") or ""),
238-
scoring_elements=str(cvss_v2.get("vectorString") or ""),
239-
url=f"https://nvd.nist.gov/vuln/detail/{self.cve_id}",
240-
)
241-
severities.append(vs)
205+
metrics = get_item(self.cve_item, "cve", "metrics") or {}
206+
url = f"https://nvd.nist.gov/vuln/detail/{self.cve_id}"
207+
metric_configs = [
208+
("cvssMetricV40", severity_systems.CVSSV4),
209+
("cvssMetricV31", severity_systems.CVSSV31),
210+
("cvssMetricV30", severity_systems.CVSSV3),
211+
("cvssMetricV2", severity_systems.CVSSV2),
212+
]
242213

214+
for key, default_system in metric_configs:
215+
items = metrics.get(key) or []
216+
217+
for item in items:
218+
cvss_data = item.get("cvssData") or {}
219+
system = default_system
220+
if key == "cvssMetricV31" and cvss_data.get("version") != "3.1":
221+
system = severity_systems.CVSSV3
222+
223+
severities.append(
224+
VulnerabilitySeverity(
225+
system=system,
226+
value=str(cvss_data.get("baseScore") or ""),
227+
scoring_elements=str(cvss_data.get("vectorString") or ""),
228+
url=url,
229+
)
230+
)
243231
return severities
244232

245233
@property
@@ -250,7 +238,7 @@ def reference_urls(self):
250238
# FIXME: we should also collect additional data from the references such as tags and ids
251239

252240
urls = []
253-
for reference in get_item(self.cve_item, "cve", "references", "reference_data") or []:
241+
for reference in get_item(self.cve_item, "cve", "references") or []:
254242
ref_url = reference.get("url")
255243
if ref_url and ref_url.startswith(("http", "ftp")) and ref_url not in urls:
256244
urls.append(ref_url)
@@ -300,9 +288,7 @@ def weaknesses(self):
300288
Return a list of CWE IDs like: [119, 189]
301289
"""
302290
weaknesses = []
303-
for weaknesses_item in (
304-
get_item(self.cve_item, "cve", "problemtype", "problemtype_data") or []
305-
):
291+
for weaknesses_item in get_item(self.cve_item, "cve", "weaknesses") or []:
306292
weaknesses_description = weaknesses_item.get("description") or []
307293
for weaknesses_value in weaknesses_description:
308294
cwe_id = (
@@ -322,7 +308,9 @@ def to_advisory(self):
322308
aliases=[],
323309
summary=self.summary,
324310
references_v2=self.references,
325-
date_published=dateparser.parse(self.cve_item.get("publishedDate")),
311+
date_published=dateparser.parse(self.cve_item["cve"].get("published")).replace(
312+
tzinfo=timezone.utc
313+
),
326314
weaknesses=self.weaknesses,
327315
severities=self.severities,
328316
url=f"https://nvd.nist.gov/vuln/detail/{self.cve_id}",

0 commit comments

Comments
 (0)