Skip to content

Commit 5eaccf8

Browse files
committed
Parse Linux kernel text to extract fixed versions and avoid duplicate advisory_id
Signed-off-by: ziad hany <ziadhany2016@gmail.com>
1 parent 57f23f2 commit 5eaccf8

File tree

3 files changed

+193
-2086
lines changed

3 files changed

+193
-2086
lines changed

vulnerabilities/pipelines/v2_importers/linux_kernel_importer.py

Lines changed: 63 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,11 @@
66
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
77
# See https://aboutcode.org for more information about nexB OSS projects.
88
#
9+
from collections import defaultdict
910
from pathlib import Path
1011

1112
from fetchcode.vcs import fetch_via_vcs
13+
from univers.version_range import GenericVersionRange
1214

1315
from vulnerabilities.importer import AdvisoryData
1416
from vulnerabilities.importer import AffectedPackageV2
@@ -19,7 +21,6 @@
1921
from vulnerabilities.pipes.advisory import classify_patch_source
2022
from vulnerabilities.utils import commit_regex
2123
from vulnerabilities.utils import cve_regex
22-
from vulnerabilities.utils import get_advisory_url
2324
from vulnerabilities.utils import is_commit
2425

2526

@@ -31,13 +32,13 @@ class LinuxKernelPipeline(VulnerableCodeBaseImporterPipelineV2):
3132
pipeline_id = "linux_kernel_cves_fix_commits"
3233
spdx_license_expression = "Apache-2.0"
3334
license_url = "https://github.com/nluedtke/linux_kernel_cves/blob/master/LICENSE"
34-
importer_name = "linux_kernel_cves_fix_commits"
35-
qualified_name = "linux_kernel_cves_fix_commits"
35+
run_once = True
3636

3737
@classmethod
3838
def steps(cls):
3939
return (
4040
cls.clone,
41+
cls.extract_kernel_cve_fix_commits,
4142
cls.collect_and_store_advisories,
4243
cls.clean_downloads,
4344
)
@@ -51,60 +52,76 @@ def clone(self):
5152
self.log(f"Cloning `{self.repo_url}`")
5253
self.vcs_response = fetch_via_vcs(self.repo_url)
5354

54-
def collect_advisories(self):
55+
def extract_kernel_cve_fix_commits(self):
5556
self.log(f"Processing linux kernel fix commits.")
5657
base_path = Path(self.vcs_response.dest_dir) / "data"
58+
5759
for file_path in base_path.rglob("*.txt"):
5860
if "_CVEs.txt" in file_path.name:
5961
continue
6062

6163
if "_security.txt" in file_path.name:
62-
patches = []
63-
affected_packages = []
64-
references = []
65-
for vulnerability_id, commit_hash in self.parse_commits_file(file_path):
66-
patch_url = f"https://github.com/torvalds/linux/commit/{commit_hash}"
67-
if not commit_hash:
68-
continue
69-
70-
base_purl, patch_objs = classify_patch_source(
71-
url=patch_url,
72-
commit_hash=commit_hash,
73-
patch_text=None,
74-
)
75-
76-
for patch_obj in patch_objs:
77-
if isinstance(patch_obj, PackageCommitPatchData):
78-
fixed_commit = patch_obj
79-
affected_package = AffectedPackageV2(
80-
package=base_purl,
81-
fixed_by_commit_patches=[fixed_commit],
82-
)
83-
affected_packages.append(affected_package)
84-
elif isinstance(patch_obj, PatchData):
85-
patches.append(patch_obj)
86-
elif isinstance(patch_obj, ReferenceV2):
87-
references.append(patch_obj)
88-
89-
advisory_url = get_advisory_url(
90-
file=file_path,
91-
base_path=self.vcs_response.dest_dir,
92-
url="https://github.com/nluedtke/linux_kernel_cves/blob/master/",
93-
)
94-
95-
yield AdvisoryData(
96-
advisory_id=vulnerability_id,
97-
references_v2=references,
98-
affected_packages=affected_packages,
99-
patches=patches,
100-
url=advisory_url,
101-
)
64+
self.parse_commits_file(file_path)
65+
66+
def collect_advisories(self):
67+
for (
68+
vulnerability_id,
69+
fixed_versions_commits,
70+
) in self.cve_to_fixed_versions_and_commits.items():
71+
references = []
72+
patches = []
73+
affected_packages = []
74+
75+
for fixed_version, commit_hash in fixed_versions_commits:
76+
patch_url = f"https://github.com/torvalds/linux/commit/{commit_hash}"
77+
if not commit_hash:
78+
continue
79+
80+
base_purl, patch_objs = classify_patch_source(
81+
url=patch_url,
82+
commit_hash=commit_hash,
83+
patch_text=None,
84+
)
85+
86+
for patch_obj in patch_objs:
87+
fixed_version_range = GenericVersionRange.from_versions([fixed_version])
88+
if isinstance(patch_obj, PackageCommitPatchData):
89+
fixed_commit = patch_obj
90+
affected_package = AffectedPackageV2(
91+
package=base_purl,
92+
fixed_by_commit_patches=[fixed_commit],
93+
fixed_version_range=fixed_version_range,
94+
)
95+
affected_packages.append(affected_package)
96+
elif isinstance(patch_obj, PatchData):
97+
patches.append(patch_obj)
98+
elif isinstance(patch_obj, ReferenceV2):
99+
references.append(patch_obj)
100+
101+
yield AdvisoryData(
102+
advisory_id=vulnerability_id,
103+
references_v2=references,
104+
affected_packages=affected_packages,
105+
patches=patches,
106+
url="https://github.com/nluedtke/linux_kernel_cves",
107+
)
102108

103109
def parse_commits_file(self, file_path):
104110
"""Extract CVE-ID and commit hashes from a text file"""
111+
self.cve_to_fixed_versions_and_commits = defaultdict(set)
112+
fixed_version = None
105113
with open(file_path, "r", encoding="utf-8") as f:
106114
for line in f:
107-
parts = line.strip().split(":", 2)
115+
line = line.strip()
116+
117+
if not line:
118+
continue
119+
120+
if line.startswith("CVEs fixed in"):
121+
fixed_version = line.replace("CVEs fixed in", "").strip().rstrip(":")
122+
continue
123+
124+
parts = line.split(":", 2)
108125

109126
if len(parts) < 2:
110127
continue
@@ -124,7 +141,7 @@ def parse_commits_file(self, file_path):
124141
if not commit_hash or not is_commit(commit_hash):
125142
continue
126143

127-
yield cve, commit_hash
144+
self.cve_to_fixed_versions_and_commits[cve].add((fixed_version, commit_hash))
128145

129146
def clean_downloads(self):
130147
"""Cleanup any temporary repository data."""

vulnerabilities/tests/pipelines/v2_importers/test_linux_pipelines_v2.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,5 +24,6 @@ def test_linux_kernel_advisories():
2424
expected_file = os.path.join(TEST_DATA, "expected-linux-kernel-advisory.json")
2525
pipeline = LinuxKernelPipeline()
2626
pipeline.vcs_response = Mock(dest_dir=TEST_DATA)
27+
pipeline.extract_kernel_cve_fix_commits()
2728
result = [adv.to_dict() for adv in pipeline.collect_advisories()]
2829
util_tests.check_results_against_json(result, expected_file)

0 commit comments

Comments
 (0)