Skip to content

Commit 1619924

Browse files
committed
wip
Signed-off-by: Keshav Priyadarshi <git@keshav.space>
1 parent 96a52e8 commit 1619924

File tree

5 files changed

+1981
-0
lines changed

5 files changed

+1981
-0
lines changed
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# VulnerableCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import logging
11+
from datetime import timezone
12+
from traceback import format_exc as traceback_format_exc
13+
from typing import Iterable
14+
15+
import requests
16+
from bs4 import BeautifulSoup
17+
from dateutil.parser import parse
18+
from packageurl import PackageURL
19+
from univers.version_range import ApacheVersionRange
20+
21+
from vulnerabilities.importer import AdvisoryData
22+
from vulnerabilities.importer import AffectedPackageV2
23+
from vulnerabilities.importer import ReferenceV2
24+
from vulnerabilities.models import AdvisoryReference
25+
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
26+
from vulnerabilities.pipes.apache_kafka import get_original_advisory
27+
from vulnerabilities.pipes.apache_kafka import parse_range
28+
from vulnerabilities.pipes.apache_kafka import parse_summary
29+
from vulnerabilities.utils import build_description
30+
31+
32+
class ApacheKafkaImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
    """
    Import Apache Kafka advisories from the Kafka CVE list page.

    The page at ``url`` is fetched once and parsed with BeautifulSoup. Each
    ``<table>`` found under the ``td-content`` element is converted to one
    ``AdvisoryData``, using the closest preceding ``<h2>`` as its heading.
    """

    pipeline_id = "apache_kafka_importer_v2"
    spdx_license_expression = "Apache-2.0"
    importer_name = "Apache Kafka Importer V2"

    license_url = "https://www.apache.org/licenses/"
    url = "https://kafka.apache.org/community/cve-list/"

    # CVE ids for which the "Versions affected" / "Fixed versions" text is not
    # parsed: these advisories are stored without any affected package.
    cve_without_affected_fixed_range = [
        "CVE-2022-23302",
        "CVE-2022-23305",
        "CVE-2022-23307",
        "CVE-2021-45046",
        "CVE-2021-44228",
        "CVE-2021-4104",
    ]

    @classmethod
    def steps(cls):
        """Return the pipeline steps in execution order."""
        return (
            cls.fetch,
            cls.collect_and_store_advisories,
        )

    def fetch(self):
        """
        Download the CVE list page and keep both the raw HTML
        (``self.advisory_data``) and its parsed tree (``self.soup``).

        Raise ``requests.HTTPError`` on a non-2xx response so that an error
        page is never parsed as if it were the CVE list.
        """
        self.log(f"Fetch `{self.url}`")
        # requests has no default timeout: without one a stalled connection
        # would hang the pipeline forever.
        response = requests.get(self.url, timeout=30)
        response.raise_for_status()
        self.advisory_data = response.text
        self.soup = BeautifulSoup(self.advisory_data, features="lxml")

    def _advisory_tables(self):
        """Return the list of advisory ``<table>`` elements of the fetched page."""
        return self.soup.find(class_="td-content").find_all("table")

    def advisories_count(self):
        """Return the number of advisory tables found in the fetched page."""
        return len(self._advisory_tables())

    def collect_advisories(self) -> Iterable[AdvisoryData]:
        """Yield one AdvisoryData for each advisory table of the fetched page."""
        for table in self._advisory_tables():
            yield self.to_advisory_data(table)

    def _parse_version_range(self, raw_range, cve):
        """
        Return an ApacheVersionRange built from the ``raw_range`` text, or None
        when the text yields no constraint or cannot be parsed.

        Parsing failures are logged at ERROR level with the ``cve`` id and are
        not re-raised, so that one malformed range does not abort the import.
        """
        try:
            constraints = parse_range(raw_range)
            return ApacheVersionRange(constraints=constraints) if constraints else None
        except Exception as e:
            self.log(
                f"Failed to parse Kafka range for: {cve} with error {e!r}:\n{traceback_format_exc()}",
                level=logging.ERROR,
            )
            return None

    def to_advisory_data(self, table) -> AdvisoryData:
        """
        Return the AdvisoryData built from one advisory ``table`` and the
        ``<h2>`` heading that precedes it.

        The heading ``id`` attribute is used as the advisory id, its text as
        the title and the ``href`` of its first link (if any) as reference URL.
        """
        cve_h2 = table.find_previous("h2")
        # Fall back to an empty mapping so that ``.get("href")`` yields None
        # when the heading has no link.
        reference_a = cve_h2.find("a") or {}
        title = cve_h2.text
        ref_url = reference_a.get("href")
        cve = cve_h2.get("id")

        raw_affected = table.find(text="Versions affected").find_next("p").text
        raw_fixed = table.find(text="Fixed versions").find_next("p").text
        raw_date = table.find(text="Issue announced").find_next("p").text
        date_published = parse(raw_date).replace(tzinfo=timezone.utc)

        description = parse_summary(cve_h2, table)
        original_advisory = get_original_advisory(cve_h2, table)

        # Always bound, even when the CVE is skipped or parsing fails, so the
        # check below can never hit an unassigned local.
        affected_version_range = None
        fixed_version_range = None
        if cve not in self.cve_without_affected_fixed_range:
            affected_version_range = self._parse_version_range(raw_affected, cve)
            fixed_version_range = self._parse_version_range(raw_fixed, cve)

        affected_packages = []
        if affected_version_range or fixed_version_range:
            affected_packages.append(
                AffectedPackageV2(
                    package=PackageURL(type="apache", name="kafka"),
                    affected_version_range=affected_version_range,
                    fixed_version_range=fixed_version_range,
                )
            )

        references = [
            ReferenceV2(
                reference_id=cve,
                reference_type=AdvisoryReference.OTHER,
                url=ref_url,
            )
        ]

        return AdvisoryData(
            advisory_id=cve,
            aliases=[],
            summary=build_description(summary=title, description=description),
            date_published=date_published,
            affected_packages=affected_packages,
            references_v2=references,
            url=f"{self.url}#{cve}",
            original_advisory_text=original_advisory,
        )
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# VulnerableCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
11+
from univers.version_constraint import VersionConstraint
12+
from univers.versions import SemverVersion
13+
14+
15+
def _has_ancestor(element, ancestor):
    """Return True if ``ancestor`` is one of the parents of ``element``."""
    parent = getattr(element, "parent", None)
    while parent is not None:
        # Identity, not equality: two distinct tags with the same markup must
        # not be mistaken for one another.
        if parent is ancestor:
            return True
        parent = getattr(parent, "parent", None)
    return False


def get_original_advisory(cve_h2, table):
    """
    Return the HTML of one advisory as a string: the ``cve_h2`` heading, the
    tags that follow it and the advisory ``table`` itself.

    ``cve_h2.next_elements`` is walked in document order up to and including
    ``table``. Since that walk also visits the descendants of every tag, only
    tags that are not nested in an already emitted tag are serialized;
    otherwise nested markup (including the heading's own children) would be
    repeated after its parent. Bare strings (elements without a tag name) are
    skipped.
    """
    adv_segment = [str(cve_h2)]
    # Last tag whose full markup was emitted; everything nested in it is
    # already part of the output.
    last_emitted = cve_h2

    for el in cve_h2.next_elements:
        if getattr(el, "name", None) and not _has_ancestor(el, last_emitted):
            adv_segment.append(str(el))
            last_emitted = el
        if el is table:
            break

    return "".join(adv_segment)
25+
26+
27+
def parse_summary(cve_h2, table):
    """
    Return the advisory summary: the text of every ``<p>`` element found
    between the ``cve_h2`` heading and the advisory ``table``.

    Each paragraph text is followed by a single space, so a non-empty summary
    ends with a trailing space. An empty string is returned when there is no
    paragraph before the table.
    """
    paragraphs = []
    for node in cve_h2.next_elements:
        # Stop at the table: its own paragraphs are not part of the summary.
        if node == table:
            break
        if node.name == "p":
            paragraphs.append(f"{node.text} ")

    return "".join(paragraphs)
36+
37+
38+
def parse_range(raw_range):
    """
    Return a list of VersionConstraint parsed from the ``raw_range`` text of a
    "Versions affected" or "Fixed versions" cell.

    Anything up to the first ":" is dropped, "to" is treated as a range dash
    and the words "and" and "later" are removed. The remaining text is split
    on commas:

    - an item without a dash becomes one ``=`` constraint;
    - an item of the form ``low - high`` becomes a ``>=`` constraint followed
      by a ``<=`` constraint.

    Empty items are ignored, so an empty text yields an empty list. An item
    with more than one dash raises ValueError.
    """
    _, colon, after_colon = raw_range.partition(":")
    if colon:
        raw_range = after_colon

    cleaned = raw_range.replace("to", "-").replace("and", "").replace("later", "").strip()

    constraints = []
    for chunk in cleaned.split(","):
        chunk = chunk.strip()
        if not chunk:
            continue

        if "-" in chunk:
            lower, upper = chunk.split("-")
            bounds = ((">=", lower.strip()), ("<=", upper.strip()))
        else:
            bounds = (("=", chunk),)

        for comparator, version in bounds:
            constraints.append(
                VersionConstraint(
                    comparator=comparator,
                    version=SemverVersion(version),
                )
            )

    return constraints
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# VulnerableCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
11+
from pathlib import Path
12+
from unittest.mock import patch
13+
14+
from bs4 import BeautifulSoup
15+
from django.test import TestCase
16+
17+
from vulnerabilities.models import AdvisoryV2
18+
from vulnerabilities.pipelines.v2_importers.apache_kafka_importer import ApacheKafkaImporterPipeline
19+
from vulnerabilities.tests import util_tests
20+
from vulnerabilities.tests.pipelines import TestLogger
21+
22+
TEST_DATA = Path(__file__).parent.parent.parent / "test_data" / "apache_kafka"
23+
24+
25+
class TestApacheKafkaImporterPipeline(TestCase):
    """Run the Apache Kafka importer against a saved copy of the CVE list page."""

    def setUp(self):
        self.logger = TestLogger()

    @patch(
        "vulnerabilities.pipelines.v2_importers.apache_kafka_importer.ApacheKafkaImporterPipeline.fetch"
    )
    def test_apache_kafka_advisories_v2(self, mock_fetch):
        """
        Execute the pipeline with ``fetch`` patched out and the soup preloaded
        from the HTML fixture, then compare the stored advisories with the
        expected JSON file.
        """
        # NOTE(review): presumably the pipeline runner reads each step's
        # ``__name__``, which a Mock does not provide by default - confirm.
        mock_fetch.__name__ = "fetch"
        cve_list = TEST_DATA / "cve-list-2026_01_23.html"
        # read_text() closes the file, unlike a bare open().read().
        advisory_data = cve_list.read_text(encoding="utf-8")

        pipeline = ApacheKafkaImporterPipeline()
        pipeline.soup = BeautifulSoup(advisory_data, features="lxml")
        pipeline.log = self.logger.write
        pipeline.execute()

        expected_file = TEST_DATA / "cve-list-2026_01_23-expected.json"
        result = [adv.to_advisory_data().to_dict() for adv in AdvisoryV2.objects.all()]
        util_tests.check_results_against_json(result, expected_file)

0 commit comments

Comments
 (0)