Skip to content

Commit 546d2d9

Browse files
committed
Add tests and address review comments
Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com>
1 parent 0f6ab4d commit 546d2d9

File tree

4 files changed

+271
-11
lines changed

4 files changed

+271
-11
lines changed

vulnerabilities/importer.py

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@
4646
logger = logging.getLogger(__name__)
4747

4848

49-
@dataclasses.dataclass(order=True)
49+
@dataclasses.dataclass(frozen=True)
5050
class VulnerabilitySeverity:
5151
# FIXME: this should be named scoring_system, like in the model
5252
system: ScoringSystem
@@ -65,6 +65,16 @@ def to_dict(self):
6565
**published_at_dict,
6666
}
6767

68+
def __eq__(self, other):
69+
if not isinstance(other, VulnerabilitySeverity):
70+
return NotImplemented
71+
return str(self.to_dict()) == str(other.to_dict())
72+
73+
def __lt__(self, other):
74+
if not isinstance(other, VulnerabilitySeverity):
75+
return NotImplemented
76+
return str(self.to_dict()) < str(other.to_dict())
77+
6878
@classmethod
6979
def from_dict(cls, severity: dict):
7080
"""
@@ -79,7 +89,7 @@ def from_dict(cls, severity: dict):
7989
)
8090

8191

82-
@dataclasses.dataclass(order=True)
92+
@dataclasses.dataclass(frozen=True)
8393
class Reference:
8494
reference_id: str = ""
8595
reference_type: str = ""
@@ -99,6 +109,16 @@ def normalized(self):
99109
reference_type=self.reference_type,
100110
)
101111

112+
def __eq__(self, other):
113+
if not isinstance(other, Reference):
114+
return NotImplemented
115+
return str(self.to_dict()) == str(other.to_dict())
116+
117+
def __lt__(self, other):
118+
if not isinstance(other, Reference):
119+
return NotImplemented
120+
return str(self.to_dict()) < str(other.to_dict())
121+
102122
def to_dict(self):
103123
return {
104124
"reference_id": self.reference_id,
@@ -140,7 +160,7 @@ class NoAffectedPackages(Exception):
140160
"""
141161

142162

143-
@dataclasses.dataclass(order=True, frozen=True)
163+
@dataclasses.dataclass(frozen=True)
144164
class AffectedPackage:
145165
"""
146166
Relate a Package URL with a range of affected versions and a fixed version.
@@ -170,6 +190,16 @@ def get_fixed_purl(self):
170190
raise ValueError(f"Affected Package {self.package!r} does not have a fixed version")
171191
return update_purl_version(purl=self.package, version=str(self.fixed_version))
172192

193+
def __eq__(self, other):
194+
if not isinstance(other, AffectedPackage):
195+
return NotImplemented
196+
return str(self.to_dict()) == str(other.to_dict())
197+
198+
def __lt__(self, other):
199+
if not isinstance(other, AffectedPackage):
200+
return NotImplemented
201+
return str(self.to_dict()) < str(other.to_dict())
202+
173203
@classmethod
174204
def merge(
175205
cls, affected_packages: Iterable
@@ -274,6 +304,7 @@ class AdvisoryData:
274304
date_published: Optional[datetime.datetime] = None
275305
weaknesses: List[int] = dataclasses.field(default_factory=list)
276306
url: Optional[str] = None
307+
created_by: Optional[str] = None
277308

278309
def __post_init__(self):
279310
if self.date_published and not self.date_published.tzinfo:

vulnerabilities/models.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1389,6 +1389,7 @@ def to_advisory_data(self) -> "AdvisoryData":
13891389
date_published=self.date_published,
13901390
weaknesses=self.weaknesses,
13911391
url=self.url,
1392+
created_by=self.created_by
13921393
)
13931394

13941395

Lines changed: 224 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,224 @@
1+
import datetime
2+
import pytz
3+
from unittest import TestCase
4+
5+
from packageurl import PackageURL
6+
from univers.version_range import VersionRange
7+
8+
from vulnerabilities.importer import AdvisoryData, AffectedPackage, Reference, VulnerabilitySeverity
9+
from vulnerabilities.severity_systems import SCORING_SYSTEMS
10+
from vulnerabilities.utils import compute_content_id
11+
12+
13+
class TestComputeContentId(TestCase):
14+
def setUp(self):
15+
self.maxDiff = None
16+
self.base_advisory = AdvisoryData(
17+
summary="Test summary",
18+
affected_packages=[
19+
AffectedPackage(
20+
package=PackageURL(
21+
type="npm",
22+
name="package1",
23+
qualifiers={},
24+
),
25+
affected_version_range=VersionRange.from_string("vers:npm/>=1.0.0|<2.0.0"),
26+
)
27+
],
28+
references=[
29+
Reference(
30+
url="https://example.com/vuln1",
31+
reference_id="GHSA-1234-5678-9012",
32+
severities=[
33+
VulnerabilitySeverity(
34+
system=SCORING_SYSTEMS["cvssv3.1"],
35+
value="7.5",
36+
)
37+
],
38+
)
39+
],
40+
date_published=datetime.datetime(2024, 1, 1, tzinfo=pytz.UTC),
41+
)
42+
43+
def test_same_content_different_order_same_id(self):
44+
"""
45+
Test that advisories with same content but different ordering have same content ID
46+
"""
47+
advisory1 = self.base_advisory
48+
49+
# Same content but different order of references and affected packages
50+
advisory2 = AdvisoryData(
51+
summary="Test summary",
52+
affected_packages=list(reversed(self.base_advisory.affected_packages)),
53+
references=list(reversed(self.base_advisory.references)),
54+
date_published=datetime.datetime(2024, 1, 1, tzinfo=pytz.UTC),
55+
)
56+
57+
self.assertEqual(
58+
compute_content_id(advisory1),
59+
compute_content_id(advisory2),
60+
)
61+
62+
def test_different_metadata_same_content_same_id(self):
63+
"""
64+
Test that advisories with same content but different metadata have same content ID
65+
when include_metadata=False
66+
"""
67+
advisory1 = self.base_advisory
68+
69+
advisory2 = AdvisoryData(
70+
summary="Test summary",
71+
affected_packages=self.base_advisory.affected_packages,
72+
references=self.base_advisory.references,
73+
date_published=datetime.datetime(2024, 1, 1, tzinfo=pytz.UTC),
74+
created_by="different_importer",
75+
url="https://different.url",
76+
)
77+
78+
self.assertEqual(
79+
compute_content_id(advisory1),
80+
compute_content_id(advisory2),
81+
)
82+
83+
def test_different_metadata_different_id_when_included(self):
84+
"""
85+
Test that advisories with same content but different metadata have different content IDs
86+
when include_metadata=True
87+
"""
88+
advisory1 = self.base_advisory
89+
90+
advisory2 = AdvisoryData(
91+
summary="Test summary",
92+
affected_packages=self.base_advisory.affected_packages,
93+
references=self.base_advisory.references,
94+
date_published=datetime.datetime(2024, 1, 1, tzinfo=pytz.UTC),
95+
created_by="different_importer",
96+
url="https://different.url",
97+
)
98+
99+
self.assertNotEqual(
100+
compute_content_id(advisory1, include_metadata=True),
101+
compute_content_id(advisory2, include_metadata=True),
102+
)
103+
104+
def test_different_summary_different_id(self):
105+
"""
106+
Test that advisories with different summaries have different content IDs
107+
"""
108+
advisory1 = self.base_advisory
109+
110+
advisory2 = AdvisoryData(
111+
summary="Different summary",
112+
affected_packages=self.base_advisory.affected_packages,
113+
references=self.base_advisory.references,
114+
date_published=datetime.datetime(2024, 1, 1, tzinfo=pytz.UTC),
115+
)
116+
117+
self.assertNotEqual(
118+
compute_content_id(advisory1),
119+
compute_content_id(advisory2),
120+
)
121+
122+
def test_different_affected_packages_different_id(self):
123+
"""
124+
Test that advisories with different affected packages have different content IDs
125+
"""
126+
advisory1 = self.base_advisory
127+
128+
advisory2 = AdvisoryData(
129+
summary="Test summary",
130+
affected_packages=[
131+
AffectedPackage(
132+
package=PackageURL(
133+
type="npm",
134+
name="different-package",
135+
qualifiers={},
136+
),
137+
affected_version_range=VersionRange.from_string("vers:npm/>=1.0.0|<2.0.0"),
138+
)
139+
],
140+
references=self.base_advisory.references,
141+
date_published=datetime.datetime(2024, 1, 1, tzinfo=pytz.UTC),
142+
)
143+
144+
self.assertNotEqual(
145+
compute_content_id(advisory1),
146+
compute_content_id(advisory2),
147+
)
148+
149+
def test_different_references_different_id(self):
150+
"""
151+
Test that advisories with different references have different content IDs
152+
"""
153+
advisory1 = self.base_advisory
154+
155+
advisory2 = AdvisoryData(
156+
summary="Test summary",
157+
affected_packages=self.base_advisory.affected_packages,
158+
references=[
159+
Reference(
160+
url="https://example.com/different-vuln",
161+
reference_id="GHSA-9999-9999-9999",
162+
severities=[
163+
VulnerabilitySeverity(
164+
system=SCORING_SYSTEMS["cvssv3.1"],
165+
value="8.5",
166+
)
167+
],
168+
)
169+
],
170+
date_published=datetime.datetime(2024, 1, 1, tzinfo=pytz.UTC),
171+
)
172+
173+
self.assertNotEqual(
174+
compute_content_id(advisory1),
175+
compute_content_id(advisory2),
176+
)
177+
178+
def test_different_weaknesses_different_id(self):
179+
"""
180+
Test that advisories with different weaknesses have different content IDs
181+
"""
182+
advisory1 = AdvisoryData(
183+
summary="Test summary",
184+
affected_packages=self.base_advisory.affected_packages,
185+
references=self.base_advisory.references,
186+
weaknesses=[1, 2, 3],
187+
date_published=datetime.datetime(2024, 1, 1, tzinfo=pytz.UTC),
188+
)
189+
190+
advisory2 = AdvisoryData(
191+
summary="Test summary",
192+
affected_packages=self.base_advisory.affected_packages,
193+
references=self.base_advisory.references,
194+
weaknesses=[4, 5, 6],
195+
date_published=datetime.datetime(2024, 1, 1, tzinfo=pytz.UTC),
196+
)
197+
198+
self.assertNotEqual(
199+
compute_content_id(advisory1),
200+
compute_content_id(advisory2),
201+
)
202+
203+
def test_empty_fields_same_id(self):
204+
"""
205+
Test that advisories with empty optional fields still generate same content ID
206+
"""
207+
advisory1 = AdvisoryData(
208+
summary="",
209+
affected_packages=self.base_advisory.affected_packages,
210+
references=self.base_advisory.references,
211+
date_published=None,
212+
)
213+
214+
advisory2 = AdvisoryData(
215+
summary="",
216+
affected_packages=self.base_advisory.affected_packages,
217+
references=self.base_advisory.references,
218+
date_published=None,
219+
)
220+
221+
self.assertEqual(
222+
compute_content_id(advisory1),
223+
compute_content_id(advisory2),
224+
)

vulnerabilities/utils.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -557,24 +557,28 @@ def compute_content_id(advisory_data, include_metadata=False):
557557
:param include_metadata: Boolean indicating whether to include `created_by` and `url`
558558
:return: SHA-256 hash digest as content_id
559559
"""
560-
561560
def normalize_text(text):
562561
"""Normalize text by removing spaces and converting to lowercase."""
563562
return text.replace(" ", "").lower() if text else ""
564563

564+
def normalize_affected_packages(packages):
565+
"""Normalize a list of AffectedPackage objects"""
566+
if not packages:
567+
return []
568+
return sorted([pkg.to_dict() for pkg in packages])
569+
565570
def normalize_list(lst):
566571
"""Sort a list to ensure consistent ordering."""
567572
return sorted(lst) if lst else []
568-
569-
def normalize_dict(obj):
570-
"""Ensure dictionary keys are ordered."""
571-
return json.loads(json.dumps(obj, sort_keys=True)) if obj else {}
572-
573+
574+
def normalize_references(references):
575+
"""Normalize a list of references"""
576+
return sorted([ref.to_dict() for ref in references])
573577
# Normalize fields
574578
normalized_data = {
575579
"summary": normalize_text(advisory_data.summary),
576-
"affected_packages": normalize_list(advisory_data.affected_packages),
577-
"references": normalize_list(advisory_data.references),
580+
"affected_packages": normalize_affected_packages(advisory_data.affected_packages),
581+
"references": normalize_references(advisory_data.references),
578582
"weaknesses": normalize_list(advisory_data.weaknesses),
579583
}
580584

0 commit comments

Comments
 (0)