Skip to content

Commit 39d1234

Browse files
committed
Drop AdvisoryDataV2
Add a constraint to make sure at least one field is present to create a valid Patch object. Update patch_text only if the patch_text field is empty. Return multiple objects from the classify_patch_source function. Add patches to AdvisoryData.from_dict(). Signed-off-by: ziad hany <ziadhany2016@gmail.com>
1 parent 9b8cd07 commit 39d1234

File tree

7 files changed

+154
-173
lines changed

7 files changed

+154
-173
lines changed

vulnerabilities/importer.py

Lines changed: 1 addition & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -633,6 +633,7 @@ def from_dict(cls, advisory_data):
633633
"affected_packages": [
634634
affected_package_cls.from_dict(pkg) for pkg in affected_packages if pkg is not None
635635
],
636+
"patches": [PatchData.from_dict(patch) for patch in advisory_data.get("patches", [])],
636637
"references": [Reference.from_dict(ref) for ref in advisory_data["references"]],
637638
"date_published": datetime.datetime.fromisoformat(date_published)
638639
if date_published
@@ -643,77 +644,6 @@ def from_dict(cls, advisory_data):
643644
return cls(**transformed)
644645

645646

646-
@dataclasses.dataclass(order=True)
647-
class AdvisoryDataV2:
648-
"""
649-
This data class expresses the contract between data sources and the import runner.
650-
651-
If a vulnerability_id is present then:
652-
summary or affected_packages or references must be present
653-
otherwise
654-
either affected_package or references should be present
655-
656-
date_published must be aware datetime
657-
"""
658-
659-
advisory_id: str = ""
660-
aliases: List[str] = dataclasses.field(default_factory=list)
661-
summary: Optional[str] = ""
662-
affected_packages: List[AffectedPackage] = dataclasses.field(default_factory=list)
663-
references: List[ReferenceV2] = dataclasses.field(default_factory=list)
664-
patches: List[PatchData] = dataclasses.field(default_factory=list)
665-
date_published: Optional[datetime.datetime] = None
666-
weaknesses: List[int] = dataclasses.field(default_factory=list)
667-
url: Optional[str] = None
668-
669-
def __post_init__(self):
670-
if self.date_published and not self.date_published.tzinfo:
671-
logger.warning(f"AdvisoryData with no tzinfo: {self!r}")
672-
if self.summary:
673-
self.summary = self.clean_summary(self.summary)
674-
675-
def clean_summary(self, summary):
676-
# https://nvd.nist.gov/vuln/detail/CVE-2013-4314
677-
# https://github.com/cms-dev/cms/issues/888#issuecomment-516977572
678-
summary = summary.strip()
679-
if summary:
680-
summary = summary.replace("\x00", "\uFFFD")
681-
return summary
682-
683-
def to_dict(self):
684-
return {
685-
"aliases": self.aliases,
686-
"summary": self.summary,
687-
"affected_packages": [pkg.to_dict() for pkg in self.affected_packages],
688-
"references": [ref.to_dict() for ref in self.references],
689-
"patches": [ref.to_dict() for ref in self.patches],
690-
"date_published": self.date_published.isoformat() if self.date_published else None,
691-
"weaknesses": self.weaknesses,
692-
"url": self.url if self.url else "",
693-
}
694-
695-
@classmethod
696-
def from_dict(cls, advisory_data):
697-
date_published = advisory_data["date_published"]
698-
transformed = {
699-
"aliases": advisory_data["aliases"],
700-
"summary": advisory_data["summary"],
701-
"affected_packages": [
702-
AffectedPackage.from_dict(pkg)
703-
for pkg in advisory_data["affected_packages"]
704-
if pkg is not None
705-
],
706-
"references": [Reference.from_dict(ref) for ref in advisory_data["references"]],
707-
"patches": [PatchData.from_dict(ref) for ref in advisory_data["patches"]],
708-
"date_published": datetime.datetime.fromisoformat(date_published)
709-
if date_published
710-
else None,
711-
"weaknesses": advisory_data["weaknesses"],
712-
"url": advisory_data.get("url") or None,
713-
}
714-
return cls(**transformed)
715-
716-
717647
class NoLicenseError(Exception):
718648
pass
719649

vulnerabilities/models.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2773,7 +2773,13 @@ class Patch(models.Model):
27732773
help_text="SHA512 checksum of the patch content.",
27742774
)
27752775

2776+
def clean(self):
2777+
if not self.patch_url and not self.patch_text:
2778+
raise ValidationError("Either patch_url or patch_text must be provided.")
2779+
27762780
def save(self, *args, **kwargs):
2781+
# https://docs.djangoproject.com/en/4.2/ref/models/instances/#django.db.models.Model.clean
2782+
self.full_clean()
27772783
if self.patch_text:
27782784
self.patch_checksum = compute_patch_checksum(self.patch_text)
27792785
super().save(*args, **kwargs)

vulnerabilities/pipelines/v2_importers/aosp_importer.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -90,23 +90,23 @@ def collect_advisories(self):
9090
patch_url = commit_data.get("patchUrl")
9191
commit_id = commit_data.get("commitId")
9292

93-
base_purl, patch_obj = classify_patch_source(
93+
base_purl, patch_objs = classify_patch_source(
9494
url=patch_url,
9595
commit_hash=commit_id,
9696
patch_text=None,
9797
)
98-
99-
if isinstance(patch_obj, PackageCommitPatchData):
100-
fixed_commit = patch_obj
101-
affected_package = AffectedPackageV2(
102-
package=base_purl,
103-
fixed_by_commit_patches=[fixed_commit],
104-
)
105-
affected_packages.append(affected_package)
106-
elif isinstance(patch_obj, PatchData):
107-
patches.append(patch_obj)
108-
elif isinstance(patch_obj, ReferenceV2):
109-
references.append(patch_obj)
98+
for patch_obj in patch_objs:
99+
if isinstance(patch_obj, PackageCommitPatchData):
100+
fixed_commit = patch_obj
101+
affected_package = AffectedPackageV2(
102+
package=base_purl,
103+
fixed_by_commit_patches=[fixed_commit],
104+
)
105+
affected_packages.append(affected_package)
106+
elif isinstance(patch_obj, PatchData):
107+
patches.append(patch_obj)
108+
elif isinstance(patch_obj, ReferenceV2):
109+
references.append(patch_obj)
110110

111111
url = (
112112
"https://raw.githubusercontent.com/quarkslab/aosp_dataset/refs/heads/master/cves/"

vulnerabilities/pipes/advisory.py

Lines changed: 34 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from typing import List
1717
from typing import Union
1818

19+
from django.core.exceptions import ValidationError
1920
from django.db import transaction
2021
from django.db.models import Q
2122
from django.db.models.query import QuerySet
@@ -128,13 +129,19 @@ def get_or_create_advisory_package_commit_patches(
128129
key = (commit_obj.commit_hash, commit_obj.vcs_url)
129130
input_data = data_map[key]
130131

131-
if (
132-
commit_obj.patch_checksum != input_data.patch_checksum
133-
or commit_obj.patch_text != input_data.patch_text
134-
):
132+
if not commit_obj.patch_text and input_data.patch_text:
135133
commit_obj.patch_checksum = input_data.patch_checksum
136134
commit_obj.patch_text = input_data.patch_text
137135
to_update.append(commit_obj)
136+
elif (
137+
commit_obj.patch_text
138+
and input_data.patch_text
139+
and (commit_obj.patch_text != input_data.patch_text)
140+
):
141+
raise ValidationError(
142+
f"Patch text conflict detected: existing record: {commit_obj.vcs_url} - {commit_obj.commit_hash} has different patch text"
143+
f"than {input_data.vcs_url} - {input_data.commit_hash}"
144+
)
138145

139146
if to_update:
140147
PackageCommitPatch.objects.bulk_update(to_update, fields=["patch_checksum", "patch_text"])
@@ -204,25 +211,39 @@ def classify_patch_source(url, commit_hash, patch_text):
204211
if not patch_text:
205212
return
206213

207-
return None, PatchData(patch_text=patch_text)
214+
return None, [PatchData(patch_text=patch_text)]
208215

209216
purl = url2purl(url)
210217
if not purl or (purl.type not in VCS_URLS_SUPPORTED_TYPES):
211218
if commit_hash:
212-
return None, ReferenceV2(
213-
reference_id=commit_hash, reference_type=AdvisoryReference.COMMIT, url=url
214-
)
215-
return None, PatchData(patch_url=url, patch_text=patch_text)
219+
if not patch_text:
220+
return None, [
221+
ReferenceV2(
222+
reference_id=commit_hash, reference_type=AdvisoryReference.COMMIT, url=url
223+
)
224+
]
225+
226+
return None, [
227+
ReferenceV2(
228+
reference_id=commit_hash, reference_type=AdvisoryReference.COMMIT, url=url
229+
),
230+
PatchData(patch_url=url, patch_text=patch_text),
231+
]
232+
233+
return None, [PatchData(patch_url=url, patch_text=patch_text)]
216234

217235
if not commit_hash and not purl.version:
218-
return None, PatchData(patch_url=url, patch_text=patch_text or None)
236+
return None, [PatchData(patch_url=url, patch_text=patch_text)]
219237

220238
base_purl = get_core_purl(purl)
221239
base_purl_str = base_purl.to_string()
222240
base_url = purl2url(base_purl_str)
223-
return base_purl, PackageCommitPatchData(
224-
vcs_url=base_url, commit_hash=purl.version or commit_hash, patch_text=patch_text or None
225-
)
241+
package_commit_hash = purl.version or commit_hash
242+
return base_purl, [
243+
PackageCommitPatchData(
244+
vcs_url=base_url, commit_hash=package_commit_hash, patch_text=patch_text
245+
)
246+
]
226247

227248

228249
def insert_advisory(advisory: AdvisoryData, pipeline_id: str, logger: Callable = None):

vulnerabilities/tests/pipes/test_vulnerablecode_importer_pipeline_v2.py

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ def patch_source_samples():
191191
"url": "https://unknown.com/abc/def",
192192
"commit_hash": "8eb1b04ca4ae6fc0a0ef46f1b0c042f64db28ff9",
193193
"patch_text": "+1-2",
194-
}, # ReferenceV2
194+
}, # ReferenceV2, PatchData
195195
{
196196
"url": "https://unknown.com/abc/def/be891173be2fbdc897116bf5aa4fc9fdc8dc4f3d",
197197
"commit_hash": None,
@@ -225,28 +225,28 @@ def dumpy_patch_advisory(patch_source_samples):
225225
commit_hash = entry["commit_hash"]
226226
patch_text = entry["patch_text"]
227227

228-
base_purl, patch_obj = classify_patch_source(
228+
base_purl, patch_objs = classify_patch_source(
229229
url=url, commit_hash=commit_hash, patch_text=patch_text
230230
)
231-
232-
if isinstance(patch_obj, PackageCommitPatchData):
233-
# For testing only: commit hashes starting with "a" are treated as introduced_by_commit_patches,
234-
# all others are treated as fixed_by_commit_patches.
235-
if patch_obj.commit_hash.startswith("a"):
236-
affected_package = AffectedPackageV2(
237-
package=base_purl,
238-
introduced_by_commit_patches=[patch_obj],
239-
)
240-
else:
241-
affected_package = AffectedPackageV2(
242-
package=base_purl,
243-
fixed_by_commit_patches=[patch_obj],
244-
)
245-
affected_packages.append(affected_package)
246-
elif isinstance(patch_obj, PatchData):
247-
patches.append(patch_obj)
248-
elif isinstance(patch_obj, ReferenceV2):
249-
references.append(patch_obj)
231+
for patch_obj in patch_objs:
232+
if isinstance(patch_obj, PackageCommitPatchData):
233+
# For testing only: commit hashes starting with "a" are treated as introduced_by_commit_patches,
234+
# all others are treated as fixed_by_commit_patches.
235+
if patch_obj.commit_hash.startswith("a"):
236+
affected_package = AffectedPackageV2(
237+
package=base_purl,
238+
introduced_by_commit_patches=[patch_obj],
239+
)
240+
else:
241+
affected_package = AffectedPackageV2(
242+
package=base_purl,
243+
fixed_by_commit_patches=[patch_obj],
244+
)
245+
affected_packages.append(affected_package)
246+
elif isinstance(patch_obj, PatchData):
247+
patches.append(patch_obj)
248+
elif isinstance(patch_obj, ReferenceV2):
249+
references.append(patch_obj)
250250

251251
return AdvisoryData(
252252
summary="Test patch advisory",

vulnerabilities/tests/test_data/ruby/parse-advisory-ruby-expected.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@
5656
]
5757
}
5858
],
59+
"patches": [],
5960
"date_published": "2018-01-09T00:00:00+00:00",
6061
"weaknesses": []
6162
}

0 commit comments

Comments
 (0)