Skip to content

Commit f716130

Browse files
committed
Add pipeline to collect fix commit
Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com>
1 parent cb1fa4d commit f716130

File tree

1 file changed

+112
-0
lines changed

1 file changed

+112
-0
lines changed
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# VulnerableCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
from aboutcode.pipeline import LoopProgress
11+
12+
from vulnerabilities.models import CodeFix
13+
from vulnerabilities.models import Package
14+
from vulnerabilities.models import VulnerabilityReference
15+
from vulnerabilities.pipelines import VulnerableCodePipeline
16+
from vulnerabilities.utils import normalize_purl
17+
18+
19+
class CollectFixCommitsPipeline(VulnerableCodePipeline):
20+
"""
21+
Improver pipeline that scans vulnerability references for fix commits and creates CodeFix entries.
22+
"""
23+
24+
pipeline_id = "collect_fix_commits"
25+
license_expression = None
26+
27+
@classmethod
def steps(cls):
    """Return the ordered tuple of step callables that make up this pipeline."""
    pipeline_steps = (cls.collect_and_store_fix_commits,)
    return pipeline_steps
30+
31+
def collect_and_store_fix_commits(self):
    """
    Scan vulnerability references for fix commits and store CodeFix entries.

    For every reference whose URL yields both a commit ID and at least one
    Package URL, ``create_codefix_entry`` is called once per
    (vulnerability, package) pair. Progress and the final tally are reported
    through ``self.log``.
    """
    references = VulnerabilityReference.objects.prefetch_related("vulnerabilities").distinct()

    # Count once and reuse: each ``count()`` call is a separate query.
    reference_count = references.count()
    self.log(f"Processing {reference_count:,d} references to collect fix commits.")

    created_fix_count = 0
    progress = LoopProgress(total_iterations=reference_count, logger=self.log)
    for reference in progress.iter(references.paginated(per_page=500)):
        # The commit ID and Package URLs depend only on the reference, so
        # they are computed once here instead of once per vulnerability.
        commit_id = self.extract_commit_id(reference.url)
        if not commit_id:
            continue

        package_urls = self.extract_package_urls(reference)
        if not package_urls:
            continue

        # A reference without vulnerabilities produces nothing; skip it
        # before touching any package.
        vulnerabilities = list(reference.vulnerabilities.all())
        if not vulnerabilities:
            continue

        packages = []
        for purl in package_urls:
            package = self.get_or_create_package(normalize_purl(purl))
            # get_or_create_package logs and returns None on failure; such
            # packages must not reach create_codefix_entry.
            if package is not None:
                packages.append(package)

        for vulnerability in vulnerabilities:
            for package in packages:
                codefix = self.create_codefix_entry(
                    vulnerability=vulnerability,
                    package=package,
                    commit_id=commit_id,
                    reference=reference.url,
                )
                # NOTE(review): create_codefix_entry returns the entry even
                # when it already existed, so this tally can include entries
                # that were found rather than newly created.
                if codefix:
                    created_fix_count += 1

    self.log(f"Successfully created {created_fix_count:,d} CodeFix entries.")
57+
58+
def extract_package_urls(self, reference):
    """
    Return the list of Package URLs inferred from ``reference.url``.

    Only GitHub commit URLs of the form
    ``https://github.com/<owner>/<repo>/commit/<sha>`` (optionally ending in
    ``.patch``/``.diff`` or a trailing slash) are recognized. They yield a
    single ``pkg:github/<owner>/<repo>@<sha>`` entry. Any other URL yields an
    empty list.
    """
    import re
    from urllib.parse import urlparse

    url = reference.url
    if not url:
        return []

    try:
        host = (urlparse(url).hostname or "").lower()
        path = urlparse(url).path
    except ValueError:
        # Malformed URL (e.g. unbalanced IPv6 brackets): nothing to infer.
        return []

    # Match the host exactly; a substring test would also accept unrelated
    # hosts or paths that merely contain "github".
    if host not in ("github.com", "www.github.com"):
        return []

    segments = [segment for segment in path.split("/") if segment]
    # Expected layout: owner / repo / "commit" / sha. Reading owner and repo
    # from the front of the path keeps the literal "commit" segment from
    # being mistaken for the repository name.
    if len(segments) != 4 or segments[2] != "commit":
        return []

    namespace, name, _, last_segment = segments
    # Drop a ".patch" / ".diff" suffix; a commit hash never contains a dot.
    commit = last_segment.partition(".")[0]
    if not re.fullmatch(r"[0-9a-fA-F]{7,40}", commit):
        return []

    return [f"pkg:github/{namespace}/{name}@{commit}"]
73+
74+
def extract_commit_id(self, url):
    """
    Return the commit hash found in a GitHub commit URL, or None.

    Recognizes ``https://github.com/<owner>/<repo>/commit/<sha>`` where
    ``<sha>`` is 7 to 40 hexadecimal characters, optionally followed by
    ``.patch``/``.diff`` or a trailing slash. Query strings and fragments are
    ignored. Every other URL returns None.
    """
    import re
    from urllib.parse import urlparse

    if not url:
        return None

    try:
        parsed = urlparse(url)
        host = (parsed.hostname or "").lower()
    except ValueError:
        # Malformed URL: there is no commit to extract.
        return None

    if host not in ("github.com", "www.github.com"):
        return None

    segments = [segment for segment in parsed.path.split("/") if segment]
    # Only owner / repo / "commit" / sha qualifies; issue numbers, branch
    # names or bare repository URLs must not be reported as commits.
    if len(segments) != 4 or segments[2] != "commit":
        return None

    # Drop a ".patch" / ".diff" suffix; a commit hash never contains a dot.
    commit = segments[3].partition(".")[0]
    if re.fullmatch(r"[0-9a-fA-F]{7,40}", commit):
        return commit
    return None
82+
83+
def get_or_create_package(self, purl):
    """
    Return the Package matching the Package URL ``purl``, creating it if needed.

    Any exception raised while looking up or creating the package is logged
    through ``self.log`` and None is returned instead.
    """
    try:
        package, _was_created = Package.objects.get_or_create_from_purl(purl)
    except Exception as error:
        self.log(f"Error creating package from purl {purl}: {error}")
        return None
    else:
        return package
93+
94+
def create_codefix_entry(self, vulnerability, package, commit_id, reference):
    """
    Create or update the CodeFix entry for ``package`` and link ``vulnerability``.

    A CodeFix is looked up by ``base_version=package`` and created with
    ``commit_id`` and ``reference`` when missing. When it already exists, the
    commit and reference are appended if not yet recorded. In both cases the
    vulnerability is linked to the entry.

    Return the CodeFix, or None when ``package`` is None or when an error
    occurs (the error is logged through ``self.log``).
    """
    if package is None:
        # A missing package cannot anchor a CodeFix; looking one up with
        # base_version=None would match or create an unrelated entry.
        return None

    try:
        codefix, created = CodeFix.objects.get_or_create(
            base_version=package,
            defaults={
                "commits": [commit_id],
                "references": [reference],
            },
        )

        if not created:
            # ``defaults`` only apply on creation, so merge the new commit
            # and reference into the existing entry instead of dropping them.
            commits = list(codefix.commits or [])
            references = list(codefix.references or [])
            changed = False
            if commit_id not in commits:
                commits.append(commit_id)
                codefix.commits = commits
                changed = True
            if reference not in references:
                references.append(reference)
                codefix.references = references
                changed = True
            if changed:
                codefix.save()

        # Link the vulnerability whether or not the entry is new: one
        # reference can relate to several vulnerabilities, and linking only
        # on creation would drop every vulnerability after the first.
        # NOTE(review): assumes ``vulnerabilities`` is a Django many-to-many
        # manager, where add() writes immediately and does not duplicate an
        # existing relation - confirm against the CodeFix model.
        codefix.vulnerabilities.add(vulnerability)
        return codefix
    except Exception as e:
        self.log(f"Error creating CodeFix entry: {e}")
        return None

0 commit comments

Comments
 (0)