From 7cee60fc4f54356c976d09be378c3063dcdc03b1 Mon Sep 17 00:00:00 2001 From: Villon CHEN Date: Mon, 24 Nov 2025 15:29:03 +0100 Subject: [PATCH 1/4] refactor(report): separate report generation from the cli into a new file Moving the report generation logic into a separate file as the CLI should only make calls to functions it needs. --- codesectools/sasts/all/cli.py | 237 +++++------------------------ codesectools/sasts/all/report.py | 247 +++++++++++++++++++++++++++++++ 2 files changed, 280 insertions(+), 204 deletions(-) create mode 100644 codesectools/sasts/all/report.py diff --git a/codesectools/sasts/all/cli.py b/codesectools/sasts/all/cli.py index f76933b..d3264f6 100644 --- a/codesectools/sasts/all/cli.py +++ b/codesectools/sasts/all/cli.py @@ -1,8 +1,6 @@ """Defines the command-line interface for running all available SAST tools.""" -import io import shutil -from hashlib import sha256 from pathlib import Path import typer @@ -13,9 +11,9 @@ from codesectools.datasets import DATASETS_ALL from codesectools.datasets.core.dataset import FileDataset, GitRepoDataset from codesectools.sasts import SASTS_ALL +from codesectools.sasts.all.report import ReportEngine from codesectools.sasts.all.sast import AllSAST from codesectools.sasts.core.sast import PrebuiltBuildlessSAST, PrebuiltSAST -from codesectools.utils import group_successive, shorten_path def build_cli() -> typer.Typer: @@ -85,7 +83,14 @@ def analyze( ), ] = False, ) -> None: - """Run analysis on the current project with all available SAST tools.""" + """Run analysis on the current project with all available SAST tools. + + Args: + lang: The source code language to analyze. + artifacts: The path to pre-built artifacts (for PrebuiltSAST only). + overwrite: If True, overwrite existing analysis results for the current project. 
+ + """ for sast in all_sast.sasts_by_lang.get(lang, []): if isinstance(sast, PrebuiltBuildlessSAST) and artifacts is None: print( @@ -140,7 +145,14 @@ def benchmark( ), ] = False, ) -> None: - """Run a benchmark on a dataset using all available SAST tools.""" + """Run a benchmark on a dataset using all available SAST tools. + + Args: + dataset: The name of the dataset to benchmark. + overwrite: If True, overwrite existing results. + testing: If True, run benchmark over a single dataset unit for testing. + + """ dataset_name, lang = dataset.split("_") for sast in all_sast.sasts_by_dataset.get(DATASETS_ALL[dataset_name], []): dataset = DATASETS_ALL[dataset_name](lang) @@ -205,7 +217,14 @@ def plot( typer.Option("--format", help="Figures export format"), ] = "png", ) -> None: - """Generate and display plots for a project's aggregated analysis results.""" + """Generate and display plots for a project's aggregated analysis results. + + Args: + project: The name of the project to visualize. + overwrite: If True, overwrite existing figures. + format: The export format for the figures. + + """ from codesectools.sasts.all.graphics import ProjectGraphics project_graphics = ProjectGraphics(project_name=project) @@ -228,14 +247,13 @@ def report( ), ] = False, ) -> None: - """Generate an HTML report for a project's aggregated analysis results.""" - from rich.console import Console - from rich.progress import track - from rich.style import Style - from rich.syntax import Syntax - from rich.table import Table - from rich.text import Text + """Generate an HTML report for a project's aggregated analysis results. + + Args: + project: The name of the project to report on. + overwrite: If True, overwrite existing results. 
+ """ report_dir = all_sast.output_dir / project / "report" if report_dir.is_dir(): if overwrite: @@ -247,197 +265,8 @@ def report( report_dir.mkdir(parents=True) - result = all_sast.parser.load_from_output_dir(project_name=project) - report_data = result.prepare_report_data() - - template = """ - - - - - - - -

CodeSecTools All SAST Tools Report

-

SAST Tools used: [sasts]

-

[name]

-
{code}
- - - - ^ - - - """ - template = template.replace( - "[sasts]", ", ".join(sast_name for sast_name in result.sast_names) - ) - - home_page = Console(record=True, file=io.StringIO()) - - main_table = Table(title="") - main_table.add_column("Files") - for key in list(report_data["defects"].values())[0]["score"].keys(): - main_table.add_column( - key.replace("_", " ").title(), justify="center", no_wrap=True - ) - - for defect_data in track( - report_data["defects"].values(), - description="Generating report for source file with defects...", - ): - defect_report_name = ( - f"{sha256(defect_data['source_path'].encode()).hexdigest()}.html" - ) - defect_page = Console(record=True, file=io.StringIO()) - - # Defect stat table - defect_stats_table = Table(title="") - for key in list(report_data["defects"].values())[0]["score"].keys(): - defect_stats_table.add_column( - key.replace("_", " ").title(), justify="center" - ) - - rendered_scores = [] - for v in defect_data["score"].values(): - if isinstance(v, float): - rendered_scores.append(f"~{v}") - else: - rendered_scores.append(str(v)) - - defect_stats_table.add_row(*rendered_scores) - defect_page.print(defect_stats_table) - - defect_report_redirect = Text( - shorten_path(defect_data["source_path"], 60), - style=Style(link=defect_report_name), - ) - - main_table.add_row(defect_report_redirect, *rendered_scores) - - # Defect table - defect_table = Table(title="", show_lines=True) - defect_table.add_column("Location", justify="center") - defect_table.add_column("SAST", justify="center") - defect_table.add_column("CWE", justify="center") - defect_table.add_column("Message") - rows = [] - for defect in defect_data["raw"]: - groups = group_successive(defect.lines) - if groups: - for group in groups: - start, end = group[0], group[-1] - shortcut = Text(f"{start}", style=Style(link=f"#L{start}")) - cwe_link = ( - Text( - f"CWE-{defect.cwe.id}", - style=Style( - link=f"https://cwe.mitre.org/data/definitions/{defect.cwe.id}.html" - ), 
- ) - if defect.cwe.id != -1 - else "None" - ) - rows.append( - (start, shortcut, defect.sast, cwe_link, defect.message) - ) - else: - cwe_link = ( - Text( - f"CWE-{defect.cwe.id}", - style=Style( - link=f"https://cwe.mitre.org/data/definitions/{defect.cwe.id}.html" - ), - ) - if defect.cwe.id != -1 - else "None" - ) - rows.append( - (float("inf"), "None", defect.sast, cwe_link, defect.message) - ) - - for row in sorted(rows, key=lambda r: r[0]): - defect_table.add_row(*row[1:]) - defect_page.print(defect_table) - - # Syntax - if not Path(defect_data["source_path"]).is_file(): - tippy_calls = "" - print( - f"Source file {defect_data['source_path']} not found, skipping it..." - ) - else: - syntax = Syntax.from_path(defect_data["source_path"], line_numbers=True) - tooltips = {} - highlights = {} - for location in defect_data["locations"]: - sast, cwe, message, (start, end) = location - for i in range(start, end + 1): - text = ( - f"{sast}: {message} (CWE-{cwe.id})" - if cwe.id != -1 - else f"{sast}: {message}" - ) - if highlights.get(i): - highlights[i].add(text) - else: - highlights[i] = {text} - - for line, texts in highlights.items(): - element_id = f"L{line}" - bgcolor = "red" if len(texts) > 1 else "yellow" - syntax.stylize_range( - Style(bgcolor=bgcolor, link=f"HACK{element_id}"), - start=(line, 0), - end=(line + 1, 0), - ) - tooltips[element_id] = "
".join(text for text in texts) - - tippy_calls = "" - for element_id, content in tooltips.items(): - tippy_calls += f"""tippy('#{element_id}', {{ content: `{content.replace("`", "\\`")}`, allowHTML: true, interactive: true }});\n""" - - defect_page.print(syntax) - - html_content = defect_page.export_html(code_format=template) - html_content = html_content.replace('href="HACK', 'id="') - html_content = html_content.replace("[name]", defect_data["source_path"]) - html_content = html_content.replace("[tippy_calls]", tippy_calls) - - report_defect_file = report_dir / defect_report_name - report_defect_file.write_text(html_content) - - home_page.print(main_table) - html_content = home_page.export_html(code_format=template) - html_content = html_content.replace("[name]", f"Project: {project}") - - report_home_file = report_dir / "home.html" - report_home_file.write_text(html_content) - + report_engine = ReportEngine(project=project, all_sast=all_sast) + report_engine.generate() print(f"Report generated at {report_dir.resolve()}") return cli diff --git a/codesectools/sasts/all/report.py b/codesectools/sasts/all/report.py new file mode 100644 index 0000000..00a1294 --- /dev/null +++ b/codesectools/sasts/all/report.py @@ -0,0 +1,247 @@ +"""Generates HTML reports for aggregated SAST analysis results.""" + +import io +from hashlib import sha256 +from pathlib import Path + +from rich import print + +from codesectools.sasts.all.sast import AllSAST +from codesectools.utils import group_successive, shorten_path + + +class ReportEngine: + """Generate interactive HTML reports for SAST analysis results. + + Attributes: + TEMPLATE (str): The HTML template used for report generation. + project (str): The name of the project. + all_sast (AllSAST): The AllSAST manager instance. + report_dir (Path): The directory where reports are saved. + result (AllSASTAnalysisResult): The parsed analysis results. + report_data (dict): The data prepared for rendering the report. 
+ + """ + + TEMPLATE = """ + + + + + + + +

CodeSecTools All SAST Tools Report

+

SAST Tools used: [sasts]

+

[name]

+
{code}
+ + + + ^ + + + """ + + def __init__(self, project: str, all_sast: AllSAST) -> None: + """Initialize the ReportEngine. + + Args: + project: The name of the project. + all_sast: The AllSAST instance. + + """ + self.project = project + self.all_sast = all_sast + self.report_dir = all_sast.output_dir / project / "report" + + self.result = all_sast.parser.load_from_output_dir(project_name=project) + self.report_data = self.result.prepare_report_data() + + def generate_single_defect(self, file_data: dict) -> tuple: + """Generate the HTML report for a single file with defects.""" + from rich.console import Console + from rich.style import Style + from rich.syntax import Syntax + from rich.table import Table + from rich.text import Text + + file_report_name = ( + f"{sha256(file_data['source_path'].encode()).hexdigest()}.html" + ) + file_page = Console(record=True, file=io.StringIO()) + + # Defect stat table + file_stats_table = Table(title="") + for key in list(self.report_data["files"].values())[0]["count"].keys(): + file_stats_table.add_column(key.replace("_", " ").title(), justify="center") + + rendered_scores = [] + for v in file_data["count"].values(): + if isinstance(v, float): + rendered_scores.append(f"~{v}") + else: + rendered_scores.append(str(v)) + + file_stats_table.add_row(*rendered_scores) + file_page.print(file_stats_table) + + file_report_redirect = Text( + shorten_path(file_data["source_path"], 60), + style=Style(link=file_report_name), + ) + + # Defect table + defect_table = Table(title="", show_lines=True) + defect_table.add_column("Location", justify="center") + defect_table.add_column("SAST", justify="center") + defect_table.add_column("CWE", justify="center") + defect_table.add_column("Message") + rows = [] + for defect in file_data["defects"]: + groups = group_successive(defect.lines) + if groups: + for group in groups: + start, end = group[0], group[-1] + shortcut = Text(f"{start}", style=Style(link=f"#L{start}")) + cwe_link = ( + Text( + 
f"CWE-{defect.cwe.id}", + style=Style( + link=f"https://cwe.mitre.org/data/definitions/{defect.cwe.id}.html" + ), + ) + if defect.cwe.id != -1 + else "None" + ) + rows.append( + (start, shortcut, defect.sast, cwe_link, defect.message) + ) + else: + cwe_link = ( + Text( + f"CWE-{defect.cwe.id}", + style=Style( + link=f"https://cwe.mitre.org/data/definitions/{defect.cwe.id}.html" + ), + ) + if defect.cwe.id != -1 + else "None" + ) + rows.append( + (float("inf"), "None", defect.sast, cwe_link, defect.message) + ) + + for row in sorted(rows, key=lambda r: r[0]): + defect_table.add_row(*row[1:]) + file_page.print(defect_table) + + # Syntax + if not Path(file_data["source_path"]).is_file(): + tippy_calls = "" + print(f"Source file {file_data['source_path']} not found, skipping it...") + else: + syntax = Syntax.from_path(file_data["source_path"], line_numbers=True) + tooltips = {} + highlights = {} + for location in file_data["locations"]: + sast, cwe, message, (start, end) = location + for i in range(start, end + 1): + text = ( + f"{sast}: {message} (CWE-{cwe.id})" + if cwe.id != -1 + else f"{sast}: {message}" + ) + if highlights.get(i): + highlights[i].add(text) + else: + highlights[i] = {text} + + for line, texts in highlights.items(): + element_id = f"L{line}" + bgcolor = "red" if len(texts) > 1 else "yellow" + syntax.stylize_range( + Style(bgcolor=bgcolor, link=f"HACK{element_id}"), + start=(line, 0), + end=(line + 1, 0), + ) + tooltips[element_id] = "
".join(text for text in texts) + + tippy_calls = "" + for element_id, content in tooltips.items(): + tippy_calls += f"""tippy('#{element_id}', {{ content: `{content.replace("`", "\\`")}`, allowHTML: true, interactive: true }});\n""" + + file_page.print(syntax) + + html_content = file_page.export_html(code_format=self.TEMPLATE) + html_content = html_content.replace('href="HACK', 'id="') + html_content = html_content.replace("[name]", file_data["source_path"]) + html_content = html_content.replace("[tippy_calls]", tippy_calls) + + report_file = self.report_dir / file_report_name + report_file.write_text(html_content) + + return file_report_redirect, rendered_scores + + def generate(self) -> None: + """Generate the HTML report. + + Creates the report directory and generates HTML files for the main view + and for each file with defects. + """ + from rich.console import Console + from rich.progress import track + from rich.table import Table + + self.TEMPLATE = self.TEMPLATE.replace( + "[sasts]", ", ".join(sast_name for sast_name in self.result.sast_names) + ) + + home_page = Console(record=True, file=io.StringIO()) + + main_table = Table(title="") + main_table.add_column("Files") + for key in list(self.report_data["files"].values())[0]["score"].keys(): + main_table.add_column( + key.replace("_", " ").title(), justify="center", no_wrap=True + ) + + for file_data in track( + self.report_data["files"].values(), + description="Generating report for source file with defects...", + ): + file_report_redirect, rendered_scores = self.generate_single_defect( + file_data + ) + main_table.add_row(file_report_redirect, *rendered_scores) + + home_page.print(main_table) + html_content = home_page.export_html(code_format=self.TEMPLATE) + html_content = html_content.replace("[name]", f"Project: {self.project}") + + report_home_file = self.report_dir / "home.html" + report_home_file.write_text(html_content) From 2a49111bc764a16583dc367cf9372a05f2e5cc36 Mon Sep 17 00:00:00 2001 From: 
Villon CHEN Date: Mon, 24 Nov 2025 15:34:33 +0100 Subject: [PATCH 2/4] feat(report): add defect count by type Previously, the report displayed the score in place of the count, because the real count was not provided. --- codesectools/sasts/all/parser.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/codesectools/sasts/all/parser.py b/codesectools/sasts/all/parser.py index f932bef..715e586 100644 --- a/codesectools/sasts/all/parser.py +++ b/codesectools/sasts/all/parser.py @@ -192,6 +192,12 @@ def stats_by_scores(self) -> dict: "defects_same_location_same_cwe": defects_same_location_same_cwe * 8, }, + "count": { + "defect_number": len(defects), + "defects_same_cwe": defects_same_cwe, + "defects_same_location": defects_same_location, + "defects_same_location_same_cwe": defects_same_location_same_cwe, + }, } return stats @@ -223,6 +229,7 @@ def prepare_report_data(self) -> dict: report["defects"][defect_file] = { "score": scores[defect_file]["score"], + "count": scores[defect_file]["count"], "source_path": str(self.source_path / defect.filepath), "locations": locations, "raw": defects, From b4b981b3c84735b1f4629c1d570932c6730de847 Mon Sep 17 00:00:00 2001 From: Villon CHEN Date: Mon, 24 Nov 2025 15:42:17 +0100 Subject: [PATCH 3/4] refactor(report): correct `defect` to `file` The main object in the report represents a "file" containing "defects", not a "defect" alone. 
--- codesectools/sasts/all/parser.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/codesectools/sasts/all/parser.py b/codesectools/sasts/all/parser.py index 715e586..2c3641d 100644 --- a/codesectools/sasts/all/parser.py +++ b/codesectools/sasts/all/parser.py @@ -204,7 +204,7 @@ def stats_by_scores(self) -> dict: def prepare_report_data(self) -> dict: """Prepare data needed to generate a report.""" - report = {"score": {}, "defects": {}} + report = {"score": {}, "files": {}} scores = self.stats_by_scores() report["score"] = {k: 0 for k, _ in list(scores.values())[0]["score"].items()} @@ -227,18 +227,18 @@ def prepare_report_data(self) -> dict: (defect.sast, defect.cwe, defect.message, (start, end)) ) - report["defects"][defect_file] = { + report["files"][defect_file] = { "score": scores[defect_file]["score"], "count": scores[defect_file]["count"], "source_path": str(self.source_path / defect.filepath), "locations": locations, - "raw": defects, + "defects": defects, } - report["defects"] = { + report["files"] = { k: v for k, v in sorted( - report["defects"].items(), + report["files"].items(), key=lambda item: (sum(v for v in item[1]["score"].values())), reverse=True, ) From c81cc79e6341d84003a4d0713a1f596e199c8fbe Mon Sep 17 00:00:00 2001 From: Villon CHEN Date: Mon, 24 Nov 2025 17:57:34 +0100 Subject: [PATCH 4/4] chore(release): bump project version --- pyproject.toml | 2 +- uv.lock | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8808a1a..6d990f6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "CodeSecTools" -version = "0.13.5" +version = "0.13.6" description = "A framework for code security that provides abstractions for static analysis tools and datasets to support their integration, testing, and evaluation." 
readme = "README.md" license = "AGPL-3.0-only" diff --git a/uv.lock b/uv.lock index b16e6ba..104af77 100644 --- a/uv.lock +++ b/uv.lock @@ -221,7 +221,7 @@ wheels = [ [[package]] name = "codesectools" -version = "0.13.5" +version = "0.13.6" source = { editable = "." } dependencies = [ { name = "gitpython" },