From 7cee60fc4f54356c976d09be378c3063dcdc03b1 Mon Sep 17 00:00:00 2001
From: Villon CHEN <villon.chen@oppida.fr>
Date: Mon, 24 Nov 2025 15:29:03 +0100
Subject: [PATCH 1/4] refactor(report): separate report generation from the cli
 into a new file

Move the report generation logic into a separate module so that the CLI only calls the functions it needs.
---
 codesectools/sasts/all/cli.py    | 237 +++++------------------------
 codesectools/sasts/all/report.py | 247 +++++++++++++++++++++++++++++++
 2 files changed, 280 insertions(+), 204 deletions(-)
 create mode 100644 codesectools/sasts/all/report.py

diff --git a/codesectools/sasts/all/cli.py b/codesectools/sasts/all/cli.py
index f76933b..d3264f6 100644
--- a/codesectools/sasts/all/cli.py
+++ b/codesectools/sasts/all/cli.py
@@ -1,8 +1,6 @@
 """Defines the command-line interface for running all available SAST tools."""
 
-import io
 import shutil
-from hashlib import sha256
 from pathlib import Path
 
 import typer
@@ -13,9 +11,9 @@
 from codesectools.datasets import DATASETS_ALL
 from codesectools.datasets.core.dataset import FileDataset, GitRepoDataset
 from codesectools.sasts import SASTS_ALL
+from codesectools.sasts.all.report import ReportEngine
 from codesectools.sasts.all.sast import AllSAST
 from codesectools.sasts.core.sast import PrebuiltBuildlessSAST, PrebuiltSAST
-from codesectools.utils import group_successive, shorten_path
 
 
 def build_cli() -> typer.Typer:
@@ -85,7 +83,14 @@ def analyze(
             ),
         ] = False,
     ) -> None:
-        """Run analysis on the current project with all available SAST tools."""
+        """Run analysis on the current project with all available SAST tools.
+
+        Args:
+            lang: The source code language to analyze.
+            artifacts: The path to pre-built artifacts (for PrebuiltSAST only).
+            overwrite: If True, overwrite existing analysis results for the current project.
+
+        """
         for sast in all_sast.sasts_by_lang.get(lang, []):
             if isinstance(sast, PrebuiltBuildlessSAST) and artifacts is None:
                 print(
@@ -140,7 +145,14 @@ def benchmark(
             ),
         ] = False,
     ) -> None:
-        """Run a benchmark on a dataset using all available SAST tools."""
+        """Run a benchmark on a dataset using all available SAST tools.
+
+        Args:
+            dataset: The name of the dataset to benchmark.
+            overwrite: If True, overwrite existing results.
+            testing: If True, run benchmark over a single dataset unit for testing.
+
+        """
         dataset_name, lang = dataset.split("_")
         for sast in all_sast.sasts_by_dataset.get(DATASETS_ALL[dataset_name], []):
             dataset = DATASETS_ALL[dataset_name](lang)
@@ -205,7 +217,14 @@ def plot(
             typer.Option("--format", help="Figures export format"),
         ] = "png",
     ) -> None:
-        """Generate and display plots for a project's aggregated analysis results."""
+        """Generate and display plots for a project's aggregated analysis results.
+
+        Args:
+            project: The name of the project to visualize.
+            overwrite: If True, overwrite existing figures.
+            format: The export format for the figures.
+
+        """
         from codesectools.sasts.all.graphics import ProjectGraphics
 
         project_graphics = ProjectGraphics(project_name=project)
@@ -228,14 +247,13 @@ def report(
             ),
         ] = False,
     ) -> None:
-        """Generate an HTML report for a project's aggregated analysis results."""
-        from rich.console import Console
-        from rich.progress import track
-        from rich.style import Style
-        from rich.syntax import Syntax
-        from rich.table import Table
-        from rich.text import Text
+        """Generate an HTML report for a project's aggregated analysis results.
+
+        Args:
+            project: The name of the project to report on.
+            overwrite: If True, overwrite existing results.
 
+        """
         report_dir = all_sast.output_dir / project / "report"
         if report_dir.is_dir():
             if overwrite:
@@ -247,197 +265,8 @@ def report(
 
         report_dir.mkdir(parents=True)
 
-        result = all_sast.parser.load_from_output_dir(project_name=project)
-        report_data = result.prepare_report_data()
-
-        template = """
-    <!DOCTYPE html>
-    <html>
-    <head>
-    <meta charset="UTF-8">
-    <style>
-    {stylesheet}
-    body {{
-        color: {foreground};
-        background-color: {background};
-        font-family: Menlo, 'DejaVu Sans Mono', consolas, 'Courier New', monospace;
-    }}
-    .tippy-box {{
-        background-color: white;
-        color: black;
-    }}
-    img {{
-        display: block;
-        margin: auto;
-        border: solid black 1px;
-    }}
-    #top {{
-        position: fixed;
-        bottom: 20px;
-        right: 30px;
-        background-color: white;
-        padding: 10px;
-        border: solid black 5px;
-    }}
-    </style>
-    </head>
-    <body>
-        <a href="./home.html"><h1>CodeSecTools All SAST Tools Report</h1></a>
-        <h3>SAST Tools used: [sasts]</h3>
-        <h2>[name]</h2>
-        <pre style="font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace"><code style="font-family:inherit">{code}</code></pre>
-        <script src="https://unpkg.com/@popperjs/core@2"></script>
-        <script src="https://unpkg.com/tippy.js@6"></script>
-        <script>[tippy_calls]</script>
-        <a href="#" id="top">^</a>
-    </body>
-    </html>
-    """
-        template = template.replace(
-            "[sasts]", ", ".join(sast_name for sast_name in result.sast_names)
-        )
-
-        home_page = Console(record=True, file=io.StringIO())
-
-        main_table = Table(title="")
-        main_table.add_column("Files")
-        for key in list(report_data["defects"].values())[0]["score"].keys():
-            main_table.add_column(
-                key.replace("_", " ").title(), justify="center", no_wrap=True
-            )
-
-        for defect_data in track(
-            report_data["defects"].values(),
-            description="Generating report for source file with defects...",
-        ):
-            defect_report_name = (
-                f"{sha256(defect_data['source_path'].encode()).hexdigest()}.html"
-            )
-            defect_page = Console(record=True, file=io.StringIO())
-
-            # Defect stat table
-            defect_stats_table = Table(title="")
-            for key in list(report_data["defects"].values())[0]["score"].keys():
-                defect_stats_table.add_column(
-                    key.replace("_", " ").title(), justify="center"
-                )
-
-            rendered_scores = []
-            for v in defect_data["score"].values():
-                if isinstance(v, float):
-                    rendered_scores.append(f"~{v}")
-                else:
-                    rendered_scores.append(str(v))
-
-            defect_stats_table.add_row(*rendered_scores)
-            defect_page.print(defect_stats_table)
-
-            defect_report_redirect = Text(
-                shorten_path(defect_data["source_path"], 60),
-                style=Style(link=defect_report_name),
-            )
-
-            main_table.add_row(defect_report_redirect, *rendered_scores)
-
-            # Defect table
-            defect_table = Table(title="", show_lines=True)
-            defect_table.add_column("Location", justify="center")
-            defect_table.add_column("SAST", justify="center")
-            defect_table.add_column("CWE", justify="center")
-            defect_table.add_column("Message")
-            rows = []
-            for defect in defect_data["raw"]:
-                groups = group_successive(defect.lines)
-                if groups:
-                    for group in groups:
-                        start, end = group[0], group[-1]
-                        shortcut = Text(f"{start}", style=Style(link=f"#L{start}"))
-                        cwe_link = (
-                            Text(
-                                f"CWE-{defect.cwe.id}",
-                                style=Style(
-                                    link=f"https://cwe.mitre.org/data/definitions/{defect.cwe.id}.html"
-                                ),
-                            )
-                            if defect.cwe.id != -1
-                            else "None"
-                        )
-                        rows.append(
-                            (start, shortcut, defect.sast, cwe_link, defect.message)
-                        )
-                else:
-                    cwe_link = (
-                        Text(
-                            f"CWE-{defect.cwe.id}",
-                            style=Style(
-                                link=f"https://cwe.mitre.org/data/definitions/{defect.cwe.id}.html"
-                            ),
-                        )
-                        if defect.cwe.id != -1
-                        else "None"
-                    )
-                    rows.append(
-                        (float("inf"), "None", defect.sast, cwe_link, defect.message)
-                    )
-
-            for row in sorted(rows, key=lambda r: r[0]):
-                defect_table.add_row(*row[1:])
-            defect_page.print(defect_table)
-
-            # Syntax
-            if not Path(defect_data["source_path"]).is_file():
-                tippy_calls = ""
-                print(
-                    f"Source file {defect_data['source_path']} not found, skipping it..."
-                )
-            else:
-                syntax = Syntax.from_path(defect_data["source_path"], line_numbers=True)
-                tooltips = {}
-                highlights = {}
-                for location in defect_data["locations"]:
-                    sast, cwe, message, (start, end) = location
-                    for i in range(start, end + 1):
-                        text = (
-                            f"<b>{sast}</b>: <i>{message} (CWE-{cwe.id})</i>"
-                            if cwe.id != -1
-                            else f"<b>{sast}</b>: <i>{message}</i>"
-                        )
-                        if highlights.get(i):
-                            highlights[i].add(text)
-                        else:
-                            highlights[i] = {text}
-
-                for line, texts in highlights.items():
-                    element_id = f"L{line}"
-                    bgcolor = "red" if len(texts) > 1 else "yellow"
-                    syntax.stylize_range(
-                        Style(bgcolor=bgcolor, link=f"HACK{element_id}"),
-                        start=(line, 0),
-                        end=(line + 1, 0),
-                    )
-                    tooltips[element_id] = "<hr>".join(text for text in texts)
-
-                tippy_calls = ""
-                for element_id, content in tooltips.items():
-                    tippy_calls += f"""tippy('#{element_id}', {{ content: `{content.replace("`", "\\`")}`, allowHTML: true, interactive: true }});\n"""
-
-                defect_page.print(syntax)
-
-            html_content = defect_page.export_html(code_format=template)
-            html_content = html_content.replace('href="HACK', 'id="')
-            html_content = html_content.replace("[name]", defect_data["source_path"])
-            html_content = html_content.replace("[tippy_calls]", tippy_calls)
-
-            report_defect_file = report_dir / defect_report_name
-            report_defect_file.write_text(html_content)
-
-        home_page.print(main_table)
-        html_content = home_page.export_html(code_format=template)
-        html_content = html_content.replace("[name]", f"Project: {project}")
-
-        report_home_file = report_dir / "home.html"
-        report_home_file.write_text(html_content)
-
+        report_engine = ReportEngine(project=project, all_sast=all_sast)
+        report_engine.generate()
         print(f"Report generated at {report_dir.resolve()}")
 
     return cli
diff --git a/codesectools/sasts/all/report.py b/codesectools/sasts/all/report.py
new file mode 100644
index 0000000..00a1294
--- /dev/null
+++ b/codesectools/sasts/all/report.py
@@ -0,0 +1,247 @@
+"""Generates HTML reports for aggregated SAST analysis results."""
+
+import io
+from hashlib import sha256
+from pathlib import Path
+
+from rich import print
+
+from codesectools.sasts.all.sast import AllSAST
+from codesectools.utils import group_successive, shorten_path
+
+
+class ReportEngine:
+    """Generate interactive HTML reports for SAST analysis results.
+
+    Attributes:
+        TEMPLATE (str): The HTML page template passed to rich's ``export_html``.
+        project (str): The name of the project.
+        all_sast (AllSAST): The AllSAST manager instance.
+        report_dir (Path): The directory where report pages are written.
+        result (AllSASTAnalysisResult): The parsed analysis results.
+        report_data (dict): The output of ``result.prepare_report_data()``.
+
+    """
+
+    TEMPLATE = """
+    <!DOCTYPE html>
+    <html>
+    <head>
+    <meta charset="UTF-8">
+    <style>
+    {stylesheet}
+    body {{
+        color: {foreground};
+        background-color: {background};
+        font-family: Menlo, 'DejaVu Sans Mono', consolas, 'Courier New', monospace;
+    }}
+    .tippy-box {{
+        background-color: white;
+        color: black;
+    }}
+    img {{
+        display: block;
+        margin: auto;
+        border: solid black 1px;
+    }}
+    #top {{
+        position: fixed;
+        bottom: 20px;
+        right: 30px;
+        background-color: white;
+        padding: 10px;
+        border: solid black 5px;
+    }}
+    </style>
+    </head>
+    <body>
+        <a href="./home.html"><h1>CodeSecTools All SAST Tools Report</h1></a>
+        <h3>SAST Tools used: [sasts]</h3>
+        <h2>[name]</h2>
+        <pre style="font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace"><code style="font-family:inherit">{code}</code></pre>
+        <script src="https://unpkg.com/@popperjs/core@2"></script>
+        <script src="https://unpkg.com/tippy.js@6"></script>
+        <script>[tippy_calls]</script>
+        <a href="#" id="top">^</a>
+    </body>
+    </html>
+    """
+
+    def __init__(self, project: str, all_sast: AllSAST) -> None:
+        """Initialize the ReportEngine and prepare the project's report data.
+
+        Args:
+            project: The name of the project.
+            all_sast: The AllSAST instance providing the output directory and parser.
+
+        """
+        self.project = project
+        self.all_sast = all_sast
+        self.report_dir = all_sast.output_dir / project / "report"
+        # Results are parsed eagerly, at construction time, via the AllSAST parser.
+        self.result = all_sast.parser.load_from_output_dir(project_name=project)
+        self.report_data = self.result.prepare_report_data()
+
+    def generate_single_defect(self, file_data: dict) -> tuple:
+        """Generate the HTML report page for a single file with defects.
+
+        Args:
+            file_data: One entry of ``report_data["files"]``.
+
+        Returns:
+            The link text pointing to the written page and the file's counts
+            rendered as strings, both meant for the home page table.
+
+        """
+        import json
+        from html import escape
+
+        from rich.console import Console
+        from rich.style import Style
+        from rich.syntax import Syntax
+        from rich.table import Table
+        from rich.text import Text
+
+        file_report_name = (
+            f"{sha256(file_data['source_path'].encode()).hexdigest()}.html"
+        )
+        file_page = Console(record=True, file=io.StringIO())
+
+        # Defect stat table
+        file_stats_table = Table(title="")
+        for key in file_data["count"]:
+            file_stats_table.add_column(key.replace("_", " ").title(), justify="center")
+
+        rendered_scores = [
+            f"~{v}" if isinstance(v, float) else str(v)
+            for v in file_data["count"].values()
+        ]
+
+        file_stats_table.add_row(*rendered_scores)
+        file_page.print(file_stats_table)
+
+        file_report_redirect = Text(
+            shorten_path(file_data["source_path"], 60),
+            style=Style(link=file_report_name),
+        )
+
+        # Defect table
+        defect_table = Table(title="", show_lines=True)
+        defect_table.add_column("Location", justify="center")
+        defect_table.add_column("SAST", justify="center")
+        defect_table.add_column("CWE", justify="center")
+        defect_table.add_column("Message")
+        rows = []
+        for defect in file_data["defects"]:
+            # The CWE cell is the same for every location of a defect.
+            cwe_link = (
+                Text(
+                    f"CWE-{defect.cwe.id}",
+                    style=Style(
+                        link=f"https://cwe.mitre.org/data/definitions/{defect.cwe.id}.html"
+                    ),
+                )
+                if defect.cwe.id != -1
+                else "None"
+            )
+            groups = group_successive(defect.lines)
+            for group in groups:
+                start = group[0]
+                shortcut = Text(f"{start}", style=Style(link=f"#L{start}"))
+                rows.append((start, shortcut, defect.sast, cwe_link, defect.message))
+            if not groups:
+                # Defects without line information are sorted last.
+                rows.append(
+                    (float("inf"), "None", defect.sast, cwe_link, defect.message)
+                )
+
+        for row in sorted(rows, key=lambda r: r[0]):
+            defect_table.add_row(*row[1:])
+        file_page.print(defect_table)
+
+        # Syntax
+        if not Path(file_data["source_path"]).is_file():
+            tippy_calls = ""
+            print(f"Source file {file_data['source_path']} not found, skipping it...")
+        else:
+            syntax = Syntax.from_path(file_data["source_path"], line_numbers=True)
+            highlights = {}
+            for sast, cwe, message, (start, end) in file_data["locations"]:
+                # Tooltips are rendered with allowHTML, so escape tool output.
+                suffix = f" (CWE-{cwe.id})" if cwe.id != -1 else ""
+                detail = f"{escape(str(message))}{suffix}"
+                text = f"<b>{escape(str(sast))}</b>: <i>{detail}</i>"
+                for i in range(start, end + 1):
+                    highlights.setdefault(i, set()).add(text)
+
+            tooltips = {}
+            for line, texts in highlights.items():
+                element_id = f"L{line}"
+                bgcolor = "red" if len(texts) > 1 else "yellow"
+                syntax.stylize_range(
+                    Style(bgcolor=bgcolor, link=f"HACK{element_id}"),
+                    start=(line, 0),
+                    end=(line + 1, 0),
+                )
+                # Sorted so the generated page is identical across runs.
+                tooltips[element_id] = "<hr>".join(sorted(texts))
+
+            # json.dumps yields a JS string literal, safe for any content.
+            tippy_calls = "".join(
+                f"tippy('#{element_id}', {{ content: {json.dumps(content)}, "
+                "allowHTML: true, interactive: true });\n"
+                for element_id, content in tooltips.items()
+            )
+
+            file_page.print(syntax)
+
+        html_content = file_page.export_html(code_format=self.TEMPLATE)
+        # Turn the placeholder links into element ids used as tooltip anchors.
+        html_content = html_content.replace('href="HACK', 'id="')
+        html_content = html_content.replace("[name]", escape(file_data["source_path"]))
+        html_content = html_content.replace("[tippy_calls]", tippy_calls)
+
+        report_file = self.report_dir / file_report_name
+        # The template declares UTF-8, so do not rely on the locale default.
+        report_file.write_text(html_content, encoding="utf-8")
+
+        return file_report_redirect, rendered_scores
+
+    def generate(self) -> None:
+        """Generate the HTML report.
+
+        Writes one HTML page per file with defects plus a ``home.html``
+        index into ``report_dir``, which must already exist.
+        """
+        from rich.console import Console
+        from rich.progress import track
+        from rich.table import Table
+
+        self.TEMPLATE = self.TEMPLATE.replace(
+            "[sasts]", ", ".join(self.result.sast_names)
+        )
+
+        home_page = Console(record=True, file=io.StringIO())
+
+        main_table = Table(title="")
+        main_table.add_column("Files")
+        # Header keys come from "count" to match the cells of each row.
+        for key in list(self.report_data["files"].values())[0]["count"]:
+            title = key.replace("_", " ").title()
+            main_table.add_column(title, justify="center", no_wrap=True)
+
+        for file_data in track(
+            self.report_data["files"].values(),
+            description="Generating report for source file with defects...",
+        ):
+            link, counts = self.generate_single_defect(file_data)
+            main_table.add_row(link, *counts)
+
+        home_page.print(main_table)
+        html_content = home_page.export_html(code_format=self.TEMPLATE)
+        html_content = html_content.replace("[name]", f"Project: {self.project}")
+        # The home page has no tooltips; empty the placeholder left in the script.
+        html_content = html_content.replace("[tippy_calls]", "")
+
+        # The template declares UTF-8, so do not rely on the locale default.
+        (self.report_dir / "home.html").write_text(html_content, encoding="utf-8")

From 2a49111bc764a16583dc367cf9372a05f2e5cc36 Mon Sep 17 00:00:00 2001
From: Villon CHEN <villon.chen@oppida.fr>
Date: Mon, 24 Nov 2025 15:34:33 +0100
Subject: [PATCH 2/4] feat(report): add defect count by type

Previously, the report displayed the score in place of the defect count, because the real count was not provided.
---
 codesectools/sasts/all/parser.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/codesectools/sasts/all/parser.py b/codesectools/sasts/all/parser.py
index f932bef..715e586 100644
--- a/codesectools/sasts/all/parser.py
+++ b/codesectools/sasts/all/parser.py
@@ -192,6 +192,12 @@ def stats_by_scores(self) -> dict:
                     "defects_same_location_same_cwe": defects_same_location_same_cwe
                     * 8,
                 },
+                "count": {
+                    "defect_number": len(defects),
+                    "defects_same_cwe": defects_same_cwe,
+                    "defects_same_location": defects_same_location,
+                    "defects_same_location_same_cwe": defects_same_location_same_cwe,
+                },
             }
 
         return stats
@@ -223,6 +229,7 @@ def prepare_report_data(self) -> dict:
 
             report["defects"][defect_file] = {
                 "score": scores[defect_file]["score"],
+                "count": scores[defect_file]["count"],
                 "source_path": str(self.source_path / defect.filepath),
                 "locations": locations,
                 "raw": defects,

From b4b981b3c84735b1f4629c1d570932c6730de847 Mon Sep 17 00:00:00 2001
From: Villon CHEN <villon.chen@oppida.fr>
Date: Mon, 24 Nov 2025 15:42:17 +0100
Subject: [PATCH 3/4] refactor(report): correct `defect` to `file`

The main object in the report represents a "file" containing "defects", not a single "defect".
---
 codesectools/sasts/all/parser.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/codesectools/sasts/all/parser.py b/codesectools/sasts/all/parser.py
index 715e586..2c3641d 100644
--- a/codesectools/sasts/all/parser.py
+++ b/codesectools/sasts/all/parser.py
@@ -204,7 +204,7 @@ def stats_by_scores(self) -> dict:
 
     def prepare_report_data(self) -> dict:
         """Prepare data needed to generate a report."""
-        report = {"score": {}, "defects": {}}
+        report = {"score": {}, "files": {}}
         scores = self.stats_by_scores()
 
         report["score"] = {k: 0 for k, _ in list(scores.values())[0]["score"].items()}
@@ -227,18 +227,18 @@ def prepare_report_data(self) -> dict:
                         (defect.sast, defect.cwe, defect.message, (start, end))
                     )
 
-            report["defects"][defect_file] = {
+            report["files"][defect_file] = {
                 "score": scores[defect_file]["score"],
                 "count": scores[defect_file]["count"],
                 "source_path": str(self.source_path / defect.filepath),
                 "locations": locations,
-                "raw": defects,
+                "defects": defects,
             }
 
-        report["defects"] = {
+        report["files"] = {
             k: v
             for k, v in sorted(
-                report["defects"].items(),
+                report["files"].items(),
                 key=lambda item: (sum(v for v in item[1]["score"].values())),
                 reverse=True,
             )

From c81cc79e6341d84003a4d0713a1f596e199c8fbe Mon Sep 17 00:00:00 2001
From: Villon CHEN <villon.chen@oppida.fr>
Date: Mon, 24 Nov 2025 17:57:34 +0100
Subject: [PATCH 4/4] chore(release): bump project version

---
 pyproject.toml | 2 +-
 uv.lock        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 8808a1a..6d990f6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "CodeSecTools"
-version = "0.13.5"
+version = "0.13.6"
 description = "A framework for code security that provides abstractions for static analysis tools and datasets to support their integration, testing, and evaluation."
 readme = "README.md"
 license = "AGPL-3.0-only"
diff --git a/uv.lock b/uv.lock
index b16e6ba..104af77 100644
--- a/uv.lock
+++ b/uv.lock
@@ -221,7 +221,7 @@ wheels = [
 
 [[package]]
 name = "codesectools"
-version = "0.13.5"
+version = "0.13.6"
 source = { editable = "." }
 dependencies = [
     { name = "gitpython" },