From 7b56b91fce36d80833a31509060327bbc35b4c50 Mon Sep 17 00:00:00 2001 From: Villon CHEN Date: Wed, 19 Nov 2025 14:17:42 +0100 Subject: [PATCH 1/9] perf: add tuna for import time profiling --- Makefile | 3 +++ pyproject.toml | 1 + uv.lock | 11 +++++++++++ 3 files changed, 15 insertions(+) diff --git a/Makefile b/Makefile index ba19f48..5c124b6 100644 --- a/Makefile +++ b/Makefile @@ -12,6 +12,9 @@ check: ## Lint, format, and type-check the code @ruff format @ty check +profile: ## Run profiling + @python3 -X importtime -m codesectools 1>/dev/null 2>/tmp/import.log || tuna /tmp/import.log + test: ## Run tests in a Docker container @docker compose build 1>/dev/null @docker compose run --rm no-sast diff --git a/pyproject.toml b/pyproject.toml index a9d8dbd..781c8f7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,6 +31,7 @@ Repository = "https://github.com/OPPIDA/CodeSecTools" dev = [ "pre-commit>=4.3.0", "ruff>=0.12.8", + "tuna>=0.5.11", "ty>=0.0.1a17", "types-lxml>=2025.8.25", ] diff --git a/uv.lock b/uv.lock index a964ef6..ffcefb7 100644 --- a/uv.lock +++ b/uv.lock @@ -240,6 +240,7 @@ dependencies = [ dev = [ { name = "pre-commit" }, { name = "ruff" }, + { name = "tuna" }, { name = "ty" }, { name = "types-lxml" }, ] @@ -290,6 +291,7 @@ requires-dist = [ { name = "ruff", marker = "extra == 'docs'", specifier = ">=0.12.8" }, { name = "termynal", marker = "extra == 'docs'", specifier = ">=0.13.1" }, { name = "tqdm", specifier = ">=4.67.1" }, + { name = "tuna", marker = "extra == 'dev'", specifier = ">=0.5.11" }, { name = "ty", marker = "extra == 'dev'", specifier = ">=0.0.1a17" }, { name = "typer", specifier = ">=0.16.1" }, { name = "types-lxml", marker = "extra == 'dev'", specifier = ">=2025.8.25" }, @@ -1585,6 +1587,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540, upload-time = "2024-11-24T20:12:19.698Z" }, ] +[[package]] +name = "tuna" +version = "0.5.11" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/88/fb/5bf0865b2fdb44c0c62af24e77b5fe1bcfae4282b982a954fe7984587595/tuna-0.5.11.tar.gz", hash = "sha256:d47f3e39e80af961c8df016ac97d1643c3c60b5eb451299da0ab5fe411d8866c", size = 150600, upload-time = "2021-12-18T22:11:19.551Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d6/07/c115a27adb5228bdf78d0c2366637c5b1630427f879c674f7bab4e6eb637/tuna-0.5.11-py3-none-any.whl", hash = "sha256:ab352a6d836014ace585ecd882148f1f7c68be9ea4bf9e9298b7127594dab2ef", size = 149682, upload-time = "2021-12-18T22:11:16.716Z" }, +] + [[package]] name = "ty" version = "0.0.1a22" From b0f6cae533fd6acccd178c65a69e440a03ed815d Mon Sep 17 00:00:00 2001 From: Villon CHEN Date: Wed, 19 Nov 2025 14:24:39 +0100 Subject: [PATCH 2/9] perf: lazy load heavy modules to improve application startup time --- codesectools/cli.py | 6 ++++-- codesectools/datasets/BenchmarkJava/dataset.py | 6 +++--- .../datasets/JulietTestSuiteC/dataset.py | 7 ++++--- codesectools/datasets/core/dataset.py | 11 +++++++---- codesectools/sasts/all/cli.py | 18 ++++++++++++------ codesectools/sasts/core/cli.py | 17 ++++++++++------- codesectools/sasts/core/sast/__init__.py | 6 ++++-- codesectools/sasts/core/sast/requirements.py | 12 ++++++++---- codesectools/sasts/tools/Coverity/parser.py | 6 +++--- codesectools/sasts/tools/Cppcheck/parser.py | 7 ++++--- codesectools/shared/cloc.py | 6 +++--- codesectools/shared/cwe.py | 6 +++--- 12 files changed, 65 insertions(+), 43 deletions(-) diff --git a/codesectools/cli.py b/codesectools/cli.py index 4304bf1..425e73c 100755 --- a/codesectools/cli.py +++ b/codesectools/cli.py @@ -4,7 +4,6 @@ It dynamically discovers and adds CLI commands from all available SAST tools. """ -import importlib.metadata import os from typing import Optional @@ -12,7 +11,6 @@ import typer.core from click import Choice from rich import print -from rich.table import Table from typing_extensions import Annotated from codesectools.datasets import DATASETS_ALL @@ -26,6 +24,8 @@ def version_callback(value: bool) -> None: """Print the application version and exit.""" + import importlib.metadata + if value: print(importlib.metadata.version("codesectools")) raise typer.Exit() @@ -65,6 +65,8 @@ def status( ] = False, ) -> None: """Display the availability of SAST tools and datasets.""" + from rich.table import Table + if sasts or (not sasts and not datasets): table = Table(show_lines=True) table.add_column("SAST", justify="center", no_wrap=True) diff --git a/codesectools/datasets/BenchmarkJava/dataset.py b/codesectools/datasets/BenchmarkJava/dataset.py index 1dd192a..ed41682 100644 --- a/codesectools/datasets/BenchmarkJava/dataset.py +++ b/codesectools/datasets/BenchmarkJava/dataset.py @@ -10,8 +10,6 @@ from pathlib import Path from typing import Self -import git - from codesectools.datasets.core.dataset import File, PrebuiltFileDataset from codesectools.shared.cwe import CWE, CWEs @@ -108,7 +106,9 @@ def download_files(self: Self, test: bool = False) -> None: test: If True, reduce the number of test files for faster testing. """ - git.Repo.clone_from( + from git import Repo + + Repo.clone_from( "https://github.com/OWASP-Benchmark/BenchmarkJava.git", self.directory ) diff --git a/codesectools/datasets/JulietTestSuiteC/dataset.py b/codesectools/datasets/JulietTestSuiteC/dataset.py index 78e578b..ccd30f4 100644 --- a/codesectools/datasets/JulietTestSuiteC/dataset.py +++ b/codesectools/datasets/JulietTestSuiteC/dataset.py @@ -13,9 +13,6 @@ from pathlib import Path from typing import Self -import requests -from lxml import etree - from codesectools.datasets.core.dataset import File, PrebuiltFileDataset from codesectools.shared.cwe import CWE, CWEs from codesectools.utils import CPU_COUNT @@ -100,6 +97,8 @@ def download_files(self: Self, test: bool = False) -> None: test: If True, reduce the number of test files for faster testing. """ + import requests + zip_file = io.BytesIO( requests.get( "https://samate.nist.gov/SARD/downloads/test-suites/2017-10-01-juliet-test-suite-for-c-cplusplus-v1-3.zip" @@ -129,6 +128,8 @@ def load_dataset(self) -> list[TestCode]: A list of `TestCode` objects representing the dataset. """ + from lxml import etree + files = [] testcode_dir = self.directory / "C" / "testcases" testcode_paths = { diff --git a/codesectools/datasets/core/dataset.py b/codesectools/datasets/core/dataset.py index e582023..9a63ddf 100644 --- a/codesectools/datasets/core/dataset.py +++ b/codesectools/datasets/core/dataset.py @@ -12,12 +12,9 @@ from pathlib import Path from typing import TYPE_CHECKING -import git import humanize import typer from rich import print -from rich.panel import Panel -from rich.progress import Progress from codesectools.utils import USER_CACHE_DIR @@ -82,6 +79,8 @@ def is_cached(cls) -> bool: def prompt_license_agreement(self) -> None: """Display the dataset's license and prompt the user for agreement.""" + from rich.panel import Panel + panel = Panel( f"""Dataset:\t[b]{self.name}[/b] License:\t[b]{self.license}[/b] @@ -122,6 +121,8 @@ def download_dataset(self, test: bool = False) -> None: test: If True, download a smaller subset of the dataset for testing. """ + from rich.progress import Progress + self.prompt_license_agreement() with Progress() as progress: progress.add_task(f"Downloading [b]{self.name}[/b]...", total=None) @@ -538,7 +539,9 @@ def save(self, dir: Path) -> None: dir: The path to the directory where the repository should be cloned. """ - repo = git.Repo.clone_from(self.url, dir) + from git import Repo + + repo = Repo.clone_from(self.url, dir) repo.git.checkout(self.commit) diff --git a/codesectools/sasts/all/cli.py b/codesectools/sasts/all/cli.py index 32c0e9f..2cd32ca 100644 --- a/codesectools/sasts/all/cli.py +++ b/codesectools/sasts/all/cli.py @@ -8,17 +8,11 @@ import typer from click import Choice from rich import print -from rich.console import Console -from rich.style import Style -from rich.syntax import Syntax -from rich.table import Table -from rich.text import Text from typing_extensions import Annotated from codesectools.datasets import DATASETS_ALL from codesectools.datasets.core.dataset import FileDataset, GitRepoDataset from codesectools.sasts import SASTS_ALL -from codesectools.sasts.all.graphics import ProjectGraphics from codesectools.sasts.all.sast import AllSAST from codesectools.sasts.core.sast import PrebuiltBuildlessSAST, PrebuiltSAST from codesectools.utils import group_successive @@ -37,6 +31,8 @@ def main() -> None: @cli.command(help="List used SAST tools.") def info() -> None: """Display the status of all SAST tools and their inclusion in AllSAST.""" + from rich.table import Table + table = Table(show_lines=True) table.add_column("SAST", justify="center", no_wrap=True) table.add_column("Status", justify="center", no_wrap=True) @@ -156,6 +152,8 @@ def benchmark( @cli.command(name="list", help="List existing analysis results.") def list_() -> None: """List existing analysis results for projects and datasets.""" + from rich.table import Table + table = Table(show_lines=True) table.add_column("Name", justify="center", no_wrap=True) table.add_column("Type", justify="center", no_wrap=True) @@ -218,6 +216,8 @@ def plot( ] = False, ) -> None: """Generate and display plots for a project's aggregated analysis results.""" + from codesectools.sasts.all.graphics import ProjectGraphics + project_graphics = ProjectGraphics(project_name=project) project_graphics.export(overwrite=overwrite, show=show, pgf=pgf) @@ -239,6 +239,12 @@ def report( ] = False, ) -> None: """Generate an HTML report for a project's aggregated analysis results.""" + from rich.console import Console + from rich.style import Style + from rich.syntax import Syntax + from rich.table import Table + from rich.text import Text + report_dir = all_sast.output_dir / project / "report" if report_dir.is_dir(): if overwrite: diff --git a/codesectools/sasts/core/cli.py b/codesectools/sasts/core/cli.py index 63ee62f..d7d61e0 100644 --- a/codesectools/sasts/core/cli.py +++ b/codesectools/sasts/core/cli.py @@ -12,17 +12,10 @@ import typer from click import Choice from rich import print -from rich.panel import Panel -from rich.table import Table from typing_extensions import Annotated from codesectools.datasets import DATASETS_ALL from codesectools.datasets.core.dataset import FileDataset, GitRepoDataset -from codesectools.sasts.core.graphics import ( - FileDatasetGraphics, - GitRepoDatasetGraphics, - ProjectGraphics, -) from codesectools.sasts.core.sast import SAST, PrebuiltBuildlessSAST, PrebuiltSAST @@ -107,6 +100,8 @@ def add_install(self: Self, help: str = "") -> None: @self.cli.command(help=help) def install() -> None: """Display installation instructions for missing requirements.""" + from rich.panel import Panel + install_help = "" sast_reqs = self.sast.requirements missing_reqs = sast_reqs.get_missing() @@ -272,6 +267,8 @@ def add_list(self, help: str = "") -> None: @self.cli.command(help=help) def list() -> None: """List available analysis results.""" + from rich.table import Table + table = Table(show_lines=True) table.add_column("Name", justify="center", no_wrap=True) table.add_column("Type", justify="center", no_wrap=True) @@ -341,6 +338,12 @@ def plot( pgf: If True, export figures in PGF format for LaTeX documents. """ + from codesectools.sasts.core.graphics import ( + FileDatasetGraphics, + GitRepoDatasetGraphics, + ProjectGraphics, + ) + if result in self.sast.list_results(project=True): project = result project_graphics = ProjectGraphics(self.sast, project_name=project) diff --git a/codesectools/sasts/core/sast/__init__.py b/codesectools/sasts/core/sast/__init__.py index c874570..30fc19f 100644 --- a/codesectools/sasts/core/sast/__init__.py +++ b/codesectools/sasts/core/sast/__init__.py @@ -16,8 +16,6 @@ from typing import Any, Literal, Union from rich import print -from rich.panel import Panel -from rich.progress import Progress from codesectools.datasets import DATASETS_ALL from codesectools.datasets.core.dataset import ( @@ -113,6 +111,8 @@ def run_analysis( **kwargs: Additional tool-specific arguments. """ + from rich.progress import Progress + render_variables = {"{lang}": lang} for k, v in kwargs.items(): if v is None: @@ -357,6 +357,8 @@ def analyze_files( testing: If True, run analysis on a sample of two random files for testing. """ + from rich.panel import Panel + if not dataset.is_built(): prebuilt_dir, prebuilt_glob = dataset.prebuilt_expected panel = Panel( diff --git a/codesectools/sasts/core/sast/requirements.py b/codesectools/sasts/core/sast/requirements.py index 99e1007..9332fa5 100644 --- a/codesectools/sasts/core/sast/requirements.py +++ b/codesectools/sasts/core/sast/requirements.py @@ -5,12 +5,8 @@ from pathlib import Path from typing import Any, Literal, Self -import requests import typer -from git import Repo from rich import print -from rich.panel import Panel -from rich.progress import Progress from codesectools.utils import USER_CACHE_DIR, USER_CONFIG_DIR @@ -193,6 +189,10 @@ def is_fulfilled(self, **kwargs: Any) -> bool: def download(self, **kwargs: Any) -> None: """Prompt for license agreement and clone the Git repository.""" + from git import Repo + from rich.panel import Panel + from rich.progress import Progress + panel = Panel( f"""Repository:\t[b]{self.name}[/b] Repository URL:\t[u]{self.repo_url.rstrip(".git")}[/u] @@ -264,6 +264,10 @@ def is_fulfilled(self, **kwargs: Any) -> bool: def download(self, **kwargs: Any) -> None: """Prompt for license agreement and download the file.""" + import requests + from rich.panel import Panel + from rich.progress import Progress + panel = Panel( f"""File:\t\t[b]{self.name}[/b] Download URL:\t[u]{self.file_url}[/u] diff --git a/codesectools/sasts/tools/Coverity/parser.py b/codesectools/sasts/tools/Coverity/parser.py index ff8268f..40697ed 100644 --- a/codesectools/sasts/tools/Coverity/parser.py +++ b/codesectools/sasts/tools/Coverity/parser.py @@ -10,9 +10,6 @@ from pathlib import Path from typing import Self -import xmltodict -import yaml - from codesectools.sasts.core.parser import AnalysisResult, Defect from codesectools.shared.cwe import CWEs from codesectools.utils import USER_CONFIG_DIR, MissingFile @@ -199,6 +196,9 @@ def load_from_output_dir(cls, output_dir: Path) -> Self: MissingFile: If a required result file is not found. """ + import xmltodict + import yaml + cmdout = json.load((output_dir / "cstools_output.json").open()) # Analysis metrics diff --git a/codesectools/sasts/tools/Cppcheck/parser.py b/codesectools/sasts/tools/Cppcheck/parser.py index 50f409d..bdb6819 100644 --- a/codesectools/sasts/tools/Cppcheck/parser.py +++ b/codesectools/sasts/tools/Cppcheck/parser.py @@ -9,9 +9,6 @@ from pathlib import Path from typing import Self -from lxml import etree -from lxml.etree import ElementTree - from codesectools.sasts.core.parser import AnalysisResult, Defect from codesectools.shared.cwe import CWE, CWEs from codesectools.utils import MissingFile @@ -50,6 +47,8 @@ def __init__( class CppcheckAnalysisResult(AnalysisResult): """Represent the complete result of a Cppcheck analysis.""" + from lxml.etree import ElementTree + def __init__(self, output_dir: Path, xml_tree: ElementTree, cmdout: dict) -> None: """Initialize a CppcheckAnalysisResult instance. @@ -108,6 +107,8 @@ def load_from_output_dir(cls, output_dir: Path) -> Self: MissingFile: If a required result file is not found. """ + from lxml import etree + # Cmdout cmdout = json.load((output_dir / "cstools_output.json").open()) diff --git a/codesectools/shared/cloc.py b/codesectools/shared/cloc.py index 200462e..747e248 100644 --- a/codesectools/shared/cloc.py +++ b/codesectools/shared/cloc.py @@ -9,8 +9,6 @@ import shutil from pathlib import Path -import git - from codesectools.utils import USER_CACHE_DIR, MissingFile, NonZeroExit, run_command @@ -45,6 +43,8 @@ def __init__(self, dir: Path, lang: str) -> None: lang: The programming language to count. """ + from git import Repo + self.dir = dir self.lang = self.cloc_names[lang] if shutil.which("cloc"): @@ -53,7 +53,7 @@ def __init__(self, dir: Path, lang: str) -> None: if shutil.which("perl"): cloc_repo = USER_CACHE_DIR / "cloc" if not cloc_repo.is_dir(): - repo = git.Repo.clone_from( + repo = Repo.clone_from( "https://github.com/AlDanial/cloc.git", cloc_repo, depth=1, diff --git a/codesectools/shared/cwe.py b/codesectools/shared/cwe.py index 997258f..f99853b 100644 --- a/codesectools/shared/cwe.py +++ b/codesectools/shared/cwe.py @@ -10,9 +10,6 @@ import zipfile from typing import Self -import requests -from rich.progress import Progress - from codesectools.utils import USER_CACHE_DIR @@ -109,6 +106,9 @@ def __init__(self) -> None: def download(self) -> None: """Download CWE data from the official MITRE website.""" + import requests + from rich.progress import Progress + with Progress() as progress: task = progress.add_task( "[red]Downloading CWEs from [b]cwe.mitre.org[/b]...", total=100 From 78e583b4d8b6729193009bb2dfc32913bd69a0b2 Mon Sep 17 00:00:00 2001 From: Villon CHEN Date: Wed, 19 Nov 2025 14:27:24 +0100 Subject: [PATCH 3/9] perf(sasts): lazy load SAST instance to load only when necessary --- codesectools/sasts/__init__.py | 104 ++++++++++++++++++++++++--------- 1 file changed, 78 insertions(+), 26 deletions(-) diff --git a/codesectools/sasts/__init__.py b/codesectools/sasts/__init__.py index cad09d0..8c2a617 100644 --- a/codesectools/sasts/__init__.py +++ b/codesectools/sasts/__init__.py @@ -16,38 +16,90 @@ import importlib -import typer - +from codesectools.sasts.core.cli import CLIFactory from codesectools.sasts.core.sast import SAST, AnalysisResult +from codesectools.sasts.core.sast.properties import SASTProperties +from codesectools.sasts.core.sast.requirements import SASTRequirement from codesectools.utils import SASTS_DIR + +class LazySASTLoader: + """Lazily load SAST tool components to avoid premature imports.""" + + def __init__(self, name: str) -> None: + """Initialize the lazy loader. + + Args: + name: The name of the SAST tool to load. + + """ + self.name = name + self.loaded = False + + def _load(self) -> None: + """Import the SAST modules and classes on first access.""" + if not self.loaded: + sast_module = importlib.import_module( + f"codesectools.sasts.tools.{self.name}.sast" + ) + + self.sast: SAST = getattr(sast_module, f"{self.name}SAST") + self.sast_instance: SAST = self.sast() + self.analysis_result: AnalysisResult = getattr( + sast_module, f"{self.name}AnalysisResult" + ) + + self.cli_module = importlib.import_module( + f"codesectools.sasts.tools.{self.name}.cli" + ) + self.cli_factory: CLIFactory = getattr( + self.cli_module, f"{self.name}CLIFactory" + ) + + self._data = { + "status": self.sast_instance.status, + "missing": self.sast_instance.missing, + "properties": self.sast_instance.properties, + "sast": self.sast, + "analysis_result": self.analysis_result, + "cli_factory": self.cli_factory, + } + + self.loaded = True + + def __getitem__( + self, name: str + ) -> ( + str + | list[SASTRequirement] + | SASTProperties + | SAST + | AnalysisResult + | CLIFactory + ): + """Provide dictionary-like access to the loaded SAST components.""" + self._load() + return self._data[name] + + def __setitem__( + self, + name: str, + value: str + | list[SASTRequirement] + | SASTProperties + | SAST + | AnalysisResult + | CLIFactory, + ) -> None: + """Provide dictionary-like write access to the loaded SAST components.""" + self._load() + self._data[name] = value + + SASTS_ALL = {} for child in (SASTS_DIR / "tools").iterdir(): if child.is_dir(): sast_name = child.name - - sast_module = importlib.import_module( - f"codesectools.sasts.tools.{sast_name}.sast" - ) - - sast: SAST = getattr(sast_module, f"{sast_name}SAST") - sast_instance = sast() - analysis_result: AnalysisResult = getattr( - sast_module, f"{sast_name}AnalysisResult" - ) - - cli_module = importlib.import_module( - f"codesectools.sasts.tools.{sast_name}.cli" - ) - cli_factory: typer.Typer = getattr(cli_module, f"{sast_name}CLIFactory") - - SASTS_ALL[sast_name] = { - "status": sast_instance.status, - "missing": sast_instance.missing, - "properties": sast_instance.properties, - "sast": sast, - "analysis_result": analysis_result, - "cli_factory": cli_factory, - } + SASTS_ALL[sast_name] = LazySASTLoader(sast_name) SASTS_ALL = dict(sorted(SASTS_ALL.items())) From fb089bfe101b16447602dd78a32d57e8da09a5f6 Mon Sep 17 00:00:00 2001 From: Villon CHEN Date: Wed, 19 Nov 2025 14:27:40 +0100 Subject: [PATCH 4/9] perf(datasets): lazy load Dataset instance to load only when necessary --- codesectools/datasets/__init__.py | 43 ++++++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 6 deletions(-) diff --git a/codesectools/datasets/__init__.py b/codesectools/datasets/__init__.py index d9be38d..c13b211 100644 --- a/codesectools/datasets/__init__.py +++ b/codesectools/datasets/__init__.py @@ -12,21 +12,52 @@ """ import importlib +from typing import Any from codesectools.datasets.core.dataset import Dataset from codesectools.utils import DATASETS_DIR + +class LazyDatasetLoader: + """Lazily load a dataset class to avoid premature imports.""" + + def __init__(self, name: str) -> None: + """Initialize the lazy loader. + + Args: + name: The name of the dataset to load. + + """ + self.name = name + self.loaded = False + + def _load(self) -> None: + """Import the dataset module and class on first access.""" + if not self.loaded: + self.dataset_module = importlib.import_module( + f"codesectools.datasets.{self.name}.dataset" + ) + self.dataset: Dataset = getattr(self.dataset_module, self.name) + + self.loaded = True + + def __call__(self, *args: Any, **kwargs: Any) -> Dataset: + """Create an instance of the loaded dataset class.""" + self._load() + return self.dataset(*args, **kwargs) + + def __getattr__(self, name: str) -> Any: # noqa: ANN401 + """Proxy attribute access to the loaded dataset class.""" + self._load() + return getattr(self.dataset, name) + + DATASETS_ALL = {} for child in DATASETS_DIR.iterdir(): if child.is_dir(): if list(child.glob("dataset.py")) and child.name != "core": dataset_name = child.name - dataset_module = importlib.import_module( - f"codesectools.datasets.{dataset_name}.dataset" - ) - dataset: Dataset = getattr(dataset_module, dataset_name) - - DATASETS_ALL[dataset_name] = dataset + DATASETS_ALL[dataset_name] = LazyDatasetLoader(dataset_name) DATASETS_ALL = dict(sorted(DATASETS_ALL.items())) From 16b8f3725fa3a37f9754d207af013e406ce57a42 Mon Sep 17 00:00:00 2001 From: Villon CHEN Date: Wed, 19 Nov 2025 14:28:37 +0100 Subject: [PATCH 5/9] perf(Coverity): lazy load and cache config files --- codesectools/sasts/tools/Coverity/parser.py | 110 +++++++++++++------- codesectools/sasts/tools/Coverity/sast.py | 7 +- 2 files changed, 74 insertions(+), 43 deletions(-) diff --git a/codesectools/sasts/tools/Coverity/parser.py b/codesectools/sasts/tools/Coverity/parser.py index 40697ed..4f7dc4c 100644 --- a/codesectools/sasts/tools/Coverity/parser.py +++ b/codesectools/sasts/tools/Coverity/parser.py @@ -7,6 +7,7 @@ import json import re +from functools import lru_cache from pathlib import Path from typing import Self @@ -14,42 +15,73 @@ from codesectools.shared.cwe import CWEs from codesectools.utils import USER_CONFIG_DIR, MissingFile -"""Loads and provides configuration for the Coverity integration. - -This module reads `issueTypes.json` and `config.json` from the user's -Coverity configuration directory. It creates mappings and settings -used by the Coverity SAST integration. - -Attributes: - USER_COVERITY_DIR (Path): The path to the user's Coverity config directory. - TYPE_TO_CWE (dict): A mapping from Coverity issue types to CWE IDs. - LANGUAGES (dict): Configuration for supported languages. - COLOR_MAPPING (dict): A mapping of result categories to colors for plotting. - -""" - USER_COVERITY_DIR = USER_CONFIG_DIR / "Coverity" -types_file = USER_COVERITY_DIR / "issueTypes.json" - -if types_file.is_file(): - TYPES = json.load(types_file.open())["issue_type"] - TYPE_TO_CWE = {} - for type in TYPES: - TYPE_TO_CWE[type["type"]] = type["cim_checker_properties"]["cweCategory"] -else: - TYPE_TO_CWE = {} - -config_file = USER_COVERITY_DIR / "config.json" - -if config_file.is_file(): - config = json.load(config_file.open()) - LANGUAGES = config["languages"] - COLOR_MAPPING = config["color_mapping"] -else: - LANGUAGES = {} - COLOR_MAPPING = {} +class CoverityConfig: + """Handle the loading and parsing of Coverity configuration files.""" + + def __init__(self) -> None: + """Initialize the CoverityConfig instance.""" + self._type_to_cwe = None + self._languages = None + self._color_mapping = None + + @staticmethod + @lru_cache(maxsize=None) + def _load_issue_types_file() -> dict | None: + """Load and parse the issueTypes.json file.""" + types_file = USER_COVERITY_DIR / "issueTypes.json" + if types_file.is_file(): + return json.load(types_file.open()) + return None + + @staticmethod + @lru_cache(maxsize=None) + def _load_config_file() -> dict | None: + """Load and parse the config.json file.""" + config_file = USER_COVERITY_DIR / "config.json" + if config_file.is_file(): + return json.load(config_file.open()) + return None + + @property + def type_to_cwe(self) -> dict: + """Get a mapping from Coverity issue types to CWE IDs.""" + if self._type_to_cwe is None: + types_data = self._load_issue_types_file() + if types_data and "issue_type" in types_data: + self._type_to_cwe = { + type_info["type"]: type_info["cim_checker_properties"][ + "cweCategory" + ] + for type_info in types_data["issue_type"] + } + else: + self._type_to_cwe = {} + return self._type_to_cwe + + @property + def languages(self) -> dict: + """Get the language configuration for Coverity.""" + if self._languages is None: + config_data = self._load_config_file() + if config_data and "languages" in config_data: + self._languages = config_data["languages"] + else: + self._languages = {} + return self._languages + + @property + def color_mapping(self) -> dict: + """Get the color mapping for Coverity issue categories.""" + if self._color_mapping is None: + config_data = self._load_config_file() + if config_data and "color_mapping" in config_data: + self._color_mapping = config_data["color_mapping"] + else: + self._color_mapping = {} + return self._color_mapping class CoverityDefect(Defect): @@ -78,7 +110,7 @@ def __init__(self, defect_data: dict) -> None: filepath=Path(defect_data["file"]), checker=defect_data["checker"], category=None, - cwe=CWEs.from_id(TYPE_TO_CWE.get(defect_data["type"], -1)), + cwe=CWEs.from_id(CoverityConfig().type_to_cwe.get(defect_data["type"], -1)), message="", # TODO lines=[defect_data["line"]], data=defect_data, @@ -91,10 +123,10 @@ def __init__(self, defect_data: dict) -> None: elif self.checker.startswith("FB"): self.category = "SPOTBUGS" else: - if self.lang in LANGUAGES.keys(): - for set_name, checker_set in LANGUAGES[self.lang][ - "checker_sets" - ].items(): + if self.lang in CoverityConfig().languages.keys(): + for set_name, checker_set in ( + CoverityConfig().languages[self.lang]["checker_sets"].items() + ): if self.checker in checker_set: self.category = set_name break @@ -158,7 +190,7 @@ def __init__( self.files = list(map(lambda line: str(Path(line)), captured_list.splitlines())) file_count = 0 - for lang, pattern in LANGUAGES.items(): + for lang, pattern in CoverityConfig().languages.items(): include = pattern["include"] exclude = pattern["exclude"] files = [ diff --git a/codesectools/sasts/tools/Coverity/sast.py b/codesectools/sasts/tools/Coverity/sast.py index a70bcf2..55d172c 100644 --- a/codesectools/sasts/tools/Coverity/sast.py +++ b/codesectools/sasts/tools/Coverity/sast.py @@ -10,9 +10,8 @@ from codesectools.sasts.core.sast.properties import SASTProperties from codesectools.sasts.core.sast.requirements import Binary, Config, SASTRequirements from codesectools.sasts.tools.Coverity.parser import ( - COLOR_MAPPING, - LANGUAGES, CoverityAnalysisResult, + CoverityConfig, ) @@ -35,7 +34,7 @@ class CoveritySAST(BuildlessSAST): """ name = "Coverity" - supported_languages = LANGUAGES.keys() + supported_languages = CoverityConfig().languages.keys() supported_dataset_names = ["BenchmarkJava", "CVEfixes"] properties = SASTProperties(free=False, offline=True) requirements = SASTRequirements( @@ -77,4 +76,4 @@ class CoveritySAST(BuildlessSAST): (Path("idir", "output", "*.xml"), False), ] parser = CoverityAnalysisResult - color_mapping = COLOR_MAPPING + color_mapping = CoverityConfig().color_mapping From 710a34daaaae946250b6251e7247ef3e8d17e820 Mon Sep 17 00:00:00 2001 From: Villon CHEN Date: Wed, 19 Nov 2025 14:29:16 +0100 Subject: [PATCH 6/9] perf(cwes): lazy load CWEs --- codesectools/shared/cwe.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/codesectools/shared/cwe.py b/codesectools/shared/cwe.py index f99853b..321176c 100644 --- a/codesectools/shared/cwe.py +++ b/codesectools/shared/cwe.py @@ -97,12 +97,18 @@ def __init__(self) -> None: } self.directory = USER_CACHE_DIR / "cwe" self.NOCWE = CWE(id=-1, name=" Missing or invalid CWE", description="None") + self._cwes = None - try: - self.cwes = self.load() - except FileNotFoundError: + if not self.directory.is_dir(): self.download() - self.cwes = self.load() + + @property + def cwes(self) -> list[CWE]: + """Get the list of all CWEs, loading them if necessary.""" + if not self._cwes: + self._cwes = self.load() + + return self._cwes def download(self) -> None: """Download CWE data from the official MITRE website.""" From 0d6c9e60ec5df5106daca2c5056ee58f1b9dce92 Mon Sep 17 00:00:00 2001 From: Villon CHEN Date: Wed, 19 Nov 2025 14:42:51 +0100 Subject: [PATCH 7/9] perf(graphics): close plot figures to reduce memory usage --- codesectools/sasts/all/graphics.py | 2 ++ codesectools/sasts/core/graphics.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/codesectools/sasts/all/graphics.py b/codesectools/sasts/all/graphics.py index bb3d37c..fba5aeb 100644 --- a/codesectools/sasts/all/graphics.py +++ b/codesectools/sasts/all/graphics.py @@ -91,6 +91,8 @@ def export(self, overwrite: bool, pgf: bool, show: bool) -> None: fig.savefig(figure_path_pgf, bbox_inches="tight") print(f"Figure {fig_name} exported to pgf") + plt.close(fig) + ## Single project class ProjectGraphics(Graphics): diff --git a/codesectools/sasts/core/graphics.py b/codesectools/sasts/core/graphics.py index 7f9c210..14d0cd0 100644 --- a/codesectools/sasts/core/graphics.py +++ b/codesectools/sasts/core/graphics.py @@ -109,6 +109,8 @@ def export(self, overwrite: bool, pgf: bool, show: bool) -> None: fig.savefig(figure_path_pgf, bbox_inches="tight") print(f"Figure {fig_name} exported to pgf") + plt.close(fig) + ## Single project class ProjectGraphics(Graphics): From cae4277e10e99c8dcf839235f196a57e5b57da12 Mon Sep 17 00:00:00 2001 From: Villon CHEN Date: Wed, 19 Nov 2025 14:35:11 +0100 Subject: [PATCH 8/9] fix(tests): disable other logging to avoid closing buffer issue in Click testing --- tests/conftest.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index 2475ab3..2c176c5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,12 +2,16 @@ import hashlib import json +import logging import os from pathlib import Path from types import GeneratorType import pytest +# Fix: I/O operation on closed (https://github.com/pallets/click/issues/824) +logging.getLogger("matplotlib").setLevel(logging.ERROR) + test_type = os.environ.get("TEST_TYPE") state_file = Path(f".pytest_cache/state_{test_type}.json") From 210daf20035f95d7b4ab5c97c2bb07963882520c Mon Sep 17 00:00:00 2001 From: Villon CHEN Date: Wed, 19 Nov 2025 14:35:31 +0100 Subject: [PATCH 9/9] chore(release): bump project version --- pyproject.toml | 2 +- uv.lock | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 781c8f7..af0086d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "CodeSecTools" -version = "0.13.0" +version = "0.13.1" description = "A framework for code security that provides abstractions for static analysis tools and datasets to support their integration, testing, and evaluation." readme = "README.md" license = "AGPL-3.0-only" diff --git a/uv.lock b/uv.lock index ffcefb7..29889e4 100644 --- a/uv.lock +++ b/uv.lock @@ -221,7 +221,7 @@ wheels = [ [[package]] name = "codesectools" -version = "0.13.0" +version = "0.13.1" source = { editable = "." } dependencies = [ { name = "gitpython" },