Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ check: ## Lint, format, and type-check the code
@ruff format
@ty check

# Record per-module import times (`-X importtime` writes them to stderr) and
# open the log in tuna. The two steps are joined with `;` rather than `||` so
# the viewer starts whatever exit status the profiled command returns.
profile: ## Run profiling
	@python3 -X importtime -m codesectools 1>/dev/null 2>/tmp/import.log; tuna /tmp/import.log

test: ## Run tests in a Docker container
@docker compose build 1>/dev/null
@docker compose run --rm no-sast
Expand Down
6 changes: 4 additions & 2 deletions codesectools/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,13 @@
It dynamically discovers and adds CLI commands from all available SAST tools.
"""

import importlib.metadata
import os
from typing import Optional

import typer
import typer.core
from click import Choice
from rich import print
from rich.table import Table
from typing_extensions import Annotated

from codesectools.datasets import DATASETS_ALL
Expand All @@ -26,6 +24,8 @@

def version_callback(value: bool) -> None:
    """Show the installed `codesectools` version, then stop the CLI.

    Args:
        value: Truthy to print the version and exit; falsy to do nothing.

    Raises:
        typer.Exit: Raised after the version has been printed.

    """
    import importlib.metadata

    # Guard clause: nothing to do unless the callback was triggered.
    if not value:
        return

    installed_version = importlib.metadata.version("codesectools")
    print(installed_version)
    raise typer.Exit()
Expand Down Expand Up @@ -65,6 +65,8 @@ def status(
] = False,
) -> None:
"""Display the availability of SAST tools and datasets."""
from rich.table import Table

if sasts or (not sasts and not datasets):
table = Table(show_lines=True)
table.add_column("SAST", justify="center", no_wrap=True)
Expand Down
6 changes: 3 additions & 3 deletions codesectools/datasets/BenchmarkJava/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@
from pathlib import Path
from typing import Self

import git

from codesectools.datasets.core.dataset import File, PrebuiltFileDataset
from codesectools.shared.cwe import CWE, CWEs

Expand Down Expand Up @@ -108,7 +106,9 @@ def download_files(self: Self, test: bool = False) -> None:
test: If True, reduce the number of test files for faster testing.

"""
git.Repo.clone_from(
from git import Repo

Repo.clone_from(
"https://github.com/OWASP-Benchmark/BenchmarkJava.git", self.directory
)

Expand Down
7 changes: 4 additions & 3 deletions codesectools/datasets/JulietTestSuiteC/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,6 @@
from pathlib import Path
from typing import Self

import requests
from lxml import etree

from codesectools.datasets.core.dataset import File, PrebuiltFileDataset
from codesectools.shared.cwe import CWE, CWEs
from codesectools.utils import CPU_COUNT
Expand Down Expand Up @@ -100,6 +97,8 @@ def download_files(self: Self, test: bool = False) -> None:
test: If True, reduce the number of test files for faster testing.

"""
import requests

zip_file = io.BytesIO(
requests.get(
"https://samate.nist.gov/SARD/downloads/test-suites/2017-10-01-juliet-test-suite-for-c-cplusplus-v1-3.zip"
Expand Down Expand Up @@ -129,6 +128,8 @@ def load_dataset(self) -> list[TestCode]:
A list of `TestCode` objects representing the dataset.

"""
from lxml import etree

files = []
testcode_dir = self.directory / "C" / "testcases"
testcode_paths = {
Expand Down
43 changes: 37 additions & 6 deletions codesectools/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,52 @@
"""

import importlib
from typing import Any

from codesectools.datasets.core.dataset import Dataset
from codesectools.utils import DATASETS_DIR


class LazyDatasetLoader:
    """Lazily load a dataset class to avoid premature imports.

    The dataset module is imported the first time the loader is called or the
    first time an attribute that the loader itself does not define is read.
    """

    def __init__(self, name: str) -> None:
        """Initialize the lazy loader.

        Args:
            name: The name of the dataset to load.

        """
        self.name = name
        self.loaded = False

    def _load(self) -> None:
        """Import the dataset module and class on first access."""
        if self.loaded:
            return

        self.dataset_module = importlib.import_module(
            f"codesectools.datasets.{self.name}.dataset"
        )
        # The class is looked up in the module under the dataset's own name.
        self.dataset: type[Dataset] = getattr(self.dataset_module, self.name)
        self.loaded = True

    def __call__(self, *args: Any, **kwargs: Any) -> "Dataset":
        """Create an instance of the loaded dataset class.

        Args:
            *args: Positional arguments forwarded to the dataset class.
            **kwargs: Keyword arguments forwarded to the dataset class.

        Returns:
            Whatever calling the loaded dataset class returns.

        """
        self._load()
        return self.dataset(*args, **kwargs)

    def __getattr__(self, name: str) -> Any:  # noqa: ANN401
        """Proxy attribute access to the loaded dataset class.

        Args:
            name: The attribute that normal lookup on the loader did not find.

        Returns:
            The attribute of the same name on the loaded dataset class.

        Raises:
            AttributeError: If the loader has not been initialized yet, or if
                the dataset class has no such attribute.

        """
        # `__getattr__` only runs when normal lookup fails. `copy` and `pickle`
        # create a bare instance (no `__init__`) and probe it for hooks such as
        # `__setstate__`; without this guard `_load()` would read the missing
        # `loaded` attribute, re-enter `__getattr__` and recurse until
        # `RecursionError`.
        if "loaded" not in self.__dict__:
            raise AttributeError(name)

        self._load()
        return getattr(self.dataset, name)


# Register one lazy loader per dataset package (any directory other than
# "core" that contains a dataset.py). The dataset module itself is not
# imported here; importing it eagerly would defeat the lazy loader.
DATASETS_ALL = {}
for child in DATASETS_DIR.iterdir():
    if child.is_dir():
        if list(child.glob("dataset.py")) and child.name != "core":
            dataset_name = child.name
            DATASETS_ALL[dataset_name] = LazyDatasetLoader(dataset_name)

# Keep the registry ordered by dataset name.
DATASETS_ALL = dict(sorted(DATASETS_ALL.items()))
11 changes: 7 additions & 4 deletions codesectools/datasets/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,9 @@
from pathlib import Path
from typing import TYPE_CHECKING

import git
import humanize
import typer
from rich import print
from rich.panel import Panel
from rich.progress import Progress

from codesectools.utils import USER_CACHE_DIR

Expand Down Expand Up @@ -82,6 +79,8 @@ def is_cached(cls) -> bool:

def prompt_license_agreement(self) -> None:
"""Display the dataset's license and prompt the user for agreement."""
from rich.panel import Panel

panel = Panel(
f"""Dataset:\t[b]{self.name}[/b]
License:\t[b]{self.license}[/b]
Expand Down Expand Up @@ -122,6 +121,8 @@ def download_dataset(self, test: bool = False) -> None:
test: If True, download a smaller subset of the dataset for testing.

"""
from rich.progress import Progress

self.prompt_license_agreement()
with Progress() as progress:
progress.add_task(f"Downloading [b]{self.name}[/b]...", total=None)
Expand Down Expand Up @@ -538,7 +539,9 @@ def save(self, dir: Path) -> None:
dir: The path to the directory where the repository should be cloned.

"""
repo = git.Repo.clone_from(self.url, dir)
from git import Repo

repo = Repo.clone_from(self.url, dir)
repo.git.checkout(self.commit)


Expand Down
104 changes: 78 additions & 26 deletions codesectools/sasts/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,38 +16,90 @@

import importlib

import typer

from codesectools.sasts.core.cli import CLIFactory
from codesectools.sasts.core.sast import SAST, AnalysisResult
from codesectools.sasts.core.sast.properties import SASTProperties
from codesectools.sasts.core.sast.requirements import SASTRequirement
from codesectools.utils import SASTS_DIR


class LazySASTLoader:
    """Defer importing a SAST tool's modules until its data is first needed."""

    def __init__(self, name: str) -> None:
        """Remember which SAST tool to load without importing anything yet.

        Args:
            name: The name of the SAST tool to load.

        """
        self.loaded = False
        self.name = name

    def _load(self) -> None:
        """Import the tool's `sast` and `cli` modules and cache their components.

        Returns immediately when a previous call has already completed.
        """
        if self.loaded:
            return

        sast_module = importlib.import_module(
            f"codesectools.sasts.tools.{self.name}.sast"
        )
        self.sast: SAST = getattr(sast_module, f"{self.name}SAST")
        # The SAST class is instantiated with no arguments; the instance
        # supplies the status, missing and properties entries below.
        self.sast_instance: SAST = self.sast()
        self.analysis_result: AnalysisResult = getattr(
            sast_module, f"{self.name}AnalysisResult"
        )

        self.cli_module = importlib.import_module(
            f"codesectools.sasts.tools.{self.name}.cli"
        )
        self.cli_factory: CLIFactory = getattr(
            self.cli_module, f"{self.name}CLIFactory"
        )

        # Values read from the instance are captured once, at load time.
        instance = self.sast_instance
        self._data = {
            "status": instance.status,
            "missing": instance.missing,
            "properties": instance.properties,
            "sast": self.sast,
            "analysis_result": self.analysis_result,
            "cli_factory": self.cli_factory,
        }

        self.loaded = True

    def __getitem__(
        self, name: str
    ) -> (
        str
        | list[SASTRequirement]
        | SASTProperties
        | SAST
        | AnalysisResult
        | CLIFactory
    ):
        """Return the component stored under `name`, loading the tool if needed.

        Args:
            name: One of the keys of the cached component mapping.

        Raises:
            KeyError: If no component is stored under `name`.

        """
        self._load()
        components = self._data
        return components[name]

    def __setitem__(
        self,
        name: str,
        value: str
        | list[SASTRequirement]
        | SASTProperties
        | SAST
        | AnalysisResult
        | CLIFactory,
    ) -> None:
        """Store `value` under `name`, loading the tool first if needed.

        Args:
            name: The key to write in the cached component mapping.
            value: The component to store under that key.

        """
        self._load()
        components = self._data
        components[name] = value


# Register one lazy loader per directory under "tools". The tool's modules
# are not imported and its SAST class is not instantiated here; doing so
# eagerly would defeat the lazy loader.
SASTS_ALL = {}
for child in (SASTS_DIR / "tools").iterdir():
    if child.is_dir():
        sast_name = child.name
        SASTS_ALL[sast_name] = LazySASTLoader(sast_name)

# Keep the registry ordered by tool name.
SASTS_ALL = dict(sorted(SASTS_ALL.items()))
18 changes: 12 additions & 6 deletions codesectools/sasts/all/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,11 @@
import typer
from click import Choice
from rich import print
from rich.console import Console
from rich.style import Style
from rich.syntax import Syntax
from rich.table import Table
from rich.text import Text
from typing_extensions import Annotated

from codesectools.datasets import DATASETS_ALL
from codesectools.datasets.core.dataset import FileDataset, GitRepoDataset
from codesectools.sasts import SASTS_ALL
from codesectools.sasts.all.graphics import ProjectGraphics
from codesectools.sasts.all.sast import AllSAST
from codesectools.sasts.core.sast import PrebuiltBuildlessSAST, PrebuiltSAST
from codesectools.utils import group_successive
Expand All @@ -37,6 +31,8 @@ def main() -> None:
@cli.command(help="List used SAST tools.")
def info() -> None:
"""Display the status of all SAST tools and their inclusion in AllSAST."""
from rich.table import Table

table = Table(show_lines=True)
table.add_column("SAST", justify="center", no_wrap=True)
table.add_column("Status", justify="center", no_wrap=True)
Expand Down Expand Up @@ -156,6 +152,8 @@ def benchmark(
@cli.command(name="list", help="List existing analysis results.")
def list_() -> None:
"""List existing analysis results for projects and datasets."""
from rich.table import Table

table = Table(show_lines=True)
table.add_column("Name", justify="center", no_wrap=True)
table.add_column("Type", justify="center", no_wrap=True)
Expand Down Expand Up @@ -218,6 +216,8 @@ def plot(
] = False,
) -> None:
"""Generate and display plots for a project's aggregated analysis results."""
from codesectools.sasts.all.graphics import ProjectGraphics

project_graphics = ProjectGraphics(project_name=project)
project_graphics.export(overwrite=overwrite, show=show, pgf=pgf)

Expand All @@ -239,6 +239,12 @@ def report(
] = False,
) -> None:
"""Generate an HTML report for a project's aggregated analysis results."""
from rich.console import Console
from rich.style import Style
from rich.syntax import Syntax
from rich.table import Table
from rich.text import Text

report_dir = all_sast.output_dir / project / "report"
if report_dir.is_dir():
if overwrite:
Expand Down
2 changes: 2 additions & 0 deletions codesectools/sasts/all/graphics.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ def export(self, overwrite: bool, pgf: bool, show: bool) -> None:
fig.savefig(figure_path_pgf, bbox_inches="tight")
print(f"Figure {fig_name} exported to pgf")

plt.close(fig)


## Single project
class ProjectGraphics(Graphics):
Expand Down
Loading