diff --git a/codesectools/datasets/BenchmarkJava/dataset.py b/codesectools/datasets/BenchmarkJava/dataset.py index ec25095..1dd192a 100644 --- a/codesectools/datasets/BenchmarkJava/dataset.py +++ b/codesectools/datasets/BenchmarkJava/dataset.py @@ -148,16 +148,17 @@ def load_dataset(self) -> list[TestCode]: for row in reader: filename = f"{row[0]}.java" filepath = testcode_dir / filename - content = filepath.read_text() - cwes = [CWEs.from_id(int(row[3]))] - has_vuln = True if row[2] == "true" else False - files.append( - TestCode( - filepath.relative_to(self.directory), - content, - cwes, - has_vuln, + if filepath.is_file(): + content = filepath.read_text() + cwes = [CWEs.from_id(int(row[3]))] + has_vuln = True if row[2] == "true" else False + files.append( + TestCode( + filepath.relative_to(self.directory), + content, + cwes, + has_vuln, + ) ) - ) return files diff --git a/codesectools/datasets/core/dataset.py b/codesectools/datasets/core/dataset.py index 97ed03e..e582023 100644 --- a/codesectools/datasets/core/dataset.py +++ b/codesectools/datasets/core/dataset.py @@ -57,12 +57,17 @@ def __init__(self, lang: str | None = None) -> None: """ self.directory = USER_CACHE_DIR / self.name self.lang = lang + self._files = [] if self.lang: self.full_name = f"{self.name}_{self.lang}" assert self.full_name in self.list_dataset_full_names() - self.files: list[File] = self.load_dataset() - else: - self.files = [] + + @property + def files(self) -> list: + """Get the list of dataset files, loading them if necessary.""" + if self.lang: + self._files = self.load_dataset() + return self._files @classmethod def is_cached(cls) -> bool: diff --git a/pyproject.toml b/pyproject.toml index 168d496..a997f14 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "CodeSecTools" -version = "0.12.2" +version = "0.12.4" description = "A framework for code security that provides abstractions for static analysis tools and datasets to support their integration, testing, and evaluation." readme = "README.md" license = "AGPL-3.0-only" diff --git a/uv.lock b/uv.lock index 4eff334..de3b98f 100644 --- a/uv.lock +++ b/uv.lock @@ -221,7 +221,7 @@ wheels = [ [[package]] name = "codesectools" -version = "0.12.2" +version = "0.12.4" source = { editable = "." } dependencies = [ { name = "gitpython" },