Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion src/codegen/git/utils/language.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
raise ValueError(msg)

# Initialize counters for each language
language_counts = Counter()

Check failure on line 63 in src/codegen/git/utils/language.py

View workflow job for this annotation

GitHub Actions / mypy

error: Need type annotation for "language_counts" [var-annotated]
total_files = 0

# Walk through the directory
Expand Down Expand Up @@ -124,7 +124,7 @@
raise ValueError(msg)

# Initialize counters for each language
language_counts = Counter()

Check failure on line 127 in src/codegen/git/utils/language.py

View workflow job for this annotation

GitHub Actions / mypy

error: Need type annotation for "language_counts" [var-annotated]
total_files = 0

# Initiate RepoOperator
Expand All @@ -132,8 +132,11 @@
repo_config = RepoConfig.from_repo_path(repo_path=git_root)
repo_operator = RepoOperator(repo_config=repo_config)

# Use the specified subfolder path for language detection if provided
subdirs = [base_path] if base_path else None

# Walk through the directory
for rel_path, _ in repo_operator.iter_files(subdirs=[base_path] if base_path else None, ignore_list=GLOBAL_FILE_IGNORE_LIST):
for rel_path, _ in repo_operator.iter_files(subdirs=subdirs, ignore_list=GLOBAL_FILE_IGNORE_LIST):
# Convert to Path object
file_path = Path(git_root) / Path(rel_path)

Expand Down
14 changes: 10 additions & 4 deletions src/codegen/sdk/codebase/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,23 +46,29 @@
repo_path = os.path.abspath(path)
git_root, base_path = split_git_path(repo_path)
subdirectories = [base_path] if base_path else None
programming_language = programming_language or determine_project_language(repo_path)

# Only determine project language if not explicitly provided
detected_language = programming_language or determine_project_language(repo_path)

repo_config = RepoConfig.from_repo_path(repo_path=git_root)
repo_config.language = programming_language
repo_config.language = detected_language
repo_config.subdirectories = subdirectories
# Create main project
return cls(
repo_operator=RepoOperator(repo_config=repo_config),
programming_language=programming_language,
programming_language=detected_language,
base_path=base_path,
subdirectories=subdirectories,
)

@classmethod
def from_repo_operator(cls, repo_operator: RepoOperator, programming_language: ProgrammingLanguage | None = None, base_path: str | None = None) -> Self:
# Only determine project language if not explicitly provided
detected_language = programming_language or determine_project_language(repo_operator.repo_path)

Check warning on line 67 in src/codegen/sdk/codebase/config.py

View check run for this annotation

Codecov / codecov/patch

src/codegen/sdk/codebase/config.py#L67

Added line #L67 was not covered by tests

return cls(
repo_operator=repo_operator,
programming_language=programming_language or determine_project_language(repo_operator.repo_path),
programming_language=detected_language,
base_path=base_path,
subdirectories=[base_path] if base_path else None,
)
4 changes: 2 additions & 2 deletions src/codegen/sdk/core/codebase.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,8 @@
TExport = TypeVar("TExport", bound="Export", default=Export)
TSGlobalVar = TypeVar("TSGlobalVar", bound="Assignment", default=Assignment)
PyGlobalVar = TypeVar("PyGlobalVar", bound="Assignment", default=Assignment)
TSDirectory = Directory[TSFile, TSSymbol, TSImportStatement, TSGlobalVar, TSClass, TSFunction, TSImport]

Check failure on line 112 in src/codegen/sdk/core/codebase.py

View workflow job for this annotation

GitHub Actions / mypy

error: Cannot resolve name "TSDirectory" (possible cyclic definition) [misc]
PyDirectory = Directory[PyFile, PySymbol, PyImportStatement, PyGlobalVar, PyClass, PyFunction, PyImport]

Check failure on line 113 in src/codegen/sdk/core/codebase.py

View workflow job for this annotation

GitHub Actions / mypy

error: Cannot resolve name "PyDirectory" (possible cyclic definition) [misc]


@apidoc
Expand Down Expand Up @@ -200,22 +200,22 @@
if repo_path is not None:
main_project = ProjectConfig.from_path(
repo_path,
programming_language=ProgrammingLanguage(language.upper()) if language else None,
programming_language=ProgrammingLanguage(language.upper()) if isinstance(language, str) and language else language,
)
projects = [main_project]
else:
main_project = projects[0]

Check failure on line 207 in src/codegen/sdk/core/codebase.py

View workflow job for this annotation

GitHub Actions / mypy

error: Value of type "list[ProjectConfig] | None" is not indexable [index]

# Initialize codebase
self._op = main_project.repo_operator
self.viz = VisualizationManager(op=self._op)
self.repo_path = Path(self._op.repo_path)
self.ctx = CodebaseContext(projects, config=config, secrets=secrets, io=io, progress=progress)

Check failure on line 213 in src/codegen/sdk/core/codebase.py

View workflow job for this annotation

GitHub Actions / mypy

error: Argument 1 to "CodebaseContext" has incompatible type "list[ProjectConfig] | None"; expected "list[ProjectConfig]" [arg-type]
self.console = Console(record=True, soft_wrap=True)
if self.ctx.config.use_pink != PinkMode.OFF:
import codegen_sdk_pink

self._pink_codebase = codegen_sdk_pink.Codebase(self.repo_path)

Check failure on line 218 in src/codegen/sdk/core/codebase.py

View workflow job for this annotation

GitHub Actions / mypy

error: Module has no attribute "Codebase" [attr-defined]

@noapidoc
def __str__(self) -> str:
Expand All @@ -230,7 +230,7 @@
yield "nodes", len(self.ctx.nodes)
yield "edges", len(self.ctx.edges)

__rich_repr__.angular = ANGULAR_STYLE

Check failure on line 233 in src/codegen/sdk/core/codebase.py

View workflow job for this annotation

GitHub Actions / mypy

error: "Callable[[Codebase[TSourceFile, TDirectory, TSymbol, TClass, TFunction, TImport, TGlobalVar, TInterface, TTypeAlias, TParameter, TCodeBlock]], Iterable[Any | tuple[Any] | tuple[str, Any] | tuple[str, Any, Any]]]" has no attribute "angular" [attr-defined]

@property
@deprecated("Please do not use the local repo operator directly")
Expand Down Expand Up @@ -272,8 +272,8 @@

@noapidoc
def _symbols(self, symbol_type: SymbolType | None = None) -> list[TSymbol | TClass | TFunction | TGlobalVar]:
matches: list[Symbol] = self.ctx.get_nodes(NodeType.SYMBOL)

Check failure on line 275 in src/codegen/sdk/core/codebase.py

View workflow job for this annotation

GitHub Actions / mypy

error: Incompatible types in assignment (expression has type "list[Importable[Any]]", variable has type "list[Symbol[Any, Any]]") [assignment]
return [x for x in matches if x.is_top_level and (symbol_type is None or x.symbol_type == symbol_type)]

Check failure on line 276 in src/codegen/sdk/core/codebase.py

View workflow job for this annotation

GitHub Actions / mypy

error: List comprehension has incompatible type List[Symbol[Any, Any]]; expected List[TSymbol | TClass | TFunction | TGlobalVar] [misc]

# =====[ Node Types ]=====
@overload
Expand Down Expand Up @@ -1392,7 +1392,7 @@
logger.info("Initializing Codebase...")
project = ProjectConfig.from_repo_operator(
repo_operator=repo_operator,
programming_language=ProgrammingLanguage(language.upper()) if language else None,
programming_language=ProgrammingLanguage(language.upper()) if isinstance(language, str) and language else language,
)
codebase = Codebase(projects=[project], config=config, secrets=secrets)
logger.info("Codebase initialization complete")
Expand Down
74 changes: 74 additions & 0 deletions tests/unit/codegen/sdk/codebase/test_language_detection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import os
import subprocess
import tempfile
from pathlib import Path

from codegen.sdk.codebase.config import ProjectConfig
from codegen.shared.enums.programming_language import ProgrammingLanguage


def test_explicit_language_respected():
    """Test that explicitly provided language is respected and not overridden by detection.

    A throwaway git repository is created with five TypeScript files under
    ``ts/`` and two Python files under ``py/``. ``ProjectConfig.from_path`` is
    then called with an explicit ``programming_language`` that disagrees with
    the majority file type of the path it is given, and the resulting config
    must report the explicit language.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        # Create a temporary directory with more TypeScript files than Python files
        ts_dir = Path(tmp_dir) / "ts"
        py_dir = Path(tmp_dir) / "py"
        ts_dir.mkdir()
        py_dir.mkdir()

        # Create TypeScript files
        for i in range(5):
            (ts_dir / f"file{i}.ts").write_text(f"// TypeScript file {i}")

        # Create fewer Python files
        for i in range(2):
            (py_dir / f"file{i}.py").write_text(f"# Python file {i}")

        # Initialize git repo. Each step is run as an argument list (no shell, so
        # the temp path needs no quoting) and with check=True so that a failing
        # git command aborts the test at the setup step instead of being ignored.
        for git_args in (
            ["init"],
            ["config", "user.email", "test@example.com"],
            ["config", "user.name", "Test User"],
            ["add", "."],
            ["commit", "-m", "Initial commit"],
        ):
            subprocess.run(["git", *git_args], cwd=tmp_dir, check=True)

        # Test with explicit Python language
        project_config = ProjectConfig.from_path(path=str(tmp_dir), programming_language=ProgrammingLanguage.PYTHON)

        # Verify that the language is Python, not TypeScript (which would be detected based on file count)
        assert project_config.programming_language == ProgrammingLanguage.PYTHON

        # Test with explicit TypeScript language
        project_config = ProjectConfig.from_path(
            path=str(py_dir),  # Use Python directory
            programming_language=ProgrammingLanguage.TYPESCRIPT,
        )

        # Verify that the language is TypeScript, not Python (which would be detected based on file count)
        assert project_config.programming_language == ProgrammingLanguage.TYPESCRIPT


def test_subfolder_language_detection():
    """Test that language detection respects the specified subfolder.

    A throwaway git repository is created with five TypeScript files at its
    root and three Python files in a ``python_only/`` subfolder. With no
    explicit language, ``ProjectConfig.from_path`` is expected to report
    TypeScript for the repository root and Python for the subfolder.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        # Create a temporary directory with TypeScript files in root and Python files in subfolder
        ts_dir = Path(tmp_dir)
        py_dir = Path(tmp_dir) / "python_only"
        py_dir.mkdir()

        # Create TypeScript files in root
        for i in range(5):
            (ts_dir / f"file{i}.ts").write_text(f"// TypeScript file {i}")

        # Create Python files in subfolder
        for i in range(3):
            (py_dir / f"file{i}.py").write_text(f"# Python file {i}")

        # Initialize git repo. Each step is run as an argument list (no shell, so
        # the temp path needs no quoting) and with check=True so that a failing
        # git command aborts the test at the setup step instead of being ignored.
        for git_args in (
            ["init"],
            ["config", "user.email", "test@example.com"],
            ["config", "user.name", "Test User"],
            ["add", "."],
            ["commit", "-m", "Initial commit"],
        ):
            subprocess.run(["git", *git_args], cwd=tmp_dir, check=True)

        # Test with root path - should detect TypeScript
        project_config = ProjectConfig.from_path(path=str(tmp_dir))
        assert project_config.programming_language == ProgrammingLanguage.TYPESCRIPT

        # Test with Python subfolder path - should detect Python
        project_config = ProjectConfig.from_path(path=str(py_dir))
        assert project_config.programming_language == ProgrammingLanguage.PYTHON