Language detection: pass non-string `language` values through unchanged and add tests.

* language.py / config.py: name the intermediate values (`subdirs`,
  `detected_language`); the expressions themselves are unchanged.
* codebase.py: only call `.upper()` on `language` when it is a non-empty str;
  any other value is forwarded to ProjectConfig as-is.
* tests: new test_language_detection.py; git setup runs through
  subprocess.run(..., check=True) instead of an unchecked shell string.

diff --git a/src/codegen/git/utils/language.py b/src/codegen/git/utils/language.py
index 551ac4212..8c761e52b 100644
--- a/src/codegen/git/utils/language.py
+++ b/src/codegen/git/utils/language.py
@@ -132,8 +132,11 @@ def _determine_language_by_git_file_count(folder_path: str) -> ProgrammingLangua
     repo_config = RepoConfig.from_repo_path(repo_path=git_root)
     repo_operator = RepoOperator(repo_config=repo_config)
 
+    # Use the specified subfolder path for language detection if provided
+    subdirs = [base_path] if base_path else None
+
     # Walk through the directory
-    for rel_path, _ in repo_operator.iter_files(subdirs=[base_path] if base_path else None, ignore_list=GLOBAL_FILE_IGNORE_LIST):
+    for rel_path, _ in repo_operator.iter_files(subdirs=subdirs, ignore_list=GLOBAL_FILE_IGNORE_LIST):
         # Convert to Path object
         file_path = Path(git_root) / Path(rel_path)
 
diff --git a/src/codegen/sdk/codebase/config.py b/src/codegen/sdk/codebase/config.py
index 25f3c2e0e..0b9900ee2 100644
--- a/src/codegen/sdk/codebase/config.py
+++ b/src/codegen/sdk/codebase/config.py
@@ -46,23 +46,29 @@ def from_path(cls, path: str, programming_language: ProgrammingLanguage | None =
         repo_path = os.path.abspath(path)
         git_root, base_path = split_git_path(repo_path)
         subdirectories = [base_path] if base_path else None
-        programming_language = programming_language or determine_project_language(repo_path)
+
+        # Only determine project language if not explicitly provided
+        detected_language = programming_language or determine_project_language(repo_path)
+
         repo_config = RepoConfig.from_repo_path(repo_path=git_root)
-        repo_config.language = programming_language
+        repo_config.language = detected_language
         repo_config.subdirectories = subdirectories
         # Create main project
         return cls(
             repo_operator=RepoOperator(repo_config=repo_config),
-            programming_language=programming_language,
+            programming_language=detected_language,
             base_path=base_path,
             subdirectories=subdirectories,
         )
 
     @classmethod
     def from_repo_operator(cls, repo_operator: RepoOperator, programming_language: ProgrammingLanguage | None = None, base_path: str | None = None) -> Self:
+        # Only determine project language if not explicitly provided
+        detected_language = programming_language or determine_project_language(repo_operator.repo_path)
+
         return cls(
             repo_operator=repo_operator,
-            programming_language=programming_language or determine_project_language(repo_operator.repo_path),
+            programming_language=detected_language,
             base_path=base_path,
             subdirectories=[base_path] if base_path else None,
         )
diff --git a/src/codegen/sdk/core/codebase.py b/src/codegen/sdk/core/codebase.py
index fc3e0557e..07701acf8 100644
--- a/src/codegen/sdk/core/codebase.py
+++ b/src/codegen/sdk/core/codebase.py
@@ -200,7 +200,7 @@ def __init__(
         if repo_path is not None:
             main_project = ProjectConfig.from_path(
                 repo_path,
-                programming_language=ProgrammingLanguage(language.upper()) if language else None,
+                programming_language=ProgrammingLanguage(language.upper()) if isinstance(language, str) and language else language,
             )
             projects = [main_project]
         else:
@@ -1392,7 +1392,7 @@ def from_repo(
             logger.info("Initializing Codebase...")
             project = ProjectConfig.from_repo_operator(
                 repo_operator=repo_operator,
-                programming_language=ProgrammingLanguage(language.upper()) if language else None,
+                programming_language=ProgrammingLanguage(language.upper()) if isinstance(language, str) and language else language,
             )
             codebase = Codebase(projects=[project], config=config, secrets=secrets)
             logger.info("Codebase initialization complete")
diff --git a/tests/unit/codegen/sdk/codebase/test_language_detection.py b/tests/unit/codegen/sdk/codebase/test_language_detection.py
new file mode 100644
--- /dev/null
+++ b/tests/unit/codegen/sdk/codebase/test_language_detection.py
@@ -0,0 +1,91 @@
+import subprocess
+import tempfile
+from pathlib import Path
+
+from codegen.sdk.codebase.config import ProjectConfig
+from codegen.shared.enums.programming_language import ProgrammingLanguage
+
+
+def _init_git_repo(repo_dir: str) -> None:
+    """Create a git repository in ``repo_dir`` and commit all files in it.
+
+    Every git command runs without a shell and with ``check=True``, so a failing
+    git step aborts the test instead of being silently ignored.
+    """
+    git_commands = [
+        ["git", "init"],
+        ["git", "config", "user.email", "test@example.com"],
+        ["git", "config", "user.name", "Test User"],
+        ["git", "add", "."],
+        ["git", "commit", "-m", "Initial commit"],
+    ]
+    for command in git_commands:
+        subprocess.run(command, cwd=repo_dir, check=True)
+
+
+def test_explicit_language_respected():
+    """Test that explicitly provided language is respected and not overridden by detection."""
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        # Create a temporary directory with more TypeScript files than Python files
+        ts_dir = Path(tmp_dir) / "ts"
+        py_dir = Path(tmp_dir) / "py"
+        ts_dir.mkdir()
+        py_dir.mkdir()
+
+        # Create TypeScript files
+        for i in range(5):
+            with open(ts_dir / f"file{i}.ts", "w") as f:
+                f.write(f"// TypeScript file {i}")
+
+        # Create fewer Python files
+        for i in range(2):
+            with open(py_dir / f"file{i}.py", "w") as f:
+                f.write(f"# Python file {i}")
+
+        # Initialize git repo
+        _init_git_repo(tmp_dir)
+
+        # Test with explicit Python language
+        project_config = ProjectConfig.from_path(path=str(tmp_dir), programming_language=ProgrammingLanguage.PYTHON)
+
+        # Verify that the language is Python, not TypeScript (which would be detected based on file count)
+        assert project_config.programming_language == ProgrammingLanguage.PYTHON
+
+        # Test with explicit TypeScript language
+        project_config = ProjectConfig.from_path(
+            path=str(py_dir),  # Use Python directory
+            programming_language=ProgrammingLanguage.TYPESCRIPT,
+        )
+
+        # Verify that the language is TypeScript, not Python (which would be detected based on file count)
+        assert project_config.programming_language == ProgrammingLanguage.TYPESCRIPT
+
+
+def test_subfolder_language_detection():
+    """Test that language detection respects the specified subfolder."""
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        # Create a temporary directory with TypeScript files in root and Python files in subfolder
+        ts_dir = Path(tmp_dir)
+        py_dir = Path(tmp_dir) / "python_only"
+        py_dir.mkdir()
+
+        # Create TypeScript files in root
+        for i in range(5):
+            with open(ts_dir / f"file{i}.ts", "w") as f:
+                f.write(f"// TypeScript file {i}")
+
+        # Create Python files in subfolder
+        for i in range(3):
+            with open(py_dir / f"file{i}.py", "w") as f:
+                f.write(f"# Python file {i}")
+
+        # Initialize git repo
+        _init_git_repo(tmp_dir)
+
+        # Test with root path - should detect TypeScript
+        project_config = ProjectConfig.from_path(path=str(tmp_dir))
+        assert project_config.programming_language == ProgrammingLanguage.TYPESCRIPT
+
+        # Test with Python subfolder path - should detect Python
+        project_config = ProjectConfig.from_path(path=str(py_dir))
+        assert project_config.programming_language == ProgrammingLanguage.PYTHON