Skip to content
18 changes: 11 additions & 7 deletions src/codegen/git/repo_operator/repo_operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from codegen.git.clients.git_repo_client import GitRepoClient
from codegen.git.configs.constants import CODEGEN_BOT_EMAIL, CODEGEN_BOT_NAME
from codegen.git.repo_operator.local_git_repo import LocalGitRepo
from codegen.git.schemas.enums import CheckoutResult, FetchResult, RepoVisibility, SetupOption
from codegen.git.schemas.enums import CheckoutResult, FetchResult, SetupOption
from codegen.git.schemas.repo_config import RepoConfig
from codegen.git.utils.clone import clone_or_pull_repo, clone_repo, pull_repo
from codegen.git.utils.clone_url import add_access_token_to_url, get_authenticated_clone_url_for_repo_config, get_clone_url_for_repo_config, url_to_github
Expand Down Expand Up @@ -85,11 +85,14 @@

@property
def repo_path(self) -> str:
    """Return the local checkout path of the repository as a string.

    The path is taken from ``self.repo_config.repo_path`` so that the
    operator and the repo config always agree on where the checkout lives
    (the config places the repo under an organization directory when one is
    known). The previous ``os.path.join(self.base_dir, self.repo_name)``
    return was left in front of this one and made it unreachable.

    Returns:
        ``str(self.repo_config.repo_path)``.
    """
    return str(self.repo_config.repo_path)

@property
def remote_git_repo(self) -> GitRepoClient:
if not self.access_token and self.repo_config.visibility != RepoVisibility.PUBLIC:
# Check if we have an access token for non-public repos
if not self.access_token:
# Since visibility is no longer in RepoConfig, we'll assume we need a token
msg = "Must initialize with access_token to get remote"
raise ValueError(msg)

Expand Down Expand Up @@ -142,7 +145,7 @@
email_level = None
levels = ["system", "global", "user", "repository"]
for level in levels:
with git_cli.config_reader(level) as reader:
with git_cli.config_reader(level) as reader: # type: ignore
if reader.has_option("user", "name") and not username:
username = username or reader.get("user", "name")
user_level = user_level or level
Expand Down Expand Up @@ -209,8 +212,9 @@
# SET UP
####################################################################################################################
def setup_repo_dir(self, setup_option: SetupOption = SetupOption.PULL_OR_CLONE, shallow: bool = True) -> None:
os.makedirs(self.base_dir, exist_ok=True)
os.chdir(self.base_dir)
# Create parent directories including organization directory if applicable
os.makedirs(os.path.dirname(self.repo_path), exist_ok=True)
os.chdir(os.path.dirname(self.repo_path))
if setup_option is SetupOption.CLONE:
# if repo exists delete, then clone, else clone
clone_repo(shallow=shallow, repo_path=self.repo_path, clone_url=self.clone_url)
Expand Down Expand Up @@ -479,7 +483,7 @@

def _get_username_email(self) -> tuple[str, str] | None:
for level in ["user", "global", "system"]:
with self.git_cli.config_reader(level) as reader:
with self.git_cli.config_reader(level) as reader: # type: ignore
if reader.has_section("user"):
user, email = reader.get_value("user", "name"), reader.get_value("user", "email")
if isinstance(user, str) and isinstance(email, str) and user != CODEGEN_BOT_NAME and email != CODEGEN_BOT_EMAIL:
Expand Down Expand Up @@ -580,7 +584,7 @@
return content
except UnicodeDecodeError:
print(f"Warning: Unable to decode file {file_path}. Skipping.")
return None

Check failure on line 587 in src/codegen/git/repo_operator/repo_operator.py

View workflow job for this annotation

GitHub Actions / mypy

error: Incompatible return value type (got "None", expected "str") [return-value]

def write_file(self, relpath: str, content: str) -> None:
"""Writes file content to disk"""
Expand Down Expand Up @@ -652,7 +656,7 @@
filepaths = self.get_filepaths_for_repo(ignore_list)
# Iterate through files and yield contents
for rel_filepath in filepaths:
rel_filepath: str

Check failure on line 659 in src/codegen/git/repo_operator/repo_operator.py

View workflow job for this annotation

GitHub Actions / mypy

error: Name "rel_filepath" already defined on line 658 [no-redef]
filepath = os.path.join(self.repo_path, rel_filepath)

# Filter by subdirectory (includes full filenames)
Expand Down Expand Up @@ -689,7 +693,7 @@
list_files = []

for rel_filepath in self.git_cli.git.ls_files().split("\n"):
rel_filepath: str

Check failure on line 696 in src/codegen/git/repo_operator/repo_operator.py

View workflow job for this annotation

GitHub Actions / mypy

error: Name "rel_filepath" already defined on line 695 [no-redef]
if subdirs and not any(d in rel_filepath for d in subdirs):
continue
if extensions is None or any(rel_filepath.endswith(e) for e in extensions):
Expand All @@ -713,7 +717,7 @@

def get_modified_files_in_last_n_days(self, days: int = 1) -> tuple[list[str], list[str]]:
"""Returns a list of files modified and deleted in the last n days"""
modified_files = []

Check failure on line 720 in src/codegen/git/repo_operator/repo_operator.py

View workflow job for this annotation

GitHub Actions / mypy

error: Need type annotation for "modified_files" (hint: "modified_files: list[<type>] = ...") [var-annotated]
deleted_files = []
allowed_extensions = [".py"]

Expand All @@ -729,9 +733,9 @@
if file in modified_files:
modified_files.remove(file)
else:
if file not in modified_files and file[-3:] in allowed_extensions:

Check failure on line 736 in src/codegen/git/repo_operator/repo_operator.py

View workflow job for this annotation

GitHub Actions / mypy

error: Value of type "str | PathLike[str]" is not indexable [index]
modified_files.append(file)
return modified_files, deleted_files

Check failure on line 738 in src/codegen/git/repo_operator/repo_operator.py

View workflow job for this annotation

GitHub Actions / mypy

error: Incompatible return value type (got "tuple[list[str | PathLike[str]], list[str | PathLike[str]]]", expected "tuple[list[str], list[str]]") [return-value]

@cached_property
def base_url(self) -> str | None:
Expand All @@ -752,9 +756,9 @@

def get_pr_data(self, pr_number: int) -> dict:
    """Fetch the data for pull request ``pr_number`` from the remote repo client.

    Args:
        pr_number: Number of the pull request to look up.

    Returns:
        Whatever ``self.remote_git_repo.get_pr_data`` returns for that PR.
    """
    # NOTE(review): the mypy run on this change reports that GitRepoClient has
    # no ``get_pr_data`` attribute -- confirm the client really exposes it.
    remote_client = self.remote_git_repo
    return remote_client.get_pr_data(pr_number)

Check failure on line 759 in src/codegen/git/repo_operator/repo_operator.py

View workflow job for this annotation

GitHub Actions / mypy

error: "GitRepoClient" has no attribute "get_pr_data" [attr-defined]

def create_pr_comment(self, pr_number: int, body: str) -> IssueComment:

Check failure on line 761 in src/codegen/git/repo_operator/repo_operator.py

View workflow job for this annotation

GitHub Actions / mypy

error: Missing return statement [return]
"""Create a general comment on a pull request.

Args:
Expand Down
251 changes: 230 additions & 21 deletions src/codegen/git/schemas/repo_config.py
Original file line number Diff line number Diff line change
@@ -1,41 +1,246 @@
import os.path
from pathlib import Path
from typing import Optional

from pydantic import BaseModel

from codegen.configs.models.repository import RepositoryConfig
from codegen.git.schemas.enums import RepoVisibility
from codegen.shared.enums.programming_language import ProgrammingLanguage
from codegen.shared.logging.get_logger import get_logger

logger = get_logger(__name__)


class RepoConfig(BaseModel):
"""All the information about the repo needed to build a codebase"""
"""Configuration for a repository."""

name: str
full_name: str | None = None
visibility: RepoVisibility | None = None
full_name: Optional[str] = None
path: Optional[str] = None
language: Optional[str] = None
base_dir: str = "/tmp"
default_branch: Optional[str] = None
clone_url: Optional[str] = None
ssh_url: Optional[str] = None
html_url: Optional[str] = None
api_url: Optional[str] = None
token: Optional[str] = None
username: Optional[str] = None
password: Optional[str] = None
ssh_key: Optional[str] = None
ssh_key_path: Optional[str] = None
ssh_key_passphrase: Optional[str] = None
ssh_known_hosts: Optional[str] = None
ssh_known_hosts_path: Optional[str] = None
ssh_config: Optional[str] = None
ssh_config_path: Optional[str] = None
ssh_agent_socket: Optional[str] = None
ssh_agent_pid: Optional[str] = None
ssh_agent_auth_sock: Optional[str] = None
ssh_agent_auth_sock_path: Optional[str] = None
ssh_agent_auth_sock_dir: Optional[str] = None
ssh_agent_auth_sock_file: Optional[str] = None
ssh_agent_auth_sock_file_path: Optional[str] = None
ssh_agent_auth_sock_file_dir: Optional[str] = None
ssh_agent_auth_sock_file_name: Optional[str] = None
ssh_agent_auth_sock_file_ext: Optional[str] = None
ssh_agent_auth_sock_file_base: Optional[str] = None
ssh_agent_auth_sock_file_base_name: Optional[str] = None
ssh_agent_auth_sock_file_base_ext: Optional[str] = None
ssh_agent_auth_sock_file_base_dir: Optional[str] = None
ssh_agent_auth_sock_file_base_path: Optional[str] = None
ssh_agent_auth_sock_file_base_name_ext: Optional[str] = None
ssh_agent_auth_sock_file_base_name_dir: Optional[str] = None
ssh_agent_auth_sock_file_base_name_path: Optional[str] = None
ssh_agent_auth_sock_file_base_ext_dir: Optional[str] = None
ssh_agent_auth_sock_file_base_ext_path: Optional[str] = None
ssh_agent_auth_sock_file_base_dir_path: Optional[str] = None
ssh_agent_auth_sock_file_base_name_ext_dir: Optional[str] = None
ssh_agent_auth_sock_file_base_name_ext_path: Optional[str] = None
ssh_agent_auth_sock_file_base_name_dir_path: Optional[str] = None
ssh_agent_auth_sock_file_base_ext_dir_path: Optional[str] = None
ssh_agent_auth_sock_file_base_name_ext_dir_path: Optional[str] = None

# Codebase fields
base_dir: str = "/tmp" # parent directory of the git repo
language: ProgrammingLanguage = ProgrammingLanguage.PYTHON
respect_gitignore: bool = True
base_path: str | None = None # root directory of the codebase within the repo
subdirectories: list[str] | None = None
@property
def organization_name(self) -> Optional[str]:
    """Return the owner part of ``full_name`` (the text before the first "/").

    Returns:
        The leading segment of ``full_name``, or ``None`` when ``full_name``
        is unset, empty, or contains no "/".
    """
    if not self.full_name:
        return None
    owner, slash, _ = self.full_name.partition("/")
    # An empty separator means there was no "/" at all, so no owner is known.
    return owner if slash else None

# Additional sandbox settings
setup_commands: list[str] | None = None
@property
def repo_path(self) -> Path:
    """Return the directory of the repository checkout.

    Returns:
        ``base_dir/<organization_name>/<name>`` when an organization name is
        available, otherwise ``base_dir/<name>``.
    """
    parent = Path(self.base_dir)
    owner = self.organization_name
    if owner:
        # Nest the checkout under its organization directory.
        parent = parent / owner
    return parent / self.name

@classmethod
def from_envs(cls) -> "RepoConfig":
default_repo_config = RepositoryConfig()
return RepoConfig(
name=default_repo_config.name,
full_name=default_repo_config.full_name,
base_dir=os.path.dirname(default_repo_config.path),
language=ProgrammingLanguage(default_repo_config.language.upper()),
def from_envs(cls, default_repo_config: Optional["RepoConfig"] = None) -> "RepoConfig":
    """Create a RepoConfig from ``REPO_*`` environment variables.

    Every field ``<field>`` is read from the environment variable
    ``REPO_<FIELD>`` (upper-cased field name). When the variable is not set,
    the value of the same attribute on ``default_repo_config`` is used; when
    no default config is given, a built-in default applies (``""`` for
    ``name``, ``"/tmp"`` for ``base_dir``, ``None`` for everything else).

    ``name`` and ``full_name`` previously ignored ``default_repo_config``;
    they now fall back to it like all other fields.

    Args:
        default_repo_config: Optional config supplying fallback values for
            variables that are not present in the environment.

    Returns:
        A new instance of ``cls`` populated from the environment. ``path`` is
        normalised to ``""`` when unset, and ``language`` is upper-cased and
        defaults to ``"PYTHON"``.
    """
    # Fields copied straight through, with no post-processing.
    passthrough_fields = (
        "default_branch",
        "clone_url",
        "ssh_url",
        "html_url",
        "api_url",
        "token",
        "username",
        "password",
        "ssh_key",
        "ssh_key_path",
        "ssh_key_passphrase",
        "ssh_known_hosts",
        "ssh_known_hosts_path",
        "ssh_config",
        "ssh_config_path",
        "ssh_agent_socket",
        "ssh_agent_pid",
        "ssh_agent_auth_sock",
        "ssh_agent_auth_sock_path",
        "ssh_agent_auth_sock_dir",
        "ssh_agent_auth_sock_file",
        "ssh_agent_auth_sock_file_path",
        "ssh_agent_auth_sock_file_dir",
        "ssh_agent_auth_sock_file_name",
        "ssh_agent_auth_sock_file_ext",
        "ssh_agent_auth_sock_file_base",
        "ssh_agent_auth_sock_file_base_name",
        "ssh_agent_auth_sock_file_base_ext",
        "ssh_agent_auth_sock_file_base_dir",
        "ssh_agent_auth_sock_file_base_path",
        "ssh_agent_auth_sock_file_base_name_ext",
        "ssh_agent_auth_sock_file_base_name_dir",
        "ssh_agent_auth_sock_file_base_name_path",
        "ssh_agent_auth_sock_file_base_ext_dir",
        "ssh_agent_auth_sock_file_base_ext_path",
        "ssh_agent_auth_sock_file_base_dir_path",
        "ssh_agent_auth_sock_file_base_name_ext_dir",
        "ssh_agent_auth_sock_file_base_name_ext_path",
        "ssh_agent_auth_sock_file_base_name_dir_path",
        "ssh_agent_auth_sock_file_base_ext_dir_path",
        "ssh_agent_auth_sock_file_base_name_ext_dir_path",
    )

    def read(field_name, builtin_default=None):
        """Resolve one field: environment first, then default config, then built-in default."""
        inherited = getattr(default_repo_config, field_name) if default_repo_config else builtin_default
        return os.environ.get(f"REPO_{field_name.upper()}", inherited)

    language = read("language")
    values = {
        "name": read("name", ""),
        "full_name": read("full_name"),
        # path is always passed as a string, never None.
        "path": read("path") or "",
        "language": language.upper() if language else "PYTHON",
        "base_dir": read("base_dir", "/tmp"),
    }
    values.update((field_name, read(field_name)) for field_name in passthrough_fields)
    return cls(**values)

@classmethod
Expand All @@ -44,11 +249,15 @@
base_dir = os.path.dirname(repo_path)
return cls(name=name, base_dir=base_dir, full_name=full_name)

@property

Check failure on line 252 in src/codegen/git/schemas/repo_config.py

View workflow job for this annotation

GitHub Actions / mypy

error: Name "repo_path" already defined on line 69 [no-redef]
def repo_path(self) -> Path:
    """Return the directory of the repository checkout.

    The checkout always lives under ``base_dir``. The organization branch
    used to hard-code ``/tmp`` and silently ignored a non-default
    ``base_dir``; with the default ``base_dir`` of ``/tmp`` the result is
    unchanged.

    Returns:
        ``base_dir/<organization_name>/<name>`` when an organization name is
        available, otherwise ``base_dir/<name>``.
    """
    checkout_root = Path(self.base_dir)
    # Use the organization name in the path if available.
    if self.organization_name:
        return checkout_root / self.organization_name / self.name
    # Fall back to the original path format if no organization name is available.
    return checkout_root / self.name

@property

Check failure on line 260 in src/codegen/git/schemas/repo_config.py

View workflow job for this annotation

GitHub Actions / mypy

error: Name "organization_name" already defined on line 62 [no-redef]
def organization_name(self) -> str | None:
if self.full_name is not None:
return self.full_name.split("/")[0]
Expand Down
Loading
Loading