diff --git a/src/gitingest/clone.py b/src/gitingest/clone.py index a91b6a99..9e88cb4d 100644 --- a/src/gitingest/clone.py +++ b/src/gitingest/clone.py @@ -12,21 +12,23 @@ class CloneConfig: local_path: str commit: str | None = None branch: str | None = None + pat: str | None = None @async_timeout(CLONE_TIMEOUT) async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]: """ - Clones a repository to a local path based on the provided query parameters. + Clones a repository to a local path based on the provided configuration. Parameters ---------- config : CloneConfig - A dictionary containing the following keys: + Configuration object containing: - url (str): The URL of the repository. - local_path (str): The local path to clone the repository to. - commit (Optional[str]): The specific commit hash to checkout. - - branch (Optional[str]): The branch to clone. Defaults to 'main' or 'master' if not provided. + - branch (Optional[str]): The branch to clone. + - pat (Optional[str]): Personal Access Token for authentication. Returns ------- @@ -36,17 +38,18 @@ async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]: Raises ------ ValueError - If the repository does not exist or if required query parameters are missing. + If the repository does not exist or if required parameters are missing. RuntimeError If any git command fails during execution. AsyncTimeoutError If the cloning process exceeds the specified timeout. """ - # Extract and validate query parameters + # Extract and validate parameters url: str = config.url local_path: str = config.local_path commit: str | None = config.commit branch: str | None = config.branch + pat: str | None = config.pat if not url: raise ValueError("The 'url' parameter is required.") @@ -55,14 +58,17 @@ async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]: raise ValueError("The 'local_path' parameter is required.") # Check if the repository exists - if not await _check_repo_exists(url): - raise ValueError("Repository not found, make sure it is public") + if not await _check_repo_exists(url, pat): + raise ValueError("Repository not found, make sure it is public or provide valid PAT") try: if commit: # Scenario 1: Clone and checkout a specific commit # Clone the repository without depth to ensure full history for checkout - clone_cmd = ["git", "clone", "--single-branch", url, local_path] + clone_cmd = ["git", "clone", "--single-branch"] + if pat: + url = url.replace("https://", f"https://oauth2:{pat}@") + clone_cmd.extend([url, local_path]) await _run_git_command(*clone_cmd) # Checkout the specific commit @@ -70,20 +76,25 @@ async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]: return await _run_git_command(*checkout_cmd) if branch and branch.lower() not in ("main", "master"): - # Scenario 2: Clone a specific branch with shallow depth - clone_cmd = ["git", "clone", "--depth=1", "--single-branch", "--branch", branch, url, local_path] + clone_cmd = ["git", "clone", "--depth=1", "--single-branch", "--branch", branch] + if pat: + url = url.replace("https://", f"https://oauth2:{pat}@") + clone_cmd.extend([url, local_path]) return await _run_git_command(*clone_cmd) # Scenario 3: Clone the default branch with shallow depth - clone_cmd = ["git", "clone", "--depth=1", "--single-branch", url, local_path] + clone_cmd = ["git", "clone", "--depth=1", "--single-branch"] + if pat: + url = url.replace("https://", f"https://oauth2:{pat}@") + clone_cmd.extend([url, local_path]) return await _run_git_command(*clone_cmd) except (RuntimeError, asyncio.TimeoutError, AsyncTimeoutError): raise # Re-raise the exception -async def _check_repo_exists(url: str) -> bool: +async def _check_repo_exists(url: str, pat: str | None = None) -> bool: """ Check if a repository exists at the given URL using an HTTP HEAD request. @@ -91,16 +102,37 @@ async def _check_repo_exists(url: str) -> bool: ---------- url : str The URL of the repository. + pat : str | None + Personal Access Token for authentication, optional. Returns ------- bool True if the repository exists, False otherwise. """ + # Parse URL to get components + parts = url.split('/') + if len(parts) < 5: # Need at least protocol, empty, host, username, repo + return False + + host = parts[2] + username = parts[3] + repo = parts[4] + + # Construct API URL based on host + if 'github.com' in host: + api_url = url + else: + # For custom Git servers, use API v1 endpoint + api_url = f"https://{host}/api/v1/repos/{username}/{repo}" + + cmd = ["curl", "-I"] + if pat: + cmd.extend(["-H", f"Authorization: token {pat}"]) + cmd.append(api_url) + proc = await asyncio.create_subprocess_exec( - "curl", - "-I", - url, + *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, ) diff --git a/src/gitingest/tests/test_clone.py b/src/gitingest/tests/test_clone.py index e3b81289..354f7007 100644 --- a/src/gitingest/tests/test_clone.py +++ b/src/gitingest/tests/test_clone.py @@ -19,14 +19,15 @@ async def test_clone_repo_with_commit() -> None: mock_process = AsyncMock() mock_process.communicate.return_value = (b"output", b"error") mock_exec.return_value = mock_process + await clone_repo(clone_config) - mock_check.assert_called_once_with(clone_config.url) + mock_check.assert_called_once_with(clone_config.url, None) assert mock_exec.call_count == 2 # Clone and checkout calls @pytest.mark.asyncio async def test_clone_repo_without_commit() -> None: - query = CloneConfig(url="https://github.com/user/repo", local_path="/tmp/repo", commit=None, branch="main") + clone_config = CloneConfig(url="https://github.com/user/repo", local_path="/tmp/repo", commit=None, branch="main") with patch("gitingest.clone._check_repo_exists", return_value=True) as mock_check: with patch("gitingest.clone._run_git_command", new_callable=AsyncMock) as mock_exec: @@ -34,8 +35,8 @@ async def test_clone_repo_without_commit() -> None: mock_process.communicate.return_value = (b"output", b"error") mock_exec.return_value = mock_process - await clone_repo(query) - mock_check.assert_called_once_with(query.url) + await clone_repo(clone_config) + mock_check.assert_called_once_with(clone_config.url, None) assert mock_exec.call_count == 1 # Only clone call @@ -50,7 +51,7 @@ async def test_clone_repo_nonexistent_repository() -> None: with patch("gitingest.clone._check_repo_exists", return_value=False) as mock_check: with pytest.raises(ValueError, match="Repository not found"): await clone_repo(clone_config) - mock_check.assert_called_once_with(clone_config.url) + mock_check.assert_called_once_with(clone_config.url, None) @pytest.mark.asyncio @@ -74,3 +75,80 @@ async def test_check_repo_exists() -> None: # Test failed request mock_process.returncode = 1 assert await _check_repo_exists(url) is False + + +@pytest.mark.asyncio +async def test_check_repo_exists_with_pat() -> None: + url = "https://github.com/user/repo" + pat = "test_token_123" + + with patch("asyncio.create_subprocess_exec", new_callable=AsyncMock) as mock_exec: + mock_process = AsyncMock() + mock_process.communicate.return_value = (b"HTTP/1.1 200 OK\n", b"") + mock_process.returncode = 0 + mock_exec.return_value = mock_process + + await _check_repo_exists(url, pat) + + # Verify curl command includes authorization header + mock_exec.assert_called_with( + "curl", + "-I", + "-H", + f"Authorization: token {pat}", + url, + stdout=-1, # asyncio.subprocess.PIPE + stderr=-1, # asyncio.subprocess.PIPE + ) + + +@pytest.mark.asyncio +async def test_check_repo_exists_custom_git_server() -> None: + url = "https://git.custom.com/user/repo" + pat = "test_token_123" + + with patch("asyncio.create_subprocess_exec", new_callable=AsyncMock) as mock_exec: + mock_process = AsyncMock() + mock_process.communicate.return_value = (b"HTTP/1.1 200 OK\n", b"") + mock_process.returncode = 0 + mock_exec.return_value = mock_process + + await _check_repo_exists(url, pat) + + # Verify curl command uses correct API endpoint and includes authorization header + mock_exec.assert_called_with( + "curl", + "-I", + "-H", + f"Authorization: token {pat}", + "https://git.custom.com/api/v1/repos/user/repo", + stdout=-1, # asyncio.subprocess.PIPE + stderr=-1, # asyncio.subprocess.PIPE + ) + + +@pytest.mark.asyncio +async def test_clone_repo_with_pat() -> None: + clone_config = CloneConfig( + url="https://git.custom.com/user/repo", + local_path="/tmp/repo", + commit=None, + branch="main", + pat="test_token_123", + ) + + with patch("gitingest.clone._check_repo_exists", return_value=True) as mock_check: + with patch("gitingest.clone._run_git_command", new_callable=AsyncMock) as mock_exec: + mock_process = AsyncMock() + mock_process.communicate.return_value = (b"output", b"error") + mock_exec.return_value = mock_process + + await clone_repo(clone_config) + mock_check.assert_called_once_with(clone_config.url, clone_config.pat) + + # Verify git clone command includes PAT in URL + expected_url = clone_config.url.replace("https://", f"https://oauth2:{clone_config.pat}@") + # Check that the command was called with the correct arguments + mock_exec.assert_called_with( + "git", "clone", "--depth=1", "--single-branch", expected_url, clone_config.local_path + ) diff --git a/src/process_query.py b/src/process_query.py index 470b675b..0f7d0285 100644 --- a/src/process_query.py +++ b/src/process_query.py @@ -89,6 +89,7 @@ async def process_query( slider_position: int, pattern_type: str = "exclude", pattern: str = "", + pat: str | None = None, is_index: bool = False, ) -> _TemplateResponse: """ @@ -140,6 +141,7 @@ async def process_query( local_path=query["local_path"], commit=query.get("commit"), branch=query.get("branch"), + pat=pat, ) await clone_repo(clone_config) summary, tree, content = ingest_from_query(query) diff --git a/src/templates/components/github_form.jinja b/src/templates/components/github_form.jinja index 7be65aee..57e369e0 100644 --- a/src/templates/components/github_form.jinja +++ b/src/templates/components/github_form.jinja @@ -3,9 +3,10 @@