diff --git a/src/gitingest/clone.py b/src/gitingest/clone.py
index da6550f1..6b048c09 100644
--- a/src/gitingest/clone.py
+++ b/src/gitingest/clone.py
@@ -31,6 +31,111 @@ class CloneConfig:
     local_path: str
     commit: str | None = None
     branch: str | None = None
+    pat: str | None = None
+
+
+async def _check_repo_exists(url: str, pat: str | None = None) -> bool:
+    """
+    Check if a repository exists at the given URL using an HTTP HEAD request.
+
+    Parameters
+    ----------
+    url : str
+        The URL of the repository.
+    pat : str | None
+        Personal Access Token for authentication, optional.
+
+    Returns
+    -------
+    bool
+        True if the repository exists, False otherwise.
+    """
+    # Parse URL to get components
+    parts = url.split("/")
+    if len(parts) < 5:  # Need at least protocol, empty, host, username, repo
+        return False
+
+    host = parts[2]
+    username = parts[3]
+    repo = parts[4]
+
+    # Construct API URL based on host
+    if "github.com" in host:
+        api_url = url
+    else:
+        # For custom Git servers, use API v1 endpoint
+        api_url = f"https://{host}/api/v1/repos/{username}/{repo}"
+
+    cmd = ["curl", "-I"]
+    if pat:
+        cmd.extend(["-H", f"Authorization: token {pat}"])
+    cmd.append(api_url)
+
+    proc = await asyncio.create_subprocess_exec(
+        *cmd,
+        stdout=asyncio.subprocess.PIPE,
+        stderr=asyncio.subprocess.PIPE,
+    )
+    stdout, _ = await proc.communicate()
+    if proc.returncode != 0:
+        return False
+    # Check if stdout contains "404" status code
+    stdout_str = stdout.decode()
+    return "HTTP/1.1 404" not in stdout_str and "HTTP/2 404" not in stdout_str
+
+
+def _redact_credentials(text: str) -> str:
+    """
+    Mask credentials embedded in HTTP(S) URLs so they never reach error messages.
+
+    Parameters
+    ----------
+    text : str
+        Text that may contain URLs such as ``https://oauth2:<token>@host/path``.
+
+    Returns
+    -------
+    str
+        The text with every ``user:token@`` userinfo segment replaced by ``***@``.
+    """
+    import re  # Local import: the module-level import block is not part of this change
+
+    return re.sub(r"(https?://)[^/@\s]+@", r"\1***@", text)
+
+
+async def _run_git_command(*args: str) -> tuple[bytes, bytes]:
+    """
+    Executes a git command asynchronously and captures its output.
+
+    Parameters
+    ----------
+    *args : str
+        The git command and its arguments to execute.
+
+    Returns
+    -------
+    tuple[bytes, bytes]
+        A tuple containing the stdout and stderr of the git command.
+
+    Raises
+    ------
+    RuntimeError
+        If the git command exits with a non-zero status.
+        Credentials embedded in URLs are masked in the error message.
+    """
+    proc = await asyncio.create_subprocess_exec(
+        *args,
+        stdout=asyncio.subprocess.PIPE,
+        stderr=asyncio.subprocess.PIPE,
+    )
+    stdout, stderr = await proc.communicate()
+    if proc.returncode != 0:
+        # The clone URL may carry a PAT ("https://oauth2:<pat>@host/..."); never echo it back
+        command = _redact_credentials(" ".join(args))
+        error_message = _redact_credentials(stderr.decode().strip())
+        raise RuntimeError(f"Git command failed: {command}\nError: {error_message}")
+
+    return stdout, stderr
 
 
 @async_timeout(CLONE_TIMEOUT)
@@ -45,11 +150,12 @@ async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]:
     Parameters
     ----------
     config : CloneConfig
-        A dictionary containing the following keys:
+        Configuration object containing:
             - url (str): The URL of the repository.
             - local_path (str): The local path to clone the repository to.
             - commit (Optional[str]): The specific commit hash to checkout.
             - branch (Optional[str]): The branch to clone. Defaults to 'main' or 'master' if not provided.
+            - pat (Optional[str]): Personal Access Token for authentication.
 
     Returns
     -------
@@ -65,11 +171,12 @@ async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]:
     AsyncTimeoutError
         If the cloning process exceeds the specified timeout.
     """
-    # Extract and validate query parameters
+    # Extract and validate parameters
     url: str = config.url
     local_path: str = config.local_path
     commit: str | None = config.commit
     branch: str | None = config.branch
+    pat: str | None = config.pat
 
     if not url:
         raise ValueError("The 'url' parameter is required.")
@@ -78,8 +185,12 @@ async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]:
         raise ValueError("The 'local_path' parameter is required.")
 
     # Check if the repository exists
-    if not await _check_repo_exists(url):
-        raise ValueError("Repository not found, make sure it is public")
+    if not await _check_repo_exists(url, pat):
+        raise ValueError("Repository not found, make sure it is public or provide valid PAT")
+
+    # Embed the PAT in the URL once so every clone scenario below authenticates the same way
+    if pat:
+        url = url.replace("https://", f"https://oauth2:{pat}@")
 
     try:
         if commit:
@@ -104,64 +215,3 @@ async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]:
 
     except (RuntimeError, asyncio.TimeoutError, AsyncTimeoutError):
         raise  # Re-raise the exception
-
-
-async def _check_repo_exists(url: str) -> bool:
-    """
-    Check if a repository exists at the given URL using an HTTP HEAD request.
-
-    Parameters
-    ----------
-    url : str
-        The URL of the repository.
-
-    Returns
-    -------
-    bool
-        True if the repository exists, False otherwise.
-    """
-    proc = await asyncio.create_subprocess_exec(
-        "curl",
-        "-I",
-        url,
-        stdout=asyncio.subprocess.PIPE,
-        stderr=asyncio.subprocess.PIPE,
-    )
-    stdout, _ = await proc.communicate()
-    if proc.returncode != 0:
-        return False
-    # Check if stdout contains "404" status code
-    stdout_str = stdout.decode()
-    return "HTTP/1.1 404" not in stdout_str and "HTTP/2 404" not in stdout_str
-
-
-async def _run_git_command(*args: str) -> tuple[bytes, bytes]:
-    """
-    Executes a git command asynchronously and captures its output.
-
-    Parameters
-    ----------
-    *args : str
-        The git command and its arguments to execute.
-
-    Returns
-    -------
-    tuple[bytes, bytes]
-        A tuple containing the stdout and stderr of the git command.
-
-    Raises
-    ------
-    RuntimeError
-        If the git command exits with a non-zero status.
-    """
-    proc = await asyncio.create_subprocess_exec(
-        *args,
-        stdout=asyncio.subprocess.PIPE,
-        stderr=asyncio.subprocess.PIPE,
-    )
-    stdout, stderr = await proc.communicate()
-    if proc.returncode != 0:
-        error_message = stderr.decode().strip()
-        raise RuntimeError(f"Git command failed: {' '.join(args)}\nError: {error_message}")
-
-    return stdout, stderr
diff --git a/src/gitingest/tests/test_clone.py b/src/gitingest/tests/test_clone.py
index c124730a..40b377a7 100644
--- a/src/gitingest/tests/test_clone.py
+++ b/src/gitingest/tests/test_clone.py
@@ -19,14 +19,15 @@ async def test_clone_repo_with_commit() -> None:
             mock_process = AsyncMock()
             mock_process.communicate.return_value = (b"output", b"error")
             mock_exec.return_value = mock_process
+
             await clone_repo(clone_config)
-            mock_check.assert_called_once_with(clone_config.url)
+            mock_check.assert_called_once_with(clone_config.url, None)
             assert mock_exec.call_count == 2  # Clone and checkout calls
 
 
 @pytest.mark.asyncio
 async def test_clone_repo_without_commit() -> None:
-    query = CloneConfig(url="https://github.com/user/repo", local_path="/tmp/repo", commit=None, branch="main")
+    clone_config = CloneConfig(url="https://github.com/user/repo", local_path="/tmp/repo", commit=None, branch="main")
 
     with patch("gitingest.clone._check_repo_exists", return_value=True) as mock_check:
         with patch("gitingest.clone._run_git_command", new_callable=AsyncMock) as mock_exec:
@@ -34,8 +35,8 @@ async def test_clone_repo_without_commit() -> None:
             mock_process.communicate.return_value = (b"output", b"error")
             mock_exec.return_value = mock_process
 
-            await clone_repo(query)
-            mock_check.assert_called_once_with(query.url)
+            await clone_repo(clone_config)
+            mock_check.assert_called_once_with(clone_config.url, None)
             assert mock_exec.call_count == 1  # Only clone call
 
 
@@ -50,7 +51,7 @@ async def test_clone_repo_nonexistent_repository() -> None:
     with patch("gitingest.clone._check_repo_exists", return_value=False) as mock_check:
         with pytest.raises(ValueError, match="Repository not found"):
             await clone_repo(clone_config)
-            mock_check.assert_called_once_with(clone_config.url)
+            mock_check.assert_called_once_with(clone_config.url, None)
 
 
 @pytest.mark.asyncio
@@ -169,3 +170,80 @@ async def test_check_repo_exists_with_redirect() -> None:
         mock_exec.return_value = mock_process
 
         assert await _check_repo_exists(url)
+
+
+@pytest.mark.asyncio
+async def test_check_repo_exists_with_pat() -> None:
+    url = "https://github.com/user/repo"
+    pat = "test_token_123"
+
+    with patch("asyncio.create_subprocess_exec", new_callable=AsyncMock) as mock_exec:
+        mock_process = AsyncMock()
+        mock_process.communicate.return_value = (b"HTTP/1.1 200 OK\n", b"")
+        mock_process.returncode = 0
+        mock_exec.return_value = mock_process
+
+        assert await _check_repo_exists(url, pat)
+
+        # Verify curl command includes authorization header
+        mock_exec.assert_called_with(
+            "curl",
+            "-I",
+            "-H",
+            f"Authorization: token {pat}",
+            url,
+            stdout=-1,  # asyncio.subprocess.PIPE
+            stderr=-1,  # asyncio.subprocess.PIPE
+        )
+
+
+@pytest.mark.asyncio
+async def test_check_repo_exists_custom_git_server() -> None:
+    url = "https://git.custom.com/user/repo"
+    pat = "test_token_123"
+
+    with patch("asyncio.create_subprocess_exec", new_callable=AsyncMock) as mock_exec:
+        mock_process = AsyncMock()
+        mock_process.communicate.return_value = (b"HTTP/1.1 200 OK\n", b"")
+        mock_process.returncode = 0
+        mock_exec.return_value = mock_process
+
+        assert await _check_repo_exists(url, pat)
+
+        # Verify curl command uses correct API endpoint and includes authorization header
+        mock_exec.assert_called_with(
+            "curl",
+            "-I",
+            "-H",
+            f"Authorization: token {pat}",
+            "https://git.custom.com/api/v1/repos/user/repo",
+            stdout=-1,  # asyncio.subprocess.PIPE
+            stderr=-1,  # asyncio.subprocess.PIPE
+        )
+
+
+@pytest.mark.asyncio
+async def test_clone_repo_with_pat() -> None:
+    clone_config = CloneConfig(
+        url="https://git.custom.com/user/repo",
+        local_path="/tmp/repo",
+        commit=None,
+        branch="main",
+        pat="test_token_123",
+    )
+
+    with patch("gitingest.clone._check_repo_exists", return_value=True) as mock_check:
+        with patch("gitingest.clone._run_git_command", new_callable=AsyncMock) as mock_exec:
+            mock_process = AsyncMock()
+            mock_process.communicate.return_value = (b"output", b"error")
+            mock_exec.return_value = mock_process
+
+            await clone_repo(clone_config)
+            mock_check.assert_called_once_with(clone_config.url, clone_config.pat)
+
+            # Verify git clone command includes PAT in URL
+            expected_url = clone_config.url.replace("https://", f"https://oauth2:{clone_config.pat}@")
+            # Check that the command was called with the correct arguments
+            mock_exec.assert_called_with(
+                "git", "clone", "--depth=1", "--single-branch", expected_url, clone_config.local_path
+            )
diff --git a/src/process_query.py b/src/process_query.py
index 4053e45c..9afe8f04 100644
--- a/src/process_query.py
+++ b/src/process_query.py
@@ -17,6 +17,7 @@ async def process_query(
     slider_position: int,
     pattern_type: str = "exclude",
     pattern: str = "",
     is_index: bool = False,
+    pat: str | None = None,
 ) -> _TemplateResponse:
     """
@@ -68,6 +69,7 @@ async def process_query(
             local_path=query["local_path"],
             commit=query.get("commit"),
             branch=query.get("branch"),
+            pat=pat,
         )
         await clone_repo(clone_config)
         summary, tree, content = ingest_from_query(query)
diff --git a/src/templates/components/github_form.jinja b/src/templates/components/github_form.jinja
index 7be65aee..4952c633 100644
--- a/src/templates/components/github_form.jinja
+++ b/src/templates/components/github_form.jinja
@@ -3,9 +3,10 @@
-
+
-
+ +
+ + +
+ + + +