From 5eea53f1ded09fc4cc611ac19ef0dfd7381e7400 Mon Sep 17 00:00:00 2001 From: Kerim Incedayi Date: Sun, 29 Dec 2024 10:44:10 +0100 Subject: [PATCH 1/2] feat: add PAT support with custom Git server compatibility - Add optional PAT field with toggle checkbox in UI\n- Support custom Git server API endpoints (api/v1/repos)\n- Add comprehensive test coverage for PAT authentication\n- Update repository existence check to handle both GitHub and custom Git servers --- src/gitingest/clone.py | 62 ++++++++++++---- src/gitingest/tests/test_clone.py | 86 ++++++++++++++++++++-- src/process_query.py | 2 + src/templates/components/github_form.jinja | 39 +++++++++- 4 files changed, 166 insertions(+), 23 deletions(-) diff --git a/src/gitingest/clone.py b/src/gitingest/clone.py index 97a990a2..ea52118a 100644 --- a/src/gitingest/clone.py +++ b/src/gitingest/clone.py @@ -12,9 +12,10 @@ class CloneConfig: local_path: str commit: str | None = None branch: str | None = None + pat: str | None = None -async def check_repo_exists(url: str) -> bool: +async def check_repo_exists(url: str, pat: str | None = None) -> bool: """ Check if a repository exists at the given URL using an HTTP HEAD request. @@ -22,16 +23,37 @@ async def check_repo_exists(url: str) -> bool: ---------- url : str The URL of the repository. + pat : str | None + Personal Access Token for authentication, optional. Returns ------- bool True if the repository exists, False otherwise. """ + # Parse URL to get components + parts = url.split('/') + if len(parts) < 5: # Need at least protocol, empty, host, username, repo + return False + + host = parts[2] + username = parts[3] + repo = parts[4] + + # Construct API URL based on host + if 'github.com' in host: + api_url = url + else: + # For custom Git servers, use API v1 endpoint + api_url = f"https://{host}/api/v1/repos/{username}/{repo}" + + cmd = ["curl", "-I"] + if pat: + cmd.extend(["-H", f"Authorization: token {pat}"]) + cmd.append(api_url) + proc = await asyncio.create_subprocess_exec( - "curl", - "-I", - url, + *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, ) @@ -78,16 +100,17 @@ async def run_git_command(*args: str) -> tuple[bytes, bytes]: @async_timeout(CLONE_TIMEOUT) async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]: """ - Clones a repository to a local path based on the provided query parameters. + Clones a repository to a local path based on the provided configuration. Parameters ---------- config : CloneConfig - A dictionary containing the following keys: + Configuration object containing: - url (str): The URL of the repository. - local_path (str): The local path to clone the repository to. - commit (Optional[str]): The specific commit hash to checkout. - - branch (Optional[str]): The branch to clone. Defaults to 'main' or 'master' if not provided. + - branch (Optional[str]): The branch to clone. + - pat (Optional[str]): Personal Access Token for authentication. Returns ------- @@ -97,17 +120,18 @@ async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]: Raises ------ ValueError - If the repository does not exist or if required query parameters are missing. + If the repository does not exist or if required parameters are missing. RuntimeError If any git command fails during execution. AsyncTimeoutError If the cloning process exceeds the specified timeout. """ - # Extract and validate query parameters + # Extract and validate parameters url: str = config.url local_path: str = config.local_path commit: str | None = config.commit branch: str | None = config.branch + pat: str | None = config.pat if not url: raise ValueError("The 'url' parameter is required.") @@ -116,14 +140,17 @@ async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]: raise ValueError("The 'local_path' parameter is required.") # Check if the repository exists - if not await check_repo_exists(url): - raise ValueError("Repository not found, make sure it is public") + if not await check_repo_exists(url, pat): + raise ValueError("Repository not found, make sure it is public or provide valid PAT") try: if commit: # Scenario 1: Clone and checkout a specific commit # Clone the repository without depth to ensure full history for checkout - clone_cmd = ["git", "clone", "--single-branch", url, local_path] + clone_cmd = ["git", "clone", "--single-branch"] + if pat: + url = url.replace("https://", f"https://oauth2:{pat}@") + clone_cmd.extend([url, local_path]) await run_git_command(*clone_cmd) # Checkout the specific commit @@ -131,13 +158,18 @@ async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]: return await run_git_command(*checkout_cmd) if branch and branch.lower() not in ("main", "master"): - # Scenario 2: Clone a specific branch with shallow depth - clone_cmd = ["git", "clone", "--depth=1", "--single-branch", "--branch", branch, url, local_path] + clone_cmd = ["git", "clone", "--depth=1", "--single-branch", "--branch", branch] + if pat: + url = url.replace("https://", f"https://oauth2:{pat}@") + clone_cmd.extend([url, local_path]) return await run_git_command(*clone_cmd) # Scenario 3: Clone the default branch with shallow depth - clone_cmd = ["git", "clone", "--depth=1", "--single-branch", url, local_path] + clone_cmd = ["git", "clone", "--depth=1", "--single-branch"] + if pat: + url = url.replace("https://", f"https://oauth2:{pat}@") + clone_cmd.extend([url, local_path]) return await run_git_command(*clone_cmd) except (RuntimeError, asyncio.TimeoutError, AsyncTimeoutError): diff --git a/src/gitingest/tests/test_clone.py b/src/gitingest/tests/test_clone.py index 585ba6eb..c0cf513f 100644 --- a/src/gitingest/tests/test_clone.py +++ b/src/gitingest/tests/test_clone.py @@ -16,18 +16,18 @@ async def test_clone_repo_with_commit() -> None: with patch("gitingest.clone.check_repo_exists", return_value=True) as mock_check: with patch("gitingest.clone.run_git_command", new_callable=AsyncMock) as mock_exec: - mock_process = AsyncMock() mock_process.communicate.return_value = (b"output", b"error") mock_exec.return_value = mock_process + await clone_repo(clone_config) - mock_check.assert_called_once_with(clone_config.url) + mock_check.assert_called_once_with(clone_config.url, None) assert mock_exec.call_count == 2 # Clone and checkout calls @pytest.mark.asyncio async def test_clone_repo_without_commit() -> None: - query = CloneConfig(url="https://github.com/user/repo", local_path="/tmp/repo", commit=None, branch="main") + clone_config = CloneConfig(url="https://github.com/user/repo", local_path="/tmp/repo", commit=None, branch="main") with patch("gitingest.clone.check_repo_exists", return_value=True) as mock_check: with patch("gitingest.clone.run_git_command", new_callable=AsyncMock) as mock_exec: @@ -35,8 +35,8 @@ async def test_clone_repo_without_commit() -> None: mock_process.communicate.return_value = (b"output", b"error") mock_exec.return_value = mock_process - await clone_repo(query) - mock_check.assert_called_once_with(query.url) + await clone_repo(clone_config) + mock_check.assert_called_once_with(clone_config.url, None) assert mock_exec.call_count == 1 # Only clone call @@ -51,7 +51,7 @@ async def test_clone_repo_nonexistent_repository() -> None: with patch("gitingest.clone.check_repo_exists", return_value=False) as mock_check: with pytest.raises(ValueError, match="Repository not found"): await clone_repo(clone_config) - mock_check.assert_called_once_with(clone_config.url) + mock_check.assert_called_once_with(clone_config.url, None) @pytest.mark.asyncio @@ -75,3 +75,77 @@ async def test_check_repo_exists() -> None: # Test failed request mock_process.returncode = 1 assert await check_repo_exists(url) is False + + +@pytest.mark.asyncio +async def test_check_repo_exists_with_pat() -> None: + url = "https://github.com/user/repo" + pat = "test_token_123" + + with patch("asyncio.create_subprocess_exec", new_callable=AsyncMock) as mock_exec: + mock_process = AsyncMock() + mock_process.communicate.return_value = (b"HTTP/1.1 200 OK\n", b"") + mock_process.returncode = 0 + mock_exec.return_value = mock_process + + await check_repo_exists(url, pat) + + # Verify curl command includes authorization header + mock_exec.assert_called_with( + "curl", "-I", + "-H", f"Authorization: token {pat}", + url, + stdout=-1, # asyncio.subprocess.PIPE + stderr=-1, # asyncio.subprocess.PIPE + ) + + +@pytest.mark.asyncio +async def test_check_repo_exists_custom_git_server() -> None: + url = "https://git.custom.com/user/repo" + pat = "test_token_123" + + with patch("asyncio.create_subprocess_exec", new_callable=AsyncMock) as mock_exec: + mock_process = AsyncMock() + mock_process.communicate.return_value = (b"HTTP/1.1 200 OK\n", b"") + mock_process.returncode = 0 + mock_exec.return_value = mock_process + + await check_repo_exists(url, pat) + + # Verify curl command uses correct API endpoint and includes authorization header + mock_exec.assert_called_with( + "curl", "-I", + "-H", f"Authorization: token {pat}", + "https://git.custom.com/api/v1/repos/user/repo", + stdout=-1, # asyncio.subprocess.PIPE + stderr=-1, # asyncio.subprocess.PIPE + ) + + +@pytest.mark.asyncio +async def test_clone_repo_with_pat() -> None: + clone_config = CloneConfig( + url="https://git.custom.com/user/repo", + local_path="/tmp/repo", + commit=None, + branch="main", + pat="test_token_123" + ) + + with patch("gitingest.clone.check_repo_exists", return_value=True) as mock_check: + with patch("gitingest.clone.run_git_command", new_callable=AsyncMock) as mock_exec: + mock_process = AsyncMock() + mock_process.communicate.return_value = (b"output", b"error") + mock_exec.return_value = mock_process + + await clone_repo(clone_config) + mock_check.assert_called_once_with(clone_config.url, clone_config.pat) + + # Verify git clone command includes PAT in URL + expected_url = clone_config.url.replace("https://", f"https://oauth2:{clone_config.pat}@") + # Check that the command was called with the correct arguments + mock_exec.assert_called_with( + "git", "clone", "--depth=1", "--single-branch", + expected_url, clone_config.local_path + ) diff --git a/src/process_query.py b/src/process_query.py index f55068cb..0f543647 100644 --- a/src/process_query.py +++ b/src/process_query.py @@ -40,6 +40,7 @@ async def process_query( slider_position: int, pattern_type: str = "exclude", pattern: str = "", + pat: str | None = None, is_index: bool = False, ) -> _TemplateResponse: template = "index.jinja" if is_index else "github.jinja" @@ -65,6 +66,7 @@ async def process_query( local_path=query["local_path"], commit=query.get("commit"), branch=query.get("branch"), + pat=pat, ) await clone_repo(clone_config) summary, tree, content = ingest_from_query(query) diff --git a/src/templates/components/github_form.jinja b/src/templates/components/github_form.jinja index 7be65aee..57e369e0 100644 --- a/src/templates/components/github_form.jinja +++ b/src/templates/components/github_form.jinja @@ -3,9 +3,10 @@
-
+
-
+ + +
+ + +
+ + + + + +
+ + + +
From 8ddea21bd8a08ee9ffcd4763060645896a74f8a8 Mon Sep 17 00:00:00 2001 From: Romain Courtois Date: Mon, 30 Dec 2024 07:26:45 +0000 Subject: [PATCH 2/2] Fix broken names due to merge --- src/gitingest/tests/test_clone.py | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/src/gitingest/tests/test_clone.py b/src/gitingest/tests/test_clone.py index 8d3b2f7a..354f7007 100644 --- a/src/gitingest/tests/test_clone.py +++ b/src/gitingest/tests/test_clone.py @@ -88,12 +88,14 @@ async def test_check_repo_exists_with_pat() -> None: mock_process.returncode = 0 mock_exec.return_value = mock_process - await check_repo_exists(url, pat) - + await _check_repo_exists(url, pat) + # Verify curl command includes authorization header mock_exec.assert_called_with( - "curl", "-I", - "-H", f"Authorization: token {pat}", + "curl", + "-I", + "-H", + f"Authorization: token {pat}", url, stdout=-1, # asyncio.subprocess.PIPE stderr=-1, # asyncio.subprocess.PIPE @@ -111,12 +113,14 @@ async def test_check_repo_exists_custom_git_server() -> None: mock_process.returncode = 0 mock_exec.return_value = mock_process - await check_repo_exists(url, pat) - + await _check_repo_exists(url, pat) + # Verify curl command uses correct API endpoint and includes authorization header mock_exec.assert_called_with( - "curl", "-I", - "-H", f"Authorization: token {pat}", + "curl", + "-I", + "-H", + f"Authorization: token {pat}", "https://git.custom.com/api/v1/repos/user/repo", stdout=-1, # asyncio.subprocess.PIPE stderr=-1, # asyncio.subprocess.PIPE @@ -130,22 +134,21 @@ async def test_clone_repo_with_pat() -> None: local_path="/tmp/repo", commit=None, branch="main", - pat="test_token_123" + pat="test_token_123", ) - with patch("gitingest.clone.check_repo_exists", return_value=True) as mock_check: - with patch("gitingest.clone.run_git_command", new_callable=AsyncMock) as mock_exec: + with patch("gitingest.clone._check_repo_exists", return_value=True) as mock_check: + with patch("gitingest.clone._run_git_command", new_callable=AsyncMock) as mock_exec: mock_process = AsyncMock() mock_process.communicate.return_value = (b"output", b"error") mock_exec.return_value = mock_process await clone_repo(clone_config) mock_check.assert_called_once_with(clone_config.url, clone_config.pat) - + # Verify git clone command includes PAT in URL expected_url = clone_config.url.replace("https://", f"https://oauth2:{clone_config.pat}@") # Check that the command was called with the correct arguments mock_exec.assert_called_with( - "git", "clone", "--depth=1", "--single-branch", - expected_url, clone_config.local_path + "git", "clone", "--depth=1", "--single-branch", expected_url, clone_config.local_path )