From 85da895e97de9fe9aae2881d146ab8a5fa8d5f64 Mon Sep 17 00:00:00 2001 From: Kerim Incedayi Date: Sun, 29 Dec 2024 10:44:10 +0100 Subject: [PATCH 1/3] feat: add PAT support with custom Git server compatibility - Add optional PAT field with toggle checkbox in UI\n- Support custom Git server API endpoints (api/v1/repos)\n- Add comprehensive test coverage for PAT authentication\n- Update repository existence check to handle both GitHub and custom Git servers --- src/gitingest/clone.py | 161 ++++++++++++--------- src/gitingest/tests/test_clone.py | 85 ++++++++++- src/process_query.py | 2 + src/templates/components/github_form.jinja | 39 ++++- 4 files changed, 213 insertions(+), 74 deletions(-) diff --git a/src/gitingest/clone.py b/src/gitingest/clone.py index da6550f1..6ab290bf 100644 --- a/src/gitingest/clone.py +++ b/src/gitingest/clone.py @@ -31,6 +31,89 @@ class CloneConfig: local_path: str commit: str | None = None branch: str | None = None + pat: str | None = None + + +async def _check_repo_exists(url: str, pat: str | None = None) -> bool: + """ + Check if a repository exists at the given URL using an HTTP HEAD request. + + Parameters + ---------- + url : str + The URL of the repository. + pat : str | None + Personal Access Token for authentication, optional. + + Returns + ------- + bool + True if the repository exists, False otherwise. + """ + # Parse URL to get components + parts = url.split('/') + if len(parts) < 5: # Need at least protocol, empty, host, username, repo + return False + + host = parts[2] + username = parts[3] + repo = parts[4] + + # Construct API URL based on host + if 'github.com' in host: + api_url = url + else: + # For custom Git servers, use API v1 endpoint + api_url = f"https://{host}/api/v1/repos/{username}/{repo}" + + cmd = ["curl", "-I"] + if pat: + cmd.extend(["-H", f"Authorization: token {pat}"]) + cmd.append(api_url) + + proc = await asyncio.create_subprocess_exec( + *cmd, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + stdout, _ = await proc.communicate() + if proc.returncode != 0: + return False + # Check if stdout contains "404" status code + stdout_str = stdout.decode() + return "HTTP/1.1 404" not in stdout_str and "HTTP/2 404" not in stdout_str + + +async def _run_git_command(*args: str) -> tuple[bytes, bytes]: + """ + Executes a git command asynchronously and captures its output. + + Parameters + ---------- + *args : str + The git command and its arguments to execute. + + Returns + ------- + tuple[bytes, bytes] + A tuple containing the stdout and stderr of the git command. + + Raises + ------ + RuntimeError + If the git command exits with a non-zero status. + """ + proc = await asyncio.create_subprocess_exec( + *args, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + stdout, stderr = await proc.communicate() + if proc.returncode != 0: + error_message = stderr.decode().strip() + raise RuntimeError(f"Git command failed: {' '.join(args)}\nError: {error_message}") + + return stdout, stderr @async_timeout(CLONE_TIMEOUT) @@ -45,11 +128,12 @@ async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]: Parameters ---------- config : CloneConfig - A dictionary containing the following keys: + Configuration object containing: - url (str): The URL of the repository. - local_path (str): The local path to clone the repository to. - commit (Optional[str]): The specific commit hash to checkout. - branch (Optional[str]): The branch to clone. Defaults to 'main' or 'master' if not provided. + - pat (Optional[str]): Personal Access Token for authentication. Returns ------- @@ -65,11 +149,12 @@ async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]: AsyncTimeoutError If the cloning process exceeds the specified timeout. """ - # Extract and validate query parameters + # Extract and validate parameters url: str = config.url local_path: str = config.local_path commit: str | None = config.commit branch: str | None = config.branch + pat: str | None = config.pat if not url: raise ValueError("The 'url' parameter is required.") @@ -78,13 +163,15 @@ async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]: raise ValueError("The 'local_path' parameter is required.") # Check if the repository exists - if not await _check_repo_exists(url): - raise ValueError("Repository not found, make sure it is public") + if not await _check_repo_exists(url, pat): + raise ValueError("Repository not found, make sure it is public or provide valid PAT") try: if commit: # Scenario 1: Clone and checkout a specific commit # Clone the repository without depth to ensure full history for checkout + if pat: + url = url.replace("https://", f"https://oauth2:{pat}@") clone_cmd = ["git", "clone", "--single-branch", url, local_path] await _run_git_command(*clone_cmd) @@ -93,75 +180,17 @@ async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]: return await _run_git_command(*checkout_cmd) if branch and branch.lower() not in ("main", "master"): - # Scenario 2: Clone a specific branch with shallow depth + if pat: + url = url.replace("https://", f"https://oauth2:{pat}@") clone_cmd = ["git", "clone", "--depth=1", "--single-branch", "--branch", branch, url, local_path] return await _run_git_command(*clone_cmd) # Scenario 3: Clone the default branch with shallow depth + if pat: + url = url.replace("https://", f"https://oauth2:{pat}@") clone_cmd = ["git", "clone", "--depth=1", "--single-branch", url, local_path] return await _run_git_command(*clone_cmd) except (RuntimeError, asyncio.TimeoutError, AsyncTimeoutError): raise # Re-raise the exception - - -async def _check_repo_exists(url: str) -> bool: - """ - Check if a repository exists at the given URL using an HTTP HEAD request. - - Parameters - ---------- - url : str - The URL of the repository. - - Returns - ------- - bool - True if the repository exists, False otherwise. - """ - proc = await asyncio.create_subprocess_exec( - "curl", - "-I", - url, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - ) - stdout, _ = await proc.communicate() - if proc.returncode != 0: - return False - # Check if stdout contains "404" status code - stdout_str = stdout.decode() - return "HTTP/1.1 404" not in stdout_str and "HTTP/2 404" not in stdout_str - - -async def _run_git_command(*args: str) -> tuple[bytes, bytes]: - """ - Executes a git command asynchronously and captures its output. - - Parameters - ---------- - *args : str - The git command and its arguments to execute. - - Returns - ------- - tuple[bytes, bytes] - A tuple containing the stdout and stderr of the git command. - - Raises - ------ - RuntimeError - If the git command exits with a non-zero status. - """ - proc = await asyncio.create_subprocess_exec( - *args, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - ) - stdout, stderr = await proc.communicate() - if proc.returncode != 0: - error_message = stderr.decode().strip() - raise RuntimeError(f"Git command failed: {' '.join(args)}\nError: {error_message}") - - return stdout, stderr diff --git a/src/gitingest/tests/test_clone.py b/src/gitingest/tests/test_clone.py index c124730a..b19266e6 100644 --- a/src/gitingest/tests/test_clone.py +++ b/src/gitingest/tests/test_clone.py @@ -19,14 +19,15 @@ async def test_clone_repo_with_commit() -> None: mock_process = AsyncMock() mock_process.communicate.return_value = (b"output", b"error") mock_exec.return_value = mock_process + await clone_repo(clone_config) - mock_check.assert_called_once_with(clone_config.url) + mock_check.assert_called_once_with(clone_config.url, None) assert mock_exec.call_count == 2 # Clone and checkout calls @pytest.mark.asyncio async def test_clone_repo_without_commit() -> None: - query = CloneConfig(url="https://github.com/user/repo", local_path="/tmp/repo", commit=None, branch="main") + clone_config = CloneConfig(url="https://github.com/user/repo", local_path="/tmp/repo", commit=None, branch="main") with patch("gitingest.clone._check_repo_exists", return_value=True) as mock_check: with patch("gitingest.clone._run_git_command", new_callable=AsyncMock) as mock_exec: @@ -34,8 +35,8 @@ async def test_clone_repo_without_commit() -> None: mock_process.communicate.return_value = (b"output", b"error") mock_exec.return_value = mock_process - await clone_repo(query) - mock_check.assert_called_once_with(query.url) + await clone_repo(clone_config) + mock_check.assert_called_once_with(clone_config.url, None) assert mock_exec.call_count == 1 # Only clone call @@ -50,7 +51,7 @@ async def test_clone_repo_nonexistent_repository() -> None: with patch("gitingest.clone._check_repo_exists", return_value=False) as mock_check: with pytest.raises(ValueError, match="Repository not found"): await clone_repo(clone_config) - mock_check.assert_called_once_with(clone_config.url) + mock_check.assert_called_once_with(clone_config.url, None) @pytest.mark.asyncio @@ -167,5 +168,77 @@ async def test_check_repo_exists_with_redirect() -> None: mock_process.communicate.return_value = (b"HTTP/1.1 302 Found\n", b"") mock_process.returncode = 0 # Simulate successful request mock_exec.return_value = mock_process - assert await _check_repo_exists(url) + +@pytest.mark.asyncio +async def test_check_repo_exists_with_pat() -> None: + url = "https://github.com/user/repo" + pat = "test_token_123" + + with patch("asyncio.create_subprocess_exec", new_callable=AsyncMock) as mock_exec: + mock_process = AsyncMock() + mock_process.communicate.return_value = (b"HTTP/1.1 200 OK\n", b"") + mock_process.returncode = 0 + mock_exec.return_value = mock_process + + await _check_repo_exists(url, pat) + + # Verify curl command includes authorization header + mock_exec.assert_called_with( + "curl", "-I", + "-H", f"Authorization: token {pat}", + url, + stdout=-1, # asyncio.subprocess.PIPE + stderr=-1, # asyncio.subprocess.PIPE + ) + + +@pytest.mark.asyncio +async def test_check_repo_exists_custom_git_server() -> None: + url = "https://git.custom.com/user/repo" + pat = "test_token_123" + + with patch("asyncio.create_subprocess_exec", new_callable=AsyncMock) as mock_exec: + mock_process = AsyncMock() + mock_process.communicate.return_value = (b"HTTP/1.1 200 OK\n", b"") + mock_process.returncode = 0 + mock_exec.return_value = mock_process + + await _check_repo_exists(url, pat) + + # Verify curl command uses correct API endpoint and includes authorization header + mock_exec.assert_called_with( + "curl", "-I", + "-H", f"Authorization: token {pat}", + "https://git.custom.com/api/v1/repos/user/repo", + stdout=-1, # asyncio.subprocess.PIPE + stderr=-1, # asyncio.subprocess.PIPE + ) + + +@pytest.mark.asyncio +async def test_clone_repo_with_pat() -> None: + clone_config = CloneConfig( + url="https://git.custom.com/user/repo", + local_path="/tmp/repo", + commit=None, + branch="main", + pat="test_token_123" + ) + + with patch("gitingest.clone.check_repo_exists", return_value=True) as mock_check: + with patch("gitingest.clone.run_git_command", new_callable=AsyncMock) as mock_exec: + mock_process = AsyncMock() + mock_process.communicate.return_value = (b"output", b"error") + mock_exec.return_value = mock_process + + await clone_repo(clone_config) + mock_check.assert_called_once_with(clone_config.url, clone_config.pat) + + # Verify git clone command includes PAT in URL + expected_url = clone_config.url.replace("https://", f"https://oauth2:{clone_config.pat}@") + # Check that the command was called with the correct arguments + mock_exec.assert_called_with( + "git", "clone", "--depth=1", "--single-branch", + expected_url, clone_config.local_path + ) diff --git a/src/process_query.py b/src/process_query.py index 4053e45c..9afe8f04 100644 --- a/src/process_query.py +++ b/src/process_query.py @@ -17,6 +17,7 @@ async def process_query( slider_position: int, pattern_type: str = "exclude", pattern: str = "", + pat: str | None = None, is_index: bool = False, ) -> _TemplateResponse: """ @@ -68,6 +69,7 @@ async def process_query( local_path=query["local_path"], commit=query.get("commit"), branch=query.get("branch"), + pat=pat, ) await clone_repo(clone_config) summary, tree, content = ingest_from_query(query) diff --git a/src/templates/components/github_form.jinja b/src/templates/components/github_form.jinja index 7be65aee..57e369e0 100644 --- a/src/templates/components/github_form.jinja +++ b/src/templates/components/github_form.jinja @@ -3,9 +3,10 @@
-
+
-
+ + +
+ + +
+ + + + + +
+ + + +
From b98647fd768cb906494f9f2374a14a8eeb851519 Mon Sep 17 00:00:00 2001 From: Romain Courtois Date: Mon, 30 Dec 2024 07:26:45 +0000 Subject: [PATCH 2/3] Fix broken names due to merge --- src/gitingest/tests/test_clone.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/gitingest/tests/test_clone.py b/src/gitingest/tests/test_clone.py index b19266e6..98778076 100644 --- a/src/gitingest/tests/test_clone.py +++ b/src/gitingest/tests/test_clone.py @@ -185,8 +185,10 @@ async def test_check_repo_exists_with_pat() -> None: # Verify curl command includes authorization header mock_exec.assert_called_with( - "curl", "-I", - "-H", f"Authorization: token {pat}", + "curl", + "-I", + "-H", + f"Authorization: token {pat}", url, stdout=-1, # asyncio.subprocess.PIPE stderr=-1, # asyncio.subprocess.PIPE @@ -208,8 +210,10 @@ async def test_check_repo_exists_custom_git_server() -> None: # Verify curl command uses correct API endpoint and includes authorization header mock_exec.assert_called_with( - "curl", "-I", - "-H", f"Authorization: token {pat}", + "curl", + "-I", + "-H", + f"Authorization: token {pat}", "https://git.custom.com/api/v1/repos/user/repo", stdout=-1, # asyncio.subprocess.PIPE stderr=-1, # asyncio.subprocess.PIPE @@ -223,11 +227,11 @@ async def test_clone_repo_with_pat() -> None: local_path="/tmp/repo", commit=None, branch="main", - pat="test_token_123" + pat="test_token_123", ) - with patch("gitingest.clone.check_repo_exists", return_value=True) as mock_check: - with patch("gitingest.clone.run_git_command", new_callable=AsyncMock) as mock_exec: + with patch("gitingest.clone._check_repo_exists", return_value=True) as mock_check: + with patch("gitingest.clone._run_git_command", new_callable=AsyncMock) as mock_exec: mock_process = AsyncMock() mock_process.communicate.return_value = (b"output", b"error") mock_exec.return_value = mock_process @@ -239,6 +243,5 @@ async def test_clone_repo_with_pat() -> None: expected_url = clone_config.url.replace("https://", f"https://oauth2:{clone_config.pat}@") # Check that the command was called with the correct arguments mock_exec.assert_called_with( - "git", "clone", "--depth=1", "--single-branch", - expected_url, clone_config.local_path + "git", "clone", "--depth=1", "--single-branch", expected_url, clone_config.local_path ) From 5e037955343b1493ceb04a7718c84eadd0d8d97b Mon Sep 17 00:00:00 2001 From: Filip Christiansen <22807962+filipchristiansen@users.noreply.github.com> Date: Wed, 1 Jan 2025 01:15:05 +0100 Subject: [PATCH 3/3] Ran pre-commit on cevatkerim's branch to ensure CI passes --- src/gitingest/clone.py | 4 ++-- src/gitingest/tests/test_clone.py | 1 + src/templates/components/github_form.jinja | 6 ------ 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/src/gitingest/clone.py b/src/gitingest/clone.py index 6ab290bf..6b048c09 100644 --- a/src/gitingest/clone.py +++ b/src/gitingest/clone.py @@ -51,7 +51,7 @@ async def _check_repo_exists(url: str, pat: str | None = None) -> bool: True if the repository exists, False otherwise. """ # Parse URL to get components - parts = url.split('/') + parts = url.split("/") if len(parts) < 5: # Need at least protocol, empty, host, username, repo return False @@ -60,7 +60,7 @@ async def _check_repo_exists(url: str, pat: str | None = None) -> bool: repo = parts[4] # Construct API URL based on host - if 'github.com' in host: + if "github.com" in host: api_url = url else: # For custom Git servers, use API v1 endpoint diff --git a/src/gitingest/tests/test_clone.py b/src/gitingest/tests/test_clone.py index 98778076..40b377a7 100644 --- a/src/gitingest/tests/test_clone.py +++ b/src/gitingest/tests/test_clone.py @@ -170,6 +170,7 @@ async def test_check_repo_exists_with_redirect() -> None: mock_exec.return_value = mock_process assert await _check_repo_exists(url) + @pytest.mark.asyncio async def test_check_repo_exists_with_pat() -> None: url = "https://github.com/user/repo" diff --git a/src/templates/components/github_form.jinja b/src/templates/components/github_form.jinja index 57e369e0..4952c633 100644 --- a/src/templates/components/github_form.jinja +++ b/src/templates/components/github_form.jinja @@ -17,7 +17,6 @@ required class="border-[3px] w-full relative z-20 border-gray-900 placeholder-gray-600 text-lg font-medium focus:outline-none py-3.5 px-6 rounded">
-
- -
@@ -48,11 +45,9 @@ Ingest
- - -