Skip to content

Commit 5eea53f

Browse files
committed
feat: add PAT support with custom Git server compatibility
- Add optional PAT field with toggle checkbox in UI
- Support custom Git server API endpoints (api/v1/repos)
- Add comprehensive test coverage for PAT authentication
- Update repository existence check to handle both GitHub and custom Git servers
1 parent 65b4b4a commit 5eea53f

File tree

4 files changed

+166
-23
lines changed

4 files changed

+166
-23
lines changed

src/gitingest/clone.py

Lines changed: 47 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -12,26 +12,48 @@ class CloneConfig:
1212
local_path: str
1313
commit: str | None = None
1414
branch: str | None = None
15+
pat: str | None = None
1516

1617

17-
async def check_repo_exists(url: str) -> bool:
18+
async def check_repo_exists(url: str, pat: str | None = None) -> bool:
1819
"""
1920
Check if a repository exists at the given URL using an HTTP HEAD request.
2021
2122
Parameters
2223
----------
2324
url : str
2425
The URL of the repository.
26+
pat : str | None
27+
Personal Access Token for authentication, optional.
2528
2629
Returns
2730
-------
2831
bool
2932
True if the repository exists, False otherwise.
3033
"""
34+
# Parse URL to get components
35+
parts = url.split('/')
36+
if len(parts) < 5: # Need at least protocol, empty, host, username, repo
37+
return False
38+
39+
host = parts[2]
40+
username = parts[3]
41+
repo = parts[4]
42+
43+
# Construct API URL based on host
44+
if 'github.com' in host:
45+
api_url = url
46+
else:
47+
# For custom Git servers, use API v1 endpoint
48+
api_url = f"https://{host}/api/v1/repos/{username}/{repo}"
49+
50+
cmd = ["curl", "-I"]
51+
if pat:
52+
cmd.extend(["-H", f"Authorization: token {pat}"])
53+
cmd.append(api_url)
54+
3155
proc = await asyncio.create_subprocess_exec(
32-
"curl",
33-
"-I",
34-
url,
56+
*cmd,
3557
stdout=asyncio.subprocess.PIPE,
3658
stderr=asyncio.subprocess.PIPE,
3759
)
@@ -78,16 +100,17 @@ async def run_git_command(*args: str) -> tuple[bytes, bytes]:
78100
@async_timeout(CLONE_TIMEOUT)
79101
async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]:
80102
"""
81-
Clones a repository to a local path based on the provided query parameters.
103+
Clones a repository to a local path based on the provided configuration.
82104
83105
Parameters
84106
----------
85107
config : CloneConfig
86-
A dictionary containing the following keys:
108+
Configuration object containing:
87109
- url (str): The URL of the repository.
88110
- local_path (str): The local path to clone the repository to.
89111
- commit (Optional[str]): The specific commit hash to checkout.
90-
- branch (Optional[str]): The branch to clone. Defaults to 'main' or 'master' if not provided.
112+
- branch (Optional[str]): The branch to clone.
113+
- pat (Optional[str]): Personal Access Token for authentication.
91114
92115
Returns
93116
-------
@@ -97,17 +120,18 @@ async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]:
97120
Raises
98121
------
99122
ValueError
100-
If the repository does not exist or if required query parameters are missing.
123+
If the repository does not exist or if required parameters are missing.
101124
RuntimeError
102125
If any git command fails during execution.
103126
AsyncTimeoutError
104127
If the cloning process exceeds the specified timeout.
105128
"""
106-
# Extract and validate query parameters
129+
# Extract and validate parameters
107130
url: str = config.url
108131
local_path: str = config.local_path
109132
commit: str | None = config.commit
110133
branch: str | None = config.branch
134+
pat: str | None = config.pat
111135

112136
if not url:
113137
raise ValueError("The 'url' parameter is required.")
@@ -116,28 +140,36 @@ async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]:
116140
raise ValueError("The 'local_path' parameter is required.")
117141

118142
# Check if the repository exists
119-
if not await check_repo_exists(url):
120-
raise ValueError("Repository not found, make sure it is public")
143+
if not await check_repo_exists(url, pat):
144+
raise ValueError("Repository not found, make sure it is public or provide valid PAT")
121145

122146
try:
123147
if commit:
124148
# Scenario 1: Clone and checkout a specific commit
125149
# Clone the repository without depth to ensure full history for checkout
126-
clone_cmd = ["git", "clone", "--single-branch", url, local_path]
150+
clone_cmd = ["git", "clone", "--single-branch"]
151+
if pat:
152+
url = url.replace("https://", f"https://oauth2:{pat}@")
153+
clone_cmd.extend([url, local_path])
127154
await run_git_command(*clone_cmd)
128155

129156
# Checkout the specific commit
130157
checkout_cmd = ["git", "-C", local_path, "checkout", commit]
131158
return await run_git_command(*checkout_cmd)
132159

133160
if branch and branch.lower() not in ("main", "master"):
134-
135161
# Scenario 2: Clone a specific branch with shallow depth
136-
clone_cmd = ["git", "clone", "--depth=1", "--single-branch", "--branch", branch, url, local_path]
162+
clone_cmd = ["git", "clone", "--depth=1", "--single-branch", "--branch", branch]
163+
if pat:
164+
url = url.replace("https://", f"https://oauth2:{pat}@")
165+
clone_cmd.extend([url, local_path])
137166
return await run_git_command(*clone_cmd)
138167

139168
# Scenario 3: Clone the default branch with shallow depth
140-
clone_cmd = ["git", "clone", "--depth=1", "--single-branch", url, local_path]
169+
clone_cmd = ["git", "clone", "--depth=1", "--single-branch"]
170+
if pat:
171+
url = url.replace("https://", f"https://oauth2:{pat}@")
172+
clone_cmd.extend([url, local_path])
141173
return await run_git_command(*clone_cmd)
142174

143175
except (RuntimeError, asyncio.TimeoutError, AsyncTimeoutError):

src/gitingest/tests/test_clone.py

Lines changed: 80 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -16,27 +16,27 @@ async def test_clone_repo_with_commit() -> None:
1616

1717
with patch("gitingest.clone.check_repo_exists", return_value=True) as mock_check:
1818
with patch("gitingest.clone.run_git_command", new_callable=AsyncMock) as mock_exec:
19-
2019
mock_process = AsyncMock()
2120
mock_process.communicate.return_value = (b"output", b"error")
2221
mock_exec.return_value = mock_process
22+
2323
await clone_repo(clone_config)
24-
mock_check.assert_called_once_with(clone_config.url)
24+
mock_check.assert_called_once_with(clone_config.url, None)
2525
assert mock_exec.call_count == 2 # Clone and checkout calls
2626

2727

2828
@pytest.mark.asyncio
2929
async def test_clone_repo_without_commit() -> None:
30-
query = CloneConfig(url="https://github.com/user/repo", local_path="/tmp/repo", commit=None, branch="main")
30+
clone_config = CloneConfig(url="https://github.com/user/repo", local_path="/tmp/repo", commit=None, branch="main")
3131

3232
with patch("gitingest.clone.check_repo_exists", return_value=True) as mock_check:
3333
with patch("gitingest.clone.run_git_command", new_callable=AsyncMock) as mock_exec:
3434
mock_process = AsyncMock()
3535
mock_process.communicate.return_value = (b"output", b"error")
3636
mock_exec.return_value = mock_process
3737

38-
await clone_repo(query)
39-
mock_check.assert_called_once_with(query.url)
38+
await clone_repo(clone_config)
39+
mock_check.assert_called_once_with(clone_config.url, None)
4040
assert mock_exec.call_count == 1 # Only clone call
4141

4242

@@ -51,7 +51,7 @@ async def test_clone_repo_nonexistent_repository() -> None:
5151
with patch("gitingest.clone.check_repo_exists", return_value=False) as mock_check:
5252
with pytest.raises(ValueError, match="Repository not found"):
5353
await clone_repo(clone_config)
54-
mock_check.assert_called_once_with(clone_config.url)
54+
mock_check.assert_called_once_with(clone_config.url, None)
5555

5656

5757
@pytest.mark.asyncio
@@ -75,3 +75,77 @@ async def test_check_repo_exists() -> None:
7575
# Test failed request
7676
mock_process.returncode = 1
7777
assert await check_repo_exists(url) is False
78+
79+
80+
@pytest.mark.asyncio
81+
async def test_check_repo_exists_with_pat() -> None:
82+
url = "https://github.com/user/repo"
83+
pat = "test_token_123"
84+
85+
with patch("asyncio.create_subprocess_exec", new_callable=AsyncMock) as mock_exec:
86+
mock_process = AsyncMock()
87+
mock_process.communicate.return_value = (b"HTTP/1.1 200 OK\n", b"")
88+
mock_process.returncode = 0
89+
mock_exec.return_value = mock_process
90+
91+
await check_repo_exists(url, pat)
92+
93+
# Verify curl command includes authorization header
94+
mock_exec.assert_called_with(
95+
"curl", "-I",
96+
"-H", f"Authorization: token {pat}",
97+
url,
98+
stdout=-1, # asyncio.subprocess.PIPE
99+
stderr=-1, # asyncio.subprocess.PIPE
100+
)
101+
102+
103+
@pytest.mark.asyncio
104+
async def test_check_repo_exists_custom_git_server() -> None:
105+
url = "https://git.custom.com/user/repo"
106+
pat = "test_token_123"
107+
108+
with patch("asyncio.create_subprocess_exec", new_callable=AsyncMock) as mock_exec:
109+
mock_process = AsyncMock()
110+
mock_process.communicate.return_value = (b"HTTP/1.1 200 OK\n", b"")
111+
mock_process.returncode = 0
112+
mock_exec.return_value = mock_process
113+
114+
await check_repo_exists(url, pat)
115+
116+
# Verify curl command uses correct API endpoint and includes authorization header
117+
mock_exec.assert_called_with(
118+
"curl", "-I",
119+
"-H", f"Authorization: token {pat}",
120+
"https://git.custom.com/api/v1/repos/user/repo",
121+
stdout=-1, # asyncio.subprocess.PIPE
122+
stderr=-1, # asyncio.subprocess.PIPE
123+
)
124+
125+
126+
@pytest.mark.asyncio
127+
async def test_clone_repo_with_pat() -> None:
128+
clone_config = CloneConfig(
129+
url="https://git.custom.com/user/repo",
130+
local_path="/tmp/repo",
131+
commit=None,
132+
branch="main",
133+
pat="test_token_123"
134+
)
135+
136+
with patch("gitingest.clone.check_repo_exists", return_value=True) as mock_check:
137+
with patch("gitingest.clone.run_git_command", new_callable=AsyncMock) as mock_exec:
138+
mock_process = AsyncMock()
139+
mock_process.communicate.return_value = (b"output", b"error")
140+
mock_exec.return_value = mock_process
141+
142+
await clone_repo(clone_config)
143+
mock_check.assert_called_once_with(clone_config.url, clone_config.pat)
144+
145+
# Verify git clone command includes PAT in URL
146+
expected_url = clone_config.url.replace("https://", f"https://oauth2:{clone_config.pat}@")
147+
# Check that the command was called with the correct arguments
148+
mock_exec.assert_called_with(
149+
"git", "clone", "--depth=1", "--single-branch",
150+
expected_url, clone_config.local_path
151+
)

src/process_query.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,7 @@ async def process_query(
4040
slider_position: int,
4141
pattern_type: str = "exclude",
4242
pattern: str = "",
43+
pat: str | None = None,
4344
is_index: bool = False,
4445
) -> _TemplateResponse:
4546
template = "index.jinja" if is_index else "github.jinja"
@@ -65,6 +66,7 @@ async def process_query(
6566
local_path=query["local_path"],
6667
commit=query.get("commit"),
6768
branch=query.get("branch"),
69+
pat=pat,
6870
)
6971
await clone_repo(clone_config)
7072
summary, tree, content = ingest_from_query(query)

src/templates/components/github_form.jinja

Lines changed: 37 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,9 +3,10 @@
33
<div class="rounded-xl relative z-20 pl-8 sm:pl-10 pr-8 sm:pr-16 py-8 border-[3px] border-gray-900 bg-[#fff4da]">
44
<img src="https://cdn.devdojo.com/images/january2023/shape-1.png"
55
class="absolute md:block hidden left-0 h-[4.5rem] w-[4.5rem] bottom-0 -translate-x-full ml-3">
6-
<form class="flex md:flex-row flex-col w-full h-full justify-center items-stretch space-y-5 md:space-y-0 md:space-x-5"
6+
<form class="flex flex-col w-full h-full justify-center items-stretch space-y-5"
77
id="ingestForm"
88
onsubmit="handleSubmit(event{% if is_index %}, true{% endif %})">
9+
<!-- Repository URL input -->
910
<div class="relative w-full h-full">
1011
<div class="w-full h-full rounded bg-gray-900 translate-y-1 translate-x-1 absolute inset-0 z-10"></div>
1112
<input type="text"
@@ -16,16 +17,50 @@
1617
required
1718
class="border-[3px] w-full relative z-20 border-gray-900 placeholder-gray-600 text-lg font-medium focus:outline-none py-3.5 px-6 rounded">
1819
</div>
19-
<div class="relative w-auto flex-shrink-0 h-full group">
20+
21+
<!-- Access Settings Checkbox -->
22+
<div class="flex items-center space-x-2 ml-2">
23+
<input type="checkbox"
24+
id="showAccessSettings"
25+
class="w-4 h-4 rounded border-gray-900"
26+
onchange="toggleAccessSettings()">
27+
<label for="showAccessSettings" class="text-gray-900">Access Settings</label>
28+
</div>
29+
30+
<!-- PAT input (hidden by default) -->
31+
<div id="accessSettingsContainer" class="hidden">
32+
<div class="relative w-full h-full">
33+
<div class="w-full h-full rounded bg-gray-900 translate-y-1 translate-x-1 absolute inset-0 z-10"></div>
34+
<input type="password"
35+
name="pat"
36+
id="pat"
37+
placeholder="Personal Access Token"
38+
value="{{ pat if pat else '' }}"
39+
class="border-[3px] w-full relative z-20 border-gray-900 placeholder-gray-600 text-lg font-medium focus:outline-none py-3.5 px-6 rounded">
40+
</div>
41+
</div>
42+
43+
<!-- Submit Button -->
44+
<div class="relative w-full sm:w-auto flex-shrink-0 h-full group">
2045
<div class="w-full h-full rounded bg-gray-800 translate-y-1 translate-x-1 absolute inset-0 z-10"></div>
2146
<button type="submit"
2247
class="py-3.5 rounded px-6 group-hover:-translate-y-px group-hover:-translate-x-px ease-out duration-300 z-20 relative w-full border-[3px] border-gray-900 font-medium bg-[#ffc480] tracking-wide text-lg flex-shrink-0 text-gray-900">
2348
Ingest
2449
</button>
2550
</div>
51+
2652
<input type="hidden" name="pattern_type" value="exclude">
2753
<input type="hidden" name="pattern" value="">
2854
</form>
55+
56+
<script>
57+
function toggleAccessSettings() {
58+
const container = document.getElementById('accessSettingsContainer');
59+
const checkbox = document.getElementById('showAccessSettings');
60+
container.classList.toggle('hidden', !checkbox.checked);
61+
}
62+
</script>
63+
2964
<div class="mt-4 relative z-20 flex flex-wrap gap-4 items-start">
3065
<!-- Pattern selector -->
3166
<div class="w-[200px] sm:w-[250px] mr-9 mt-4">

0 commit comments

Comments
 (0)