Skip to content

Commit 85da895

Browse files
cevatkerimfilipchristiansen
authored and committed
feat: add PAT support with custom Git server compatibility
- Add optional PAT field with toggle checkbox in UI
- Support custom Git server API endpoints (api/v1/repos)
- Add comprehensive test coverage for PAT authentication
- Update repository existence check to handle both GitHub and custom Git servers
1 parent 36b04a5 commit 85da895

File tree

4 files changed

+213
-74
lines changed

4 files changed

+213
-74
lines changed

src/gitingest/clone.py

Lines changed: 95 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,89 @@ class CloneConfig:
3131
local_path: str
3232
commit: str | None = None
3333
branch: str | None = None
34+
pat: str | None = None
35+
36+
37+
async def _check_repo_exists(url: str, pat: str | None = None) -> bool:
38+
"""
39+
Check if a repository exists at the given URL using an HTTP HEAD request.
40+
41+
Parameters
42+
----------
43+
url : str
44+
The URL of the repository.
45+
pat : str | None
46+
Personal Access Token for authentication, optional.
47+
48+
Returns
49+
-------
50+
bool
51+
True if the repository exists, False otherwise.
52+
"""
53+
# Parse URL to get components
54+
parts = url.split('/')
55+
if len(parts) < 5: # Need at least protocol, empty, host, username, repo
56+
return False
57+
58+
host = parts[2]
59+
username = parts[3]
60+
repo = parts[4]
61+
62+
# Construct API URL based on host
63+
if 'github.com' in host:
64+
api_url = url
65+
else:
66+
# For custom Git servers, use API v1 endpoint
67+
api_url = f"https://{host}/api/v1/repos/{username}/{repo}"
68+
69+
cmd = ["curl", "-I"]
70+
if pat:
71+
cmd.extend(["-H", f"Authorization: token {pat}"])
72+
cmd.append(api_url)
73+
74+
proc = await asyncio.create_subprocess_exec(
75+
*cmd,
76+
stdout=asyncio.subprocess.PIPE,
77+
stderr=asyncio.subprocess.PIPE,
78+
)
79+
stdout, _ = await proc.communicate()
80+
if proc.returncode != 0:
81+
return False
82+
# Check if stdout contains "404" status code
83+
stdout_str = stdout.decode()
84+
return "HTTP/1.1 404" not in stdout_str and "HTTP/2 404" not in stdout_str
85+
86+
87+
async def _run_git_command(*args: str) -> tuple[bytes, bytes]:
    """
    Executes a git command asynchronously and captures its output.

    Parameters
    ----------
    *args : str
        The git command and its arguments to execute.

    Returns
    -------
    tuple[bytes, bytes]
        A tuple containing the stdout and stderr of the git command.

    Raises
    ------
    RuntimeError
        If the git command exits with a non-zero status. Credentials embedded
        in URLs (``scheme://user:secret@host``) are masked in the message.
    """
    import re  # local import so this block does not depend on the file's import section

    proc = await asyncio.create_subprocess_exec(
        *args,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    stdout, stderr = await proc.communicate()
    if proc.returncode != 0:
        # Callers may embed a personal access token in the clone URL; never
        # echo it back through the exception text (it can end up in logs or UI).
        def _mask_credentials(text: str) -> str:
            return re.sub(r"([A-Za-z][A-Za-z0-9+.\-]*://)[^/@\s]+@", r"\1***@", text)

        error_message = _mask_credentials(stderr.decode(errors="replace").strip())
        command = _mask_credentials(" ".join(args))
        raise RuntimeError(f"Git command failed: {command}\nError: {error_message}")

    return stdout, stderr
34117

35118

36119
@async_timeout(CLONE_TIMEOUT)
@@ -45,11 +128,12 @@ async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]:
45128
Parameters
46129
----------
47130
config : CloneConfig
48-
A dictionary containing the following keys:
131+
Configuration object containing:
49132
- url (str): The URL of the repository.
50133
- local_path (str): The local path to clone the repository to.
51134
- commit (Optional[str]): The specific commit hash to checkout.
52135
- branch (Optional[str]): The branch to clone. Defaults to 'main' or 'master' if not provided.
136+
- pat (Optional[str]): Personal Access Token for authentication.
53137
54138
Returns
55139
-------
@@ -65,11 +149,12 @@ async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]:
65149
AsyncTimeoutError
66150
If the cloning process exceeds the specified timeout.
67151
"""
68-
# Extract and validate query parameters
152+
# Extract and validate parameters
69153
url: str = config.url
70154
local_path: str = config.local_path
71155
commit: str | None = config.commit
72156
branch: str | None = config.branch
157+
pat: str | None = config.pat
73158

74159
if not url:
75160
raise ValueError("The 'url' parameter is required.")
@@ -78,13 +163,15 @@ async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]:
78163
raise ValueError("The 'local_path' parameter is required.")
79164

80165
# Check if the repository exists
81-
if not await _check_repo_exists(url):
82-
raise ValueError("Repository not found, make sure it is public")
166+
if not await _check_repo_exists(url, pat):
167+
raise ValueError("Repository not found, make sure it is public or provide valid PAT")
83168

84169
try:
85170
if commit:
86171
# Scenario 1: Clone and checkout a specific commit
87172
# Clone the repository without depth to ensure full history for checkout
173+
if pat:
174+
url = url.replace("https://", f"https://oauth2:{pat}@")
88175
clone_cmd = ["git", "clone", "--single-branch", url, local_path]
89176
await _run_git_command(*clone_cmd)
90177

@@ -93,75 +180,17 @@ async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]:
93180
return await _run_git_command(*checkout_cmd)
94181

95182
if branch and branch.lower() not in ("main", "master"):
96-
97183
# Scenario 2: Clone a specific branch with shallow depth
184+
if pat:
185+
url = url.replace("https://", f"https://oauth2:{pat}@")
98186
clone_cmd = ["git", "clone", "--depth=1", "--single-branch", "--branch", branch, url, local_path]
99187
return await _run_git_command(*clone_cmd)
100188

101189
# Scenario 3: Clone the default branch with shallow depth
190+
if pat:
191+
url = url.replace("https://", f"https://oauth2:{pat}@")
102192
clone_cmd = ["git", "clone", "--depth=1", "--single-branch", url, local_path]
103193
return await _run_git_command(*clone_cmd)
104194

105195
except (RuntimeError, asyncio.TimeoutError, AsyncTimeoutError):
106196
raise # Re-raise the exception
107-
108-
109-
async def _check_repo_exists(url: str) -> bool:
110-
"""
111-
Check if a repository exists at the given URL using an HTTP HEAD request.
112-
113-
Parameters
114-
----------
115-
url : str
116-
The URL of the repository.
117-
118-
Returns
119-
-------
120-
bool
121-
True if the repository exists, False otherwise.
122-
"""
123-
proc = await asyncio.create_subprocess_exec(
124-
"curl",
125-
"-I",
126-
url,
127-
stdout=asyncio.subprocess.PIPE,
128-
stderr=asyncio.subprocess.PIPE,
129-
)
130-
stdout, _ = await proc.communicate()
131-
if proc.returncode != 0:
132-
return False
133-
# Check if stdout contains "404" status code
134-
stdout_str = stdout.decode()
135-
return "HTTP/1.1 404" not in stdout_str and "HTTP/2 404" not in stdout_str
136-
137-
138-
async def _run_git_command(*args: str) -> tuple[bytes, bytes]:
139-
"""
140-
Executes a git command asynchronously and captures its output.
141-
142-
Parameters
143-
----------
144-
*args : str
145-
The git command and its arguments to execute.
146-
147-
Returns
148-
-------
149-
tuple[bytes, bytes]
150-
A tuple containing the stdout and stderr of the git command.
151-
152-
Raises
153-
------
154-
RuntimeError
155-
If the git command exits with a non-zero status.
156-
"""
157-
proc = await asyncio.create_subprocess_exec(
158-
*args,
159-
stdout=asyncio.subprocess.PIPE,
160-
stderr=asyncio.subprocess.PIPE,
161-
)
162-
stdout, stderr = await proc.communicate()
163-
if proc.returncode != 0:
164-
error_message = stderr.decode().strip()
165-
raise RuntimeError(f"Git command failed: {' '.join(args)}\nError: {error_message}")
166-
167-
return stdout, stderr

src/gitingest/tests/test_clone.py

Lines changed: 79 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,23 +19,24 @@ async def test_clone_repo_with_commit() -> None:
1919
mock_process = AsyncMock()
2020
mock_process.communicate.return_value = (b"output", b"error")
2121
mock_exec.return_value = mock_process
22+
2223
await clone_repo(clone_config)
23-
mock_check.assert_called_once_with(clone_config.url)
24+
mock_check.assert_called_once_with(clone_config.url, None)
2425
assert mock_exec.call_count == 2 # Clone and checkout calls
2526

2627

2728
@pytest.mark.asyncio
2829
async def test_clone_repo_without_commit() -> None:
29-
query = CloneConfig(url="https://github.com/user/repo", local_path="/tmp/repo", commit=None, branch="main")
30+
clone_config = CloneConfig(url="https://github.com/user/repo", local_path="/tmp/repo", commit=None, branch="main")
3031

3132
with patch("gitingest.clone._check_repo_exists", return_value=True) as mock_check:
3233
with patch("gitingest.clone._run_git_command", new_callable=AsyncMock) as mock_exec:
3334
mock_process = AsyncMock()
3435
mock_process.communicate.return_value = (b"output", b"error")
3536
mock_exec.return_value = mock_process
3637

37-
await clone_repo(query)
38-
mock_check.assert_called_once_with(query.url)
38+
await clone_repo(clone_config)
39+
mock_check.assert_called_once_with(clone_config.url, None)
3940
assert mock_exec.call_count == 1 # Only clone call
4041

4142

@@ -50,7 +51,7 @@ async def test_clone_repo_nonexistent_repository() -> None:
5051
with patch("gitingest.clone._check_repo_exists", return_value=False) as mock_check:
5152
with pytest.raises(ValueError, match="Repository not found"):
5253
await clone_repo(clone_config)
53-
mock_check.assert_called_once_with(clone_config.url)
54+
mock_check.assert_called_once_with(clone_config.url, None)
5455

5556

5657
@pytest.mark.asyncio
@@ -167,5 +168,77 @@ async def test_check_repo_exists_with_redirect() -> None:
167168
mock_process.communicate.return_value = (b"HTTP/1.1 302 Found\n", b"")
168169
mock_process.returncode = 0 # Simulate successful request
169170
mock_exec.return_value = mock_process
170-
171171
assert await _check_repo_exists(url)
172+
173+
@pytest.mark.asyncio
174+
async def test_check_repo_exists_with_pat() -> None:
    """A PAT passed for a github.com URL is sent to curl as an Authorization header."""
    repo_url = "https://github.com/user/repo"
    token = "test_token_123"

    # Fake curl process: exits cleanly and reports a 200 status line.
    fake_proc = AsyncMock()
    fake_proc.returncode = 0
    fake_proc.communicate.return_value = (b"HTTP/1.1 200 OK\n", b"")

    with patch("asyncio.create_subprocess_exec", new_callable=AsyncMock) as spawn:
        spawn.return_value = fake_proc

        await _check_repo_exists(repo_url, token)

        # The GitHub URL is probed as-is, with the token header placed before it.
        expected_argv = ("curl", "-I", "-H", f"Authorization: token {token}", repo_url)
        spawn.assert_called_with(
            *expected_argv,
            stdout=-1,  # asyncio.subprocess.PIPE
            stderr=-1,  # asyncio.subprocess.PIPE
        )
194+
195+
196+
@pytest.mark.asyncio
197+
async def test_check_repo_exists_custom_git_server() -> None:
    """A non-GitHub host is probed through its /api/v1/repos endpoint, with the PAT header."""
    repo_url = "https://git.custom.com/user/repo"
    token = "test_token_123"

    # Fake curl process: exits cleanly and reports a 200 status line.
    fake_proc = AsyncMock()
    fake_proc.returncode = 0
    fake_proc.communicate.return_value = (b"HTTP/1.1 200 OK\n", b"")

    with patch("asyncio.create_subprocess_exec", new_callable=AsyncMock) as spawn:
        spawn.return_value = fake_proc

        await _check_repo_exists(repo_url, token)

        # The repository URL is rewritten to the server's API v1 endpoint.
        expected_argv = (
            "curl",
            "-I",
            "-H",
            f"Authorization: token {token}",
            "https://git.custom.com/api/v1/repos/user/repo",
        )
        spawn.assert_called_with(
            *expected_argv,
            stdout=-1,  # asyncio.subprocess.PIPE
            stderr=-1,  # asyncio.subprocess.PIPE
        )
217+
218+
219+
@pytest.mark.asyncio
220+
async def test_clone_repo_with_pat() -> None:
    """clone_repo forwards the PAT to the existence check and embeds it in the clone URL."""
    clone_config = CloneConfig(
        url="https://git.custom.com/user/repo",
        local_path="/tmp/repo",
        commit=None,
        branch="main",
        pat="test_token_123",
    )

    # Patch the private helpers under their real (underscore-prefixed) names;
    # patching a non-existent attribute makes `patch` raise AttributeError.
    with patch("gitingest.clone._check_repo_exists", return_value=True) as mock_check:
        with patch("gitingest.clone._run_git_command", new_callable=AsyncMock) as mock_exec:
            # Mirror the (stdout, stderr) tuple the real helper returns.
            mock_exec.return_value = (b"output", b"error")

            await clone_repo(clone_config)
            mock_check.assert_called_once_with(clone_config.url, clone_config.pat)

            # Verify git clone command includes PAT in URL
            expected_url = clone_config.url.replace("https://", f"https://oauth2:{clone_config.pat}@")
            # branch="main" takes the default-branch path: a single shallow clone.
            mock_exec.assert_called_once_with(
                "git", "clone", "--depth=1", "--single-branch",
                expected_url, clone_config.local_path,
            )

src/process_query.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ async def process_query(
1717
slider_position: int,
1818
pattern_type: str = "exclude",
1919
pattern: str = "",
20+
pat: str | None = None,
2021
is_index: bool = False,
2122
) -> _TemplateResponse:
2223
"""
@@ -68,6 +69,7 @@ async def process_query(
6869
local_path=query["local_path"],
6970
commit=query.get("commit"),
7071
branch=query.get("branch"),
72+
pat=pat,
7173
)
7274
await clone_repo(clone_config)
7375
summary, tree, content = ingest_from_query(query)

0 commit comments

Comments
 (0)