Skip to content

Commit 9414238

Browse files
root and claude committed
style: apply pre-commit fixes
🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 3283428 commit 9414238

21 files changed

+662
-263
lines changed

diff.diff

Lines changed: 365 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,365 @@
1+
diff --git a/src/gitingest/clone.py b/src/gitingest/clone.py
2+
index 1b776e8..b486fa1 100644
3+
--- a/src/gitingest/clone.py
4+
+++ b/src/gitingest/clone.py
5+
@@ -14,7 +14,6 @@ from gitingest.utils.git_utils import (
6+
checkout_partial_clone,
7+
create_git_repo,
8+
ensure_git_installed,
9+
- git_auth_context,
10+
is_github_host,
11+
resolve_commit,
12+
)
13+
@@ -87,7 +86,12 @@ async def clone_repo(config: CloneConfig, *, token: str | None = None) -> None:
14+
commit = await resolve_commit(config, token=token)
15+
logger.debug("Resolved commit", extra={"commit": commit})
16+
17+
- # Clone the repository using GitPython with proper authentication
18+
+ # Prepare URL with authentication if needed
19+
+ clone_url = url
20+
+ if token and is_github_host(url):
21+
+ clone_url = _add_token_to_url(url, token)
22+
+
23+
+ # Clone the repository using GitPython
24+
logger.info("Executing git clone operation", extra={"url": "<redacted>", "local_path": local_path})
25+
try:
26+
clone_kwargs = {
27+
@@ -96,20 +100,18 @@ async def clone_repo(config: CloneConfig, *, token: str | None = None) -> None:
28+
"depth": 1,
29+
}
30+
31+
- with git_auth_context(url, token) as (git_cmd, auth_url):
32+
+ if partial_clone:
33+
+ # GitPython doesn't directly support --filter and --sparse in clone
34+
+ # We'll need to use git.Git() for the initial clone with these options
35+
+ git_cmd = git.Git()
36+
+ cmd_args = ["--single-branch", "--no-checkout", "--depth=1"]
37+
if partial_clone:
38+
- # For partial clones, use git.Git() with filter and sparse options
39+
- cmd_args = ["--single-branch", "--no-checkout", "--depth=1"]
40+
cmd_args.extend(["--filter=blob:none", "--sparse"])
41+
- cmd_args.extend([auth_url, local_path])
42+
- git_cmd.clone(*cmd_args)
43+
- elif token and is_github_host(url):
44+
- # For authenticated GitHub repos, use git_cmd with auth URL
45+
- cmd_args = ["--single-branch", "--no-checkout", "--depth=1", auth_url, local_path]
46+
- git_cmd.clone(*cmd_args)
47+
- else:
48+
- # For non-authenticated repos, use the standard GitPython method
49+
- git.Repo.clone_from(url, local_path, **clone_kwargs)
50+
+ cmd_args.extend([clone_url, local_path])
51+
+ git_cmd.clone(*cmd_args)
52+
+ else:
53+
+ git.Repo.clone_from(clone_url, local_path, **clone_kwargs)
54+
+
55+
logger.info("Git clone completed successfully")
56+
except git.GitCommandError as exc:
57+
msg = f"Git clone failed: {exc}"
58+
diff --git a/src/gitingest/utils/git_utils.py b/src/gitingest/utils/git_utils.py
59+
index 1c1a986..b7f293a 100644
60+
--- a/src/gitingest/utils/git_utils.py
61+
+++ b/src/gitingest/utils/git_utils.py
62+
@@ -6,12 +6,13 @@ import asyncio
63+
import base64
64+
import re
65+
import sys
66+
-from contextlib import contextmanager
67+
from pathlib import Path
68+
-from typing import TYPE_CHECKING, Final, Generator, Iterable
69+
+from typing import TYPE_CHECKING, Final, Iterable
70+
from urllib.parse import urlparse, urlunparse
71+
72+
import git
73+
+import httpx
74+
+from starlette.status import HTTP_200_OK, HTTP_401_UNAUTHORIZED, HTTP_403_FORBIDDEN, HTTP_404_NOT_FOUND
75+
76+
from gitingest.utils.compat_func import removesuffix
77+
from gitingest.utils.exceptions import InvalidGitHubTokenError
78+
@@ -135,15 +136,35 @@ async def check_repo_exists(url: str, token: str | None = None) -> bool:
79+
bool
80+
``True`` if the repository exists, ``False`` otherwise.
81+
82+
+ Raises
83+
+ ------
84+
+ RuntimeError
85+
+ If the host returns an unrecognised status code.
86+
+
87+
"""
88+
- try:
89+
- # Try to resolve HEAD - if repo exists, this will work
90+
- await _resolve_ref_to_sha(url, "HEAD", token=token)
91+
- except (ValueError, Exception):
92+
- # Repository doesn't exist, is private without proper auth, or other error
93+
- return False
94+
+ headers = {}
95+
+
96+
+ if token and is_github_host(url):
97+
+ host, owner, repo = _parse_github_url(url)
98+
+ # Public GitHub vs. GitHub Enterprise
99+
+ base_api = "https://api.github.com" if host == "github.com" else f"https://{host}/api/v3"
100+
+ url = f"{base_api}/repos/{owner}/{repo}"
101+
+ headers["Authorization"] = f"Bearer {token}"
102+
103+
- return True
104+
+ async with httpx.AsyncClient(follow_redirects=True) as client:
105+
+ try:
106+
+ response = await client.head(url, headers=headers)
107+
+ except httpx.RequestError:
108+
+ return False
109+
+
110+
+ status_code = response.status_code
111+
+
112+
+ if status_code == HTTP_200_OK:
113+
+ return True
114+
+ if status_code in {HTTP_401_UNAUTHORIZED, HTTP_403_FORBIDDEN, HTTP_404_NOT_FOUND}:
115+
+ return False
116+
+ msg = f"Unexpected HTTP status {status_code} for {url}"
117+
+ raise RuntimeError(msg)
118+
119+
120+
def _parse_github_url(url: str) -> tuple[str, str, str]:
121+
@@ -217,6 +238,13 @@ async def fetch_remote_branches_or_tags(url: str, *, ref_type: str, token: str |
122+
123+
# Use GitPython to get remote references
124+
try:
125+
+ git_cmd = git.Git()
126+
+
127+
+ # Prepare authentication if needed
128+
+ if token and is_github_host(url):
129+
+ auth_url = _add_token_to_url(url, token)
130+
+ url = auth_url
131+
+
132+
fetch_tags = ref_type == "tags"
133+
to_fetch = "tags" if fetch_tags else "heads"
134+
135+
@@ -226,11 +254,8 @@ async def fetch_remote_branches_or_tags(url: str, *, ref_type: str, token: str |
136+
cmd_args.append("--refs") # Filter out peeled tag objects
137+
cmd_args.append(url)
138+
139+
- # Run the command with proper authentication
140+
- with git_auth_context(url, token) as (git_cmd, auth_url):
141+
- # Replace the URL in cmd_args with the authenticated URL
142+
- cmd_args[-1] = auth_url # URL is the last argument
143+
- output = git_cmd.ls_remote(*cmd_args)
144+
+ # Run the command using git_cmd.ls_remote() method
145+
+ output = git_cmd.ls_remote(*cmd_args)
146+
147+
# Parse output
148+
return [
149+
@@ -314,70 +339,6 @@ def create_git_auth_header(token: str, url: str = "https://github.com") -> str:
150+
return f"http.https://{hostname}/.extraheader=Authorization: Basic {basic}"
151+
152+
153+
-def create_authenticated_url(url: str, token: str | None = None) -> str:
154+
- """Create an authenticated URL for Git operations.
155+
-
156+
- This is the safest approach for multi-user environments - no global state.
157+
-
158+
- Parameters
159+
- ----------
160+
- url : str
161+
- The repository URL.
162+
- token : str | None
163+
- GitHub personal access token (PAT) for accessing private repositories.
164+
-
165+
- Returns
166+
- -------
167+
- str
168+
- The URL with authentication embedded (for GitHub) or original URL.
169+
-
170+
- """
171+
- if not (token and is_github_host(url)):
172+
- return url
173+
-
174+
- parsed = urlparse(url)
175+
- # Add token as username in URL (GitHub supports this)
176+
- netloc = f"x-oauth-basic:{token}@{parsed.hostname}"
177+
- if parsed.port:
178+
- netloc += f":{parsed.port}"
179+
-
180+
- return urlunparse(
181+
- (
182+
- parsed.scheme,
183+
- netloc,
184+
- parsed.path,
185+
- parsed.params,
186+
- parsed.query,
187+
- parsed.fragment,
188+
- ),
189+
- )
190+
-
191+
-
192+
-@contextmanager
193+
-def git_auth_context(url: str, token: str | None = None) -> Generator[tuple[git.Git, str]]:
194+
- """Context manager that provides Git command and authenticated URL.
195+
-
196+
- Returns both a Git command object and the authenticated URL to use.
197+
- This avoids any global state contamination between users.
198+
-
199+
- Parameters
200+
- ----------
201+
- url : str
202+
- The repository URL to check if authentication is needed.
203+
- token : str | None
204+
- GitHub personal access token (PAT) for accessing private repositories.
205+
-
206+
- Yields
207+
- ------
208+
- Generator[tuple[git.Git, str]]
209+
- Tuple of (Git command object, authenticated URL to use).
210+
-
211+
- """
212+
- git_cmd = git.Git()
213+
- auth_url = create_authenticated_url(url, token)
214+
- yield git_cmd, auth_url
215+
-
216+
-
217+
def validate_github_token(token: str) -> None:
218+
"""Validate the format of a GitHub Personal Access Token.
219+
220+
@@ -479,9 +440,15 @@ async def _resolve_ref_to_sha(url: str, pattern: str, token: str | None = None)
221+
222+
"""
223+
try:
224+
- # Execute ls-remote command with proper authentication
225+
- with git_auth_context(url, token) as (git_cmd, auth_url):
226+
- output = git_cmd.ls_remote(auth_url, pattern)
227+
+ git_cmd = git.Git()
228+
+
229+
+ # Prepare authentication if needed
230+
+ auth_url = url
231+
+ if token and is_github_host(url):
232+
+ auth_url = _add_token_to_url(url, token)
233+
+
234+
+ # Execute ls-remote command
235+
+ output = git_cmd.ls_remote(auth_url, pattern)
236+
lines = output.splitlines()
237+
238+
sha = _pick_commit_sha(lines)
239+
@@ -490,7 +457,7 @@ async def _resolve_ref_to_sha(url: str, pattern: str, token: str | None = None)
240+
raise ValueError(msg)
241+
242+
except git.GitCommandError as exc:
243+
- msg = f"Failed to resolve {pattern} in {url}:\n{exc}"
244+
+ msg = f"Failed to resolve {pattern} in {url}: {exc}"
245+
raise ValueError(msg) from exc
246+
247+
return sha
248+
@@ -547,8 +514,6 @@ def _add_token_to_url(url: str, token: str) -> str:
249+
The URL with embedded authentication.
250+
251+
"""
252+
- from urllib.parse import urlparse, urlunparse
253+
-
254+
parsed = urlparse(url)
255+
# Add token as username in URL (GitHub supports this)
256+
netloc = f"x-oauth-basic:{token}@{parsed.hostname}"
257+
diff --git a/src/server/query_processor.py b/src/server/query_processor.py
258+
index f2f2ae9..03f52f1 100644
259+
--- a/src/server/query_processor.py
260+
+++ b/src/server/query_processor.py
261+
@@ -308,7 +308,7 @@ async def process_query(
262+
_print_error(query.url, exc, max_file_size, pattern_type, pattern)
263+
# Clean up repository even if processing failed
264+
_cleanup_repository(clone_config)
265+
- return IngestErrorResponse(error=f"{exc!s}")
266+
+ return IngestErrorResponse(error=str(exc))
267+
268+
if len(content) > MAX_DISPLAY_SIZE:
269+
content = (
270+
diff --git a/tests/test_clone.py b/tests/test_clone.py
271+
index 6abbd87..8c44523 100644
272+
--- a/tests/test_clone.py
273+
+++ b/tests/test_clone.py
274+
@@ -8,8 +8,11 @@ from __future__ import annotations
275+
276+
import sys
277+
from typing import TYPE_CHECKING
278+
+from unittest.mock import AsyncMock
279+
280+
+import httpx
281+
import pytest
282+
+from starlette.status import HTTP_200_OK, HTTP_401_UNAUTHORIZED, HTTP_403_FORBIDDEN, HTTP_404_NOT_FOUND
283+
284+
from gitingest.clone import clone_repo
285+
from gitingest.schemas import CloneConfig
286+
@@ -18,7 +21,6 @@ from tests.conftest import DEMO_URL, LOCAL_REPO_PATH
287+
288+
if TYPE_CHECKING:
289+
from pathlib import Path
290+
- from unittest.mock import AsyncMock
291+
292+
from pytest_mock import MockerFixture
293+
294+
@@ -91,30 +93,24 @@ async def test_clone_nonexistent_repository(repo_exists_true: AsyncMock) -> None
295+
296+
@pytest.mark.asyncio
297+
@pytest.mark.parametrize(
298+
- ("git_command_succeeds", "expected"),
299+
+ ("status_code", "expected"),
300+
[
301+
- (True, True), # git ls-remote succeeds -> repo exists
302+
- (False, False), # git ls-remote fails -> repo doesn't exist or no access
303+
+ (HTTP_200_OK, True),
304+
+ (HTTP_401_UNAUTHORIZED, False),
305+
+ (HTTP_403_FORBIDDEN, False),
306+
+ (HTTP_404_NOT_FOUND, False),
307+
],
308+
)
309+
-async def test_check_repo_exists(
310+
- git_command_succeeds: bool, # noqa: FBT001
311+
- *,
312+
- expected: bool,
313+
- mocker: MockerFixture,
314+
-) -> None:
315+
- """Verify that ``check_repo_exists`` works by using _resolve_ref_to_sha."""
316+
- mock_resolve = mocker.patch("gitingest.utils.git_utils._resolve_ref_to_sha")
317+
-
318+
- if git_command_succeeds:
319+
- mock_resolve.return_value = "abc123def456" # Mock SHA
320+
- else:
321+
- mock_resolve.side_effect = ValueError("Repository not found")
322+
+async def test_check_repo_exists(status_code: int, *, expected: bool, mocker: MockerFixture) -> None:
323+
+ """Verify that ``check_repo_exists`` interprets httpx results correctly."""
324+
+ mock_client = AsyncMock()
325+
+ mock_client.__aenter__.return_value = mock_client # context-manager protocol
326+
+ mock_client.head.return_value = httpx.Response(status_code=status_code)
327+
+ mocker.patch("httpx.AsyncClient", return_value=mock_client)
328+
329+
result = await check_repo_exists(DEMO_URL)
330+
331+
assert result is expected
332+
- mock_resolve.assert_called_once_with(DEMO_URL, "HEAD", token=None)
333+
334+
335+
@pytest.mark.asyncio
336+
@@ -206,18 +202,19 @@ async def test_clone_with_include_submodules(gitpython_mocks: dict) -> None:
337+
338+
339+
@pytest.mark.asyncio
340+
-async def test_check_repo_exists_with_auth_token(mocker: MockerFixture) -> None:
341+
- """Test ``check_repo_exists`` with authentication token.
342+
+async def test_check_repo_exists_with_redirect(mocker: MockerFixture) -> None:
343+
+ """Test ``check_repo_exists`` when a redirect (302) is returned.
344+
345+
- Given a GitHub URL and a token:
346+
+ Given a URL that responds with "302 Found":
347+
When ``check_repo_exists`` is called,
348+
- Then it should pass the token to _resolve_ref_to_sha.
349+
+ Then it should return ``False``, indicating the repo is inaccessible.
350+
"""
351+
- mock_resolve = mocker.patch("gitingest.utils.git_utils._resolve_ref_to_sha")
352+
- mock_resolve.return_value = "abc123def456" # Mock SHA
353+
+ mock_exec = mocker.patch("asyncio.create_subprocess_exec", new_callable=AsyncMock)
354+
+ mock_process = AsyncMock()
355+
+ mock_process.communicate.return_value = (b"302\n", b"")
356+
+ mock_process.returncode = 0 # Simulate successful request
357+
+ mock_exec.return_value = mock_process
358+
359+
- test_token = "token123" # noqa: S105
360+
- result = await check_repo_exists("https://github.com/test/repo", token=test_token)
361+
+ repo_exists = await check_repo_exists(DEMO_URL)
362+
363+
- assert result is True
364+
- mock_resolve.assert_called_once_with("https://github.com/test/repo", "HEAD", token=test_token)
365+
+ assert repo_exists is False

examples/mcp-config.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,4 @@
88
}
99
}
1010
}
11-
}
11+
}

examples/mcp_client_example.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
import asyncio
2+
23
from mcp.client.session import ClientSession
34
from mcp.client.stdio import StdioServerParameters, stdio_client
45

56

67
async def main():
78
async with stdio_client(
8-
StdioServerParameters(command="gitingest", args=["--mcp-server"])
9+
StdioServerParameters(command="gitingest", args=["--mcp-server"]),
910
) as (read, write):
1011
async with ClientSession(read, write) as session:
1112
await session.initialize()
@@ -18,7 +19,9 @@ async def main():
1819

1920
# Call the ingest_repository tool
2021
print("\n📞 Appel de l'outil ingest_repository...")
21-
result = await session.call_tool("ingest_repository", {"source": "https://github.com/coderamp-labs/gitingest"})
22+
result = await session.call_tool(
23+
"ingest_repository", {"source": "https://github.com/coderamp-labs/gitingest"}
24+
)
2225
print(result)
2326

2427

0 commit comments

Comments
 (0)