Skip to content

Commit fd67762

Browse files
authored
Merge branch 'main' into 365_improve_ui_and_backend
2 parents 17266ab + 8445050 commit fd67762

File tree

8 files changed

+56
-93
lines changed

8 files changed

+56
-93
lines changed

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,13 @@ requires-python = ">= 3.8"
77
dependencies = [
88
"click>=8.0.0",
99
"fastapi[standard]>=0.109.1", # Minimum safe release (https://osv.dev/vulnerability/PYSEC-2024-38)
10+
"httpx",
11+
"pathspec>=0.12.1",
1012
"pydantic",
1113
"python-dotenv",
1214
"slowapi",
1315
"starlette>=0.40.0", # Minimum safe release (https://osv.dev/vulnerability/GHSA-f96h-pmfr-66vw)
1416
"tiktoken>=0.7.0", # Support for o200k_base encoding
15-
"pathspec>=0.12.1",
1617
"typing_extensions>= 4.0.0; python_version < '3.10'",
1718
"uvicorn>=0.11.7", # Minimum safe release (https://osv.dev/vulnerability/PYSEC-2020-150)
1819
]

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
click>=8.0.0
22
fastapi[standard]>=0.109.1 # Minimum safe release (https://osv.dev/vulnerability/PYSEC-2024-38)
3+
httpx
34
pathspec>=0.12.1
45
pydantic
56
python-dotenv

src/gitingest/utils/git_utils.py

Lines changed: 31 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -4,22 +4,17 @@
44

55
import asyncio
66
import base64
7-
import os
87
import re
8+
import sys
99
from typing import Final
1010
from urllib.parse import urlparse
1111

12-
from starlette.status import (
13-
HTTP_200_OK,
14-
HTTP_301_MOVED_PERMANENTLY,
15-
HTTP_302_FOUND,
16-
HTTP_401_UNAUTHORIZED,
17-
HTTP_403_FORBIDDEN,
18-
HTTP_404_NOT_FOUND,
19-
)
12+
import httpx
13+
from starlette.status import HTTP_200_OK, HTTP_401_UNAUTHORIZED, HTTP_403_FORBIDDEN, HTTP_404_NOT_FOUND
2014

2115
from gitingest.utils.compat_func import removesuffix
2216
from gitingest.utils.exceptions import InvalidGitHubTokenError
17+
from server.server_utils import Colors
2318

2419
# GitHub Personal-Access tokens (classic + fine-grained).
2520
# - ghp_ / gho_ / ghu_ / ghs_ / ghr_ → 36 alphanumerics
@@ -81,6 +76,8 @@ async def run_command(*args: str) -> tuple[bytes, bytes]:
8176
async def ensure_git_installed() -> None:
8277
"""Ensure Git is installed and accessible on the system.
8378
79+
On Windows, this also checks whether Git is configured to support long file paths.
80+
8481
Raises
8582
------
8683
RuntimeError
@@ -92,6 +89,20 @@ async def ensure_git_installed() -> None:
9289
except RuntimeError as exc:
9390
msg = "Git is not installed or not accessible. Please install Git first."
9491
raise RuntimeError(msg) from exc
92+
if sys.platform == "win32":
93+
try:
94+
stdout, _ = await run_command("git", "config", "core.longpaths")
95+
if stdout.decode().strip().lower() != "true":
96+
print(
97+
f"{Colors.BROWN}WARN{Colors.END}: {Colors.RED}Git clone may fail on Windows "
98+
f"due to long file paths:{Colors.END}",
99+
)
100+
print(f"{Colors.RED}To avoid this issue, consider enabling long path support with:{Colors.END}")
101+
print(f"{Colors.RED} git config --global core.longpaths true{Colors.END}")
102+
print(f"{Colors.RED}Note: This command may require administrator privileges.{Colors.END}")
103+
except RuntimeError:
104+
# Ignore if checking 'core.longpaths' fails.
105+
pass
95106

96107

97108
async def check_repo_exists(url: str, token: str | None = None) -> bool:
@@ -115,45 +126,28 @@ async def check_repo_exists(url: str, token: str | None = None) -> bool:
115126
If the host returns an unrecognised status code.
116127
117128
"""
118-
# TODO: use `requests` instead of `curl`
119-
cmd: list[str] = [
120-
"curl",
121-
"--silent", # Suppress output
122-
"--location", # Follow redirects
123-
"--write-out",
124-
"%{http_code}", # Write the HTTP status code to stdout
125-
"-o",
126-
os.devnull,
127-
]
129+
headers = {}
128130

129131
if token and is_github_host(url):
130132
host, owner, repo = _parse_github_url(url)
131133
# Public GitHub vs. GitHub Enterprise
132134
base_api = "https://api.github.com" if host == "github.com" else f"https://{host}/api/v3"
133135
url = f"{base_api}/repos/{owner}/{repo}"
134-
cmd += ["--header", f"Authorization: Bearer {token}"]
135-
136-
cmd.append(url)
136+
headers["Authorization"] = f"Bearer {token}"
137137

138-
proc = await asyncio.create_subprocess_exec(
139-
*cmd,
140-
stdout=asyncio.subprocess.PIPE,
141-
stderr=asyncio.subprocess.PIPE,
142-
)
143-
stdout, _ = await proc.communicate()
138+
async with httpx.AsyncClient(follow_redirects=True) as client:
139+
try:
140+
response = await client.head(url, headers=headers)
141+
except httpx.RequestError:
142+
return False
144143

145-
if proc.returncode != 0:
146-
return False
144+
status_code = response.status_code
147145

148-
status = int(stdout.decode().strip())
149-
if status in {HTTP_200_OK, HTTP_301_MOVED_PERMANENTLY}:
146+
if status_code == HTTP_200_OK:
150147
return True
151-
# TODO: handle 302 redirects
152-
if status in {HTTP_404_NOT_FOUND, HTTP_302_FOUND}:
153-
return False
154-
if status in {HTTP_401_UNAUTHORIZED, HTTP_403_FORBIDDEN}:
148+
if status_code in {HTTP_401_UNAUTHORIZED, HTTP_403_FORBIDDEN, HTTP_404_NOT_FOUND}:
155149
return False
156-
msg = f"Unexpected HTTP status {status} for {url}"
150+
msg = f"Unexpected HTTP status {status_code} for {url}"
157151
raise RuntimeError(msg)
158152

159153

src/server/main.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -89,20 +89,20 @@ async def robots() -> FileResponse:
8989
return FileResponse("static/robots.txt")
9090

9191

92-
@app.get("/llm.txt")
92+
@app.get("/llms.txt")
9393
async def llm_txt() -> FileResponse:
9494
"""Serve the llms.txt file to provide information about the site to LLMs.
9595
96-
**This endpoint serves the ``llm.txt`` file located in the static directory**
96+
**This endpoint serves the ``llms.txt`` file located in the static directory**
9797
to provide information about the site to Large Language Models (LLMs)
9898
and other AI systems that may be crawling the site.
9999
100100
**Returns**
101101
102-
- **FileResponse**: The ``llm.txt`` file located in the static directory
102+
- **FileResponse**: The ``llms.txt`` file located in the static directory
103103
104104
"""
105-
return FileResponse("static/llm.txt")
105+
return FileResponse("static/llms.txt")
106106

107107

108108
@app.get("/docs", response_class=HTMLResponse, include_in_schema=False)

src/server/templates/components/navbar.jinja

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@
1111
</div>
1212
{# Navigation with updated styling #}
1313
<nav class="flex items-center space-x-6">
14-
<a href="/llm.txt" class="link-bounce flex items-center text-gray-900">
14+
<a href="/llms.txt" class="link-bounce flex items-center text-gray-900">
1515
<span class="badge-new">NEW</span>
16-
/llm.txt
16+
/llms.txt
1717
</a>
1818
{# GitHub link #}
1919
<div class="flex items-center gap-2">
File renamed without changes.

tests/test_clone.py

Lines changed: 15 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,10 @@
99
from pathlib import Path
1010
from unittest.mock import AsyncMock
1111

12+
import httpx
1213
import pytest
1314
from pytest_mock import MockerFixture
15+
from starlette.status import HTTP_200_OK, HTTP_401_UNAUTHORIZED, HTTP_403_FORBIDDEN, HTTP_404_NOT_FOUND
1416

1517
from gitingest.clone import clone_repo
1618
from gitingest.schemas import CloneConfig
@@ -87,35 +89,24 @@ async def test_clone_nonexistent_repository(repo_exists_true: AsyncMock) -> None
8789

8890
@pytest.mark.asyncio
8991
@pytest.mark.parametrize(
90-
("mock_stdout", "return_code", "expected"),
92+
("status_code", "expected"),
9193
[
92-
(b"200\n", 0, True), # Existing repo
93-
(b"404\n", 0, False), # Non-existing repo
94-
(b"200\n", 1, False), # Failed request
94+
(HTTP_200_OK, True),
95+
(HTTP_401_UNAUTHORIZED, False),
96+
(HTTP_403_FORBIDDEN, False),
97+
(HTTP_404_NOT_FOUND, False),
9598
],
9699
)
97-
async def test_check_repo_exists(
98-
mock_stdout: bytes,
99-
*,
100-
return_code: int,
101-
expected: bool,
102-
mocker: MockerFixture,
103-
) -> None:
104-
"""Test the ``check_repo_exists`` function with different Git HTTP responses.
105-
106-
Given various stdout lines and return codes:
107-
When ``check_repo_exists`` is called,
108-
Then it should correctly indicate whether the repository exists.
109-
"""
110-
mock_exec = mocker.patch("asyncio.create_subprocess_exec", new_callable=AsyncMock)
111-
mock_process = AsyncMock()
112-
mock_process.communicate.return_value = (mock_stdout, b"")
113-
mock_process.returncode = return_code
114-
mock_exec.return_value = mock_process
100+
async def test_check_repo_exists(status_code: int, *, expected: bool, mocker: MockerFixture) -> None:
101+
"""Verify that ``check_repo_exists`` interprets httpx results correctly."""
102+
mock_client = AsyncMock()
103+
mock_client.__aenter__.return_value = mock_client # context-manager protocol
104+
mock_client.head.return_value = httpx.Response(status_code=status_code)
105+
mocker.patch("httpx.AsyncClient", return_value=mock_client)
115106

116-
repo_exists = await check_repo_exists(DEMO_URL)
107+
result = await check_repo_exists(DEMO_URL)
117108

118-
assert repo_exists is expected
109+
assert result is expected
119110

120111

121112
@pytest.mark.asyncio
@@ -218,25 +209,6 @@ async def test_check_repo_exists_with_redirect(mocker: MockerFixture) -> None:
218209
assert repo_exists is False
219210

220211

221-
@pytest.mark.asyncio
222-
async def test_check_repo_exists_with_permanent_redirect(mocker: MockerFixture) -> None:
223-
"""Test ``check_repo_exists`` when a permanent redirect (301) is returned.
224-
225-
Given a URL that responds with "301 Found":
226-
When ``check_repo_exists`` is called,
227-
Then it should return ``True``, indicating the repo may exist at the new location.
228-
"""
229-
mock_exec = mocker.patch("asyncio.create_subprocess_exec", new_callable=AsyncMock)
230-
mock_process = AsyncMock()
231-
mock_process.communicate.return_value = (b"301\n", b"")
232-
mock_process.returncode = 0 # Simulate successful request
233-
mock_exec.return_value = mock_process
234-
235-
repo_exists = await check_repo_exists(DEMO_URL)
236-
237-
assert repo_exists
238-
239-
240212
@pytest.mark.asyncio
241213
async def test_clone_with_timeout(run_command_mock: AsyncMock) -> None:
242214
"""Test cloning a repository when a timeout occurs.

tests/test_git_utils.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,7 @@
1212
import pytest
1313

1414
from gitingest.utils.exceptions import InvalidGitHubTokenError
15-
from gitingest.utils.git_utils import (
16-
create_git_auth_header,
17-
create_git_command,
18-
is_github_host,
19-
validate_github_token,
20-
)
15+
from gitingest.utils.git_utils import create_git_auth_header, create_git_command, is_github_host, validate_github_token
2116

2217
if TYPE_CHECKING:
2318
from pathlib import Path

0 commit comments

Comments
 (0)