Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,13 @@ requires-python = ">= 3.8"
dependencies = [
"click>=8.0.0",
"fastapi[standard]>=0.109.1", # Minimum safe release (https://osv.dev/vulnerability/PYSEC-2024-38)
"httpx",
"pathspec>=0.12.1",
"pydantic",
"python-dotenv",
"slowapi",
"starlette>=0.40.0", # Minimum safe release (https://osv.dev/vulnerability/GHSA-f96h-pmfr-66vw)
"tiktoken>=0.7.0", # Support for o200k_base encoding
"pathspec>=0.12.1",
"typing_extensions>= 4.0.0; python_version < '3.10'",
"uvicorn>=0.11.7", # Minimum safe release (https://osv.dev/vulnerability/PYSEC-2020-150)
]
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
click>=8.0.0
fastapi[standard]>=0.109.1 # Minimum safe release (https://osv.dev/vulnerability/PYSEC-2024-38)
httpx
pathspec>=0.12.1
pydantic
python-dotenv
Expand Down
50 changes: 13 additions & 37 deletions src/gitingest/utils/git_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,12 @@

import asyncio
import base64
import os
import re
from typing import Final
from urllib.parse import urlparse

from starlette.status import (
HTTP_200_OK,
HTTP_301_MOVED_PERMANENTLY,
HTTP_302_FOUND,
HTTP_401_UNAUTHORIZED,
HTTP_403_FORBIDDEN,
HTTP_404_NOT_FOUND,
)
import httpx
from starlette.status import HTTP_200_OK, HTTP_401_UNAUTHORIZED, HTTP_403_FORBIDDEN, HTTP_404_NOT_FOUND

from gitingest.utils.compat_func import removesuffix
from gitingest.utils.exceptions import InvalidGitHubTokenError
Expand Down Expand Up @@ -115,45 +108,28 @@ async def check_repo_exists(url: str, token: str | None = None) -> bool:
If the host returns an unrecognised status code.

"""
# TODO: use `requests` instead of `curl`
cmd: list[str] = [
"curl",
"--silent", # Suppress output
"--location", # Follow redirects
"--write-out",
"%{http_code}", # Write the HTTP status code to stdout
"-o",
os.devnull,
]
headers = {}

if token and is_github_host(url):
host, owner, repo = _parse_github_url(url)
# Public GitHub vs. GitHub Enterprise
base_api = "https://api.github.com" if host == "github.com" else f"https://{host}/api/v3"
url = f"{base_api}/repos/{owner}/{repo}"
cmd += ["--header", f"Authorization: Bearer {token}"]
headers["Authorization"] = f"Bearer {token}"

cmd.append(url)
async with httpx.AsyncClient(follow_redirects=True) as client:
try:
response = await client.head(url, headers=headers)
except httpx.RequestError:
return False

proc = await asyncio.create_subprocess_exec(
*cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
stdout, _ = await proc.communicate()
status_code = response.status_code

if proc.returncode != 0:
return False

status = int(stdout.decode().strip())
if status in {HTTP_200_OK, HTTP_301_MOVED_PERMANENTLY}:
if status_code == HTTP_200_OK:
return True
# TODO: handle 302 redirects
if status in {HTTP_404_NOT_FOUND, HTTP_302_FOUND}:
return False
if status in {HTTP_401_UNAUTHORIZED, HTTP_403_FORBIDDEN}:
if status_code in {HTTP_401_UNAUTHORIZED, HTTP_403_FORBIDDEN, HTTP_404_NOT_FOUND}:
return False
msg = f"Unexpected HTTP status {status} for {url}"
msg = f"Unexpected HTTP status {status_code} for {url}"
raise RuntimeError(msg)


Expand Down
58 changes: 15 additions & 43 deletions tests/test_clone.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,10 @@
from pathlib import Path
from unittest.mock import AsyncMock

import httpx
import pytest
from pytest_mock import MockerFixture
from starlette.status import HTTP_200_OK, HTTP_401_UNAUTHORIZED, HTTP_403_FORBIDDEN, HTTP_404_NOT_FOUND

from gitingest.clone import clone_repo
from gitingest.schemas import CloneConfig
Expand Down Expand Up @@ -87,35 +89,24 @@ async def test_clone_nonexistent_repository(repo_exists_true: AsyncMock) -> None

@pytest.mark.asyncio
@pytest.mark.parametrize(
("mock_stdout", "return_code", "expected"),
("status_code", "expected"),
[
(b"200\n", 0, True), # Existing repo
(b"404\n", 0, False), # Non-existing repo
(b"200\n", 1, False), # Failed request
(HTTP_200_OK, True),
(HTTP_401_UNAUTHORIZED, False),
(HTTP_403_FORBIDDEN, False),
(HTTP_404_NOT_FOUND, False),
],
)
async def test_check_repo_exists(
mock_stdout: bytes,
*,
return_code: int,
expected: bool,
mocker: MockerFixture,
) -> None:
"""Test the ``check_repo_exists`` function with different Git HTTP responses.

Given various stdout lines and return codes:
When ``check_repo_exists`` is called,
Then it should correctly indicate whether the repository exists.
"""
mock_exec = mocker.patch("asyncio.create_subprocess_exec", new_callable=AsyncMock)
mock_process = AsyncMock()
mock_process.communicate.return_value = (mock_stdout, b"")
mock_process.returncode = return_code
mock_exec.return_value = mock_process
async def test_check_repo_exists(status_code: int, *, expected: bool, mocker: MockerFixture) -> None:
"""Verify that ``check_repo_exists`` interprets httpx results correctly."""
mock_client = AsyncMock()
mock_client.__aenter__.return_value = mock_client # context-manager protocol
mock_client.head.return_value = httpx.Response(status_code=status_code)
mocker.patch("httpx.AsyncClient", return_value=mock_client)

repo_exists = await check_repo_exists(DEMO_URL)
result = await check_repo_exists(DEMO_URL)

assert repo_exists is expected
assert result is expected


@pytest.mark.asyncio
Expand Down Expand Up @@ -218,25 +209,6 @@ async def test_check_repo_exists_with_redirect(mocker: MockerFixture) -> None:
assert repo_exists is False


@pytest.mark.asyncio
async def test_check_repo_exists_with_permanent_redirect(mocker: MockerFixture) -> None:
"""Test ``check_repo_exists`` when a permanent redirect (301) is returned.

Given a URL that responds with "301 Moved Permanently":
When ``check_repo_exists`` is called,
Then it should return ``True``, indicating the repo may exist at the new location.
"""
mock_exec = mocker.patch("asyncio.create_subprocess_exec", new_callable=AsyncMock)
mock_process = AsyncMock()
mock_process.communicate.return_value = (b"301\n", b"")
mock_process.returncode = 0 # Simulate successful request
mock_exec.return_value = mock_process

repo_exists = await check_repo_exists(DEMO_URL)

assert repo_exists


@pytest.mark.asyncio
async def test_clone_with_timeout(run_command_mock: AsyncMock) -> None:
"""Test cloning a repository when a timeout occurs.
Expand Down
7 changes: 1 addition & 6 deletions tests/test_git_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,7 @@
import pytest

from gitingest.utils.exceptions import InvalidGitHubTokenError
from gitingest.utils.git_utils import (
create_git_auth_header,
create_git_command,
is_github_host,
validate_github_token,
)
from gitingest.utils.git_utils import create_git_auth_header, create_git_command, is_github_host, validate_github_token

if TYPE_CHECKING:
from pathlib import Path
Expand Down
Loading