diff --git a/pyproject.toml b/pyproject.toml index a6792cb9..c0f38d1b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,12 +7,13 @@ requires-python = ">= 3.8" dependencies = [ "click>=8.0.0", "fastapi[standard]>=0.109.1", # Minimum safe release (https://osv.dev/vulnerability/PYSEC-2024-38) + "httpx", + "pathspec>=0.12.1", "pydantic", "python-dotenv", "slowapi", "starlette>=0.40.0", # Minimum safe release (https://osv.dev/vulnerability/GHSA-f96h-pmfr-66vw) "tiktoken>=0.7.0", # Support for o200k_base encoding - "pathspec>=0.12.1", "typing_extensions>= 4.0.0; python_version < '3.10'", "uvicorn>=0.11.7", # Minimum safe release (https://osv.dev/vulnerability/PYSEC-2020-150) ] diff --git a/requirements.txt b/requirements.txt index 74042c48..f9f9b50a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ click>=8.0.0 fastapi[standard]>=0.109.1 # Vulnerable to https://osv.dev/vulnerability/PYSEC-2024-38 +httpx pathspec>=0.12.1 pydantic python-dotenv diff --git a/src/gitingest/utils/git_utils.py b/src/gitingest/utils/git_utils.py index 2e1e2ebb..70b4cf4b 100644 --- a/src/gitingest/utils/git_utils.py +++ b/src/gitingest/utils/git_utils.py @@ -4,19 +4,12 @@ import asyncio import base64 -import os import re from typing import Final from urllib.parse import urlparse -from starlette.status import ( - HTTP_200_OK, - HTTP_301_MOVED_PERMANENTLY, - HTTP_302_FOUND, - HTTP_401_UNAUTHORIZED, - HTTP_403_FORBIDDEN, - HTTP_404_NOT_FOUND, -) +import httpx +from starlette.status import HTTP_200_OK, HTTP_401_UNAUTHORIZED, HTTP_403_FORBIDDEN, HTTP_404_NOT_FOUND from gitingest.utils.compat_func import removesuffix from gitingest.utils.exceptions import InvalidGitHubTokenError @@ -115,45 +108,28 @@ async def check_repo_exists(url: str, token: str | None = None) -> bool: If the host returns an unrecognised status code. """ - # TODO: use `requests` instead of `curl` - cmd: list[str] = [ - "curl", - "--silent", # Suppress output - "--location", # Follow redirects - "--write-out", - "%{http_code}", # Write the HTTP status code to stdout - "-o", - os.devnull, - ] + headers = {} if token and is_github_host(url): host, owner, repo = _parse_github_url(url) # Public GitHub vs. GitHub Enterprise base_api = "https://api.github.com" if host == "github.com" else f"https://{host}/api/v3" url = f"{base_api}/repos/{owner}/{repo}" - cmd += ["--header", f"Authorization: Bearer {token}"] + headers["Authorization"] = f"Bearer {token}" - cmd.append(url) + async with httpx.AsyncClient(follow_redirects=True) as client: + try: + response = await client.head(url, headers=headers) + except httpx.RequestError: + return False - proc = await asyncio.create_subprocess_exec( - *cmd, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - ) - stdout, _ = await proc.communicate() + status_code = response.status_code - if proc.returncode != 0: - return False - - status = int(stdout.decode().strip()) - if status in {HTTP_200_OK, HTTP_301_MOVED_PERMANENTLY}: + if status_code == HTTP_200_OK: return True - # TODO: handle 302 redirects - if status in {HTTP_404_NOT_FOUND, HTTP_302_FOUND}: - return False - if status in {HTTP_401_UNAUTHORIZED, HTTP_403_FORBIDDEN}: + if status_code in {HTTP_401_UNAUTHORIZED, HTTP_403_FORBIDDEN, HTTP_404_NOT_FOUND}: return False - msg = f"Unexpected HTTP status {status} for {url}" + msg = f"Unexpected HTTP status {status_code} for {url}" raise RuntimeError(msg) diff --git a/tests/test_clone.py b/tests/test_clone.py index 9ffaa376..4048543c 100644 --- a/tests/test_clone.py +++ b/tests/test_clone.py @@ -9,8 +9,10 @@ from pathlib import Path from unittest.mock import AsyncMock +import httpx import pytest from pytest_mock import MockerFixture +from starlette.status import HTTP_200_OK, HTTP_401_UNAUTHORIZED, HTTP_403_FORBIDDEN, HTTP_404_NOT_FOUND from gitingest.clone import clone_repo from gitingest.schemas import CloneConfig @@ -87,35 +89,24 @@ async def test_clone_nonexistent_repository(repo_exists_true: AsyncMock) -> None @pytest.mark.asyncio @pytest.mark.parametrize( - ("mock_stdout", "return_code", "expected"), + ("status_code", "expected"), [ - (b"200\n", 0, True), # Existing repo - (b"404\n", 0, False), # Non-existing repo - (b"200\n", 1, False), # Failed request + (HTTP_200_OK, True), + (HTTP_401_UNAUTHORIZED, False), + (HTTP_403_FORBIDDEN, False), + (HTTP_404_NOT_FOUND, False), ], ) -async def test_check_repo_exists( - mock_stdout: bytes, - *, - return_code: int, - expected: bool, - mocker: MockerFixture, -) -> None: - """Test the ``check_repo_exists`` function with different Git HTTP responses. - - Given various stdout lines and return codes: - When ``check_repo_exists`` is called, - Then it should correctly indicate whether the repository exists. - """ - mock_exec = mocker.patch("asyncio.create_subprocess_exec", new_callable=AsyncMock) - mock_process = AsyncMock() - mock_process.communicate.return_value = (mock_stdout, b"") - mock_process.returncode = return_code - mock_exec.return_value = mock_process +async def test_check_repo_exists(status_code: int, *, expected: bool, mocker: MockerFixture) -> None: + """Verify that ``check_repo_exists`` interprets httpx results correctly.""" + mock_client = AsyncMock() + mock_client.__aenter__.return_value = mock_client # context-manager protocol + mock_client.head.return_value = httpx.Response(status_code=status_code) + mocker.patch("httpx.AsyncClient", return_value=mock_client) - repo_exists = await check_repo_exists(DEMO_URL) + result = await check_repo_exists(DEMO_URL) - assert repo_exists is expected + assert result is expected @pytest.mark.asyncio @@ -218,25 +209,6 @@ async def test_check_repo_exists_with_redirect(mocker: MockerFixture) -> None: assert repo_exists is False -@pytest.mark.asyncio -async def test_check_repo_exists_with_permanent_redirect(mocker: MockerFixture) -> None: - """Test ``check_repo_exists`` when a permanent redirect (301) is returned. - - Given a URL that responds with "301 Found": - When ``check_repo_exists`` is called, - Then it should return ``True``, indicating the repo may exist at the new location. - """ - mock_exec = mocker.patch("asyncio.create_subprocess_exec", new_callable=AsyncMock) - mock_process = AsyncMock() - mock_process.communicate.return_value = (b"301\n", b"") - mock_process.returncode = 0 # Simulate successful request - mock_exec.return_value = mock_process - - repo_exists = await check_repo_exists(DEMO_URL) - - assert repo_exists - - @pytest.mark.asyncio async def test_clone_with_timeout(run_command_mock: AsyncMock) -> None: """Test cloning a repository when a timeout occurs. diff --git a/tests/test_git_utils.py b/tests/test_git_utils.py index 2b82b043..48408130 100644 --- a/tests/test_git_utils.py +++ b/tests/test_git_utils.py @@ -12,12 +12,7 @@ import pytest from gitingest.utils.exceptions import InvalidGitHubTokenError -from gitingest.utils.git_utils import ( - create_git_auth_header, - create_git_command, - is_github_host, - validate_github_token, -) +from gitingest.utils.git_utils import create_git_auth_header, create_git_command, is_github_host, validate_github_token if TYPE_CHECKING: from pathlib import Path