From e3f1b5433f0f6737cb278f8b9845cf3fed20450d Mon Sep 17 00:00:00 2001 From: ix-56h Date: Thu, 3 Jul 2025 23:00:31 +0200 Subject: [PATCH 1/3] replace curl by httpx --- pyproject.toml | 1 + requirements.txt | 1 + src/server/query_processor.py | 1 + tests/test_git_utils.py | 7 +------ 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 641f9c30..f1dfbf79 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ dependencies = [ "pathspec>=0.12.1", "typing_extensions>= 4.0.0; python_version < '3.10'", "uvicorn>=0.11.7", # Minimum safe release (https://osv.dev/vulnerability/PYSEC-2020-150) + "httpx>= 0.28.1", ] license = {file = "LICENSE"} diff --git a/requirements.txt b/requirements.txt index 74042c48..830bb0e2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ click>=8.0.0 fastapi[standard]>=0.109.1 # Vulnerable to https://osv.dev/vulnerability/PYSEC-2024-38 +httpx>=0.28.1 pathspec>=0.12.1 pydantic python-dotenv diff --git a/src/server/query_processor.py b/src/server/query_processor.py index c5a15e8e..3a3d7909 100644 --- a/src/server/query_processor.py +++ b/src/server/query_processor.py @@ -10,6 +10,7 @@ from gitingest.query_parser import IngestionQuery, parse_query from gitingest.utils.git_utils import validate_github_token from server.models import IngestErrorResponse, IngestResponse, IngestSuccessResponse + from server.server_config import MAX_DISPLAY_SIZE from server.server_utils import Colors, log_slider_to_size diff --git a/tests/test_git_utils.py b/tests/test_git_utils.py index 2b82b043..48408130 100644 --- a/tests/test_git_utils.py +++ b/tests/test_git_utils.py @@ -12,12 +12,7 @@ import pytest from gitingest.utils.exceptions import InvalidGitHubTokenError -from gitingest.utils.git_utils import ( - create_git_auth_header, - create_git_command, - is_github_host, - validate_github_token, -) +from gitingest.utils.git_utils import create_git_auth_header, create_git_command, is_github_host, validate_github_token if TYPE_CHECKING: from pathlib import Path From 16fa2c6c071f112951d8fddd8302f4db9e2921dd Mon Sep 17 00:00:00 2001 From: ix-56h Date: Fri, 4 Jul 2025 00:57:18 +0200 Subject: [PATCH 2/3] add httpx --- pyproject.toml | 2 +- src/gitingest/utils/git_utils.py | 55 ++++++++++++-------------------- 2 files changed, 21 insertions(+), 36 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f1dfbf79..7241d8ce 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,6 +7,7 @@ requires-python = ">= 3.8" dependencies = [ "click>=8.0.0", "fastapi[standard]>=0.109.1", # Minimum safe release (https://osv.dev/vulnerability/PYSEC-2024-38) + "httpx>= 0.28.1", "pydantic", "python-dotenv", "slowapi", @@ -15,7 +16,6 @@ dependencies = [ "pathspec>=0.12.1", "typing_extensions>= 4.0.0; python_version < '3.10'", "uvicorn>=0.11.7", # Minimum safe release (https://osv.dev/vulnerability/PYSEC-2020-150) - "httpx>= 0.28.1", ] license = {file = "LICENSE"} diff --git a/src/gitingest/utils/git_utils.py b/src/gitingest/utils/git_utils.py index 2e1e2ebb..0b5abc3a 100644 --- a/src/gitingest/utils/git_utils.py +++ b/src/gitingest/utils/git_utils.py @@ -4,11 +4,11 @@ import asyncio import base64 -import os import re from typing import Final from urllib.parse import urlparse +import httpx from starlette.status import ( HTTP_200_OK, HTTP_301_MOVED_PERMANENTLY, @@ -115,46 +115,31 @@ async def check_repo_exists(url: str, token: str | None = None) -> bool: If the host returns an unrecognised status code. """ - # TODO: use `requests` instead of `curl` - cmd: list[str] = [ - "curl", - "--silent", # Suppress output - "--location", # Follow redirects - "--write-out", - "%{http_code}", # Write the HTTP status code to stdout - "-o", - os.devnull, - ] + headers = {} if token and is_github_host(url): host, owner, repo = _parse_github_url(url) # Public GitHub vs. GitHub Enterprise base_api = "https://api.github.com" if host == "github.com" else f"https://{host}/api/v3" url = f"{base_api}/repos/{owner}/{repo}" - cmd += ["--header", f"Authorization: Bearer {token}"] - - cmd.append(url) - - proc = await asyncio.create_subprocess_exec( - *cmd, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - ) - stdout, _ = await proc.communicate() - - if proc.returncode != 0: - return False - - status = int(stdout.decode().strip()) - if status in {HTTP_200_OK, HTTP_301_MOVED_PERMANENTLY}: - return True - # TODO: handle 302 redirects - if status in {HTTP_404_NOT_FOUND, HTTP_302_FOUND}: - return False - if status in {HTTP_401_UNAUTHORIZED, HTTP_403_FORBIDDEN}: - return False - msg = f"Unexpected HTTP status {status} for {url}" - raise RuntimeError(msg) + headers["Authorization"] = f"Bearer {token}" + + async with httpx.AsyncClient(follow_redirects=True) as client: + try: + response = await client.head(url, headers=headers) + status = response.status_code + + if status in {HTTP_200_OK, HTTP_301_MOVED_PERMANENTLY}: + return True + # TODO: handle 302 redirects + if status in {HTTP_404_NOT_FOUND, HTTP_302_FOUND}: + return False + if status in {HTTP_401_UNAUTHORIZED, HTTP_403_FORBIDDEN}: + return False + msg = f"Unexpected HTTP status {status} for {url}" + raise RuntimeError(msg) + except httpx.RequestError: + return False def _parse_github_url(url: str) -> tuple[str, str, str]: From 7bfa2418daa2ebbb580ee4faa0d91bd498376cd9 Mon Sep 17 00:00:00 2001 From: Filip Christiansen <22807962+filipchristiansen@users.noreply.github.com> Date: Fri, 4 Jul 2025 11:15:21 +0200 Subject: [PATCH 3/3] fix httpx tests etc --- pyproject.toml | 4 +-- requirements.txt | 2 +- src/gitingest/utils/git_utils.py | 29 ++++++---------- src/server/query_processor.py | 1 - tests/test_clone.py | 58 +++++++++----------------------- 5 files changed, 28 insertions(+), 66 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 7241d8ce..8555c2fc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,13 +7,13 @@ requires-python = ">= 3.8" dependencies = [ "click>=8.0.0", "fastapi[standard]>=0.109.1", # Minimum safe release (https://osv.dev/vulnerability/PYSEC-2024-38) - "httpx>= 0.28.1", + "httpx", + "pathspec>=0.12.1", "pydantic", "python-dotenv", "slowapi", "starlette>=0.40.0", # Minimum safe release (https://osv.dev/vulnerability/GHSA-f96h-pmfr-66vw) "tiktoken>=0.7.0", # Support for o200k_base encoding - "pathspec>=0.12.1", "typing_extensions>= 4.0.0; python_version < '3.10'", "uvicorn>=0.11.7", # Minimum safe release (https://osv.dev/vulnerability/PYSEC-2020-150) ] diff --git a/requirements.txt b/requirements.txt index 830bb0e2..f9f9b50a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ click>=8.0.0 fastapi[standard]>=0.109.1 # Vulnerable to https://osv.dev/vulnerability/PYSEC-2024-38 -httpx>=0.28.1 +httpx pathspec>=0.12.1 pydantic python-dotenv diff --git a/src/gitingest/utils/git_utils.py b/src/gitingest/utils/git_utils.py index 0b5abc3a..70b4cf4b 100644 --- a/src/gitingest/utils/git_utils.py +++ b/src/gitingest/utils/git_utils.py @@ -9,14 +9,7 @@ from urllib.parse import urlparse import httpx -from starlette.status import ( - HTTP_200_OK, - HTTP_301_MOVED_PERMANENTLY, - HTTP_302_FOUND, - HTTP_401_UNAUTHORIZED, - HTTP_403_FORBIDDEN, - HTTP_404_NOT_FOUND, -) +from starlette.status import HTTP_200_OK, HTTP_401_UNAUTHORIZED, HTTP_403_FORBIDDEN, HTTP_404_NOT_FOUND from gitingest.utils.compat_func import removesuffix from gitingest.utils.exceptions import InvalidGitHubTokenError @@ -127,20 +120,18 @@ async def check_repo_exists(url: str, token: str | None = None) -> bool: async with httpx.AsyncClient(follow_redirects=True) as client: try: response = await client.head(url, headers=headers) - status = response.status_code - - if status in {HTTP_200_OK, HTTP_301_MOVED_PERMANENTLY}: - return True - # TODO: handle 302 redirects - if status in {HTTP_404_NOT_FOUND, HTTP_302_FOUND}: - return False - if status in {HTTP_401_UNAUTHORIZED, HTTP_403_FORBIDDEN}: - return False - msg = f"Unexpected HTTP status {status} for {url}" - raise RuntimeError(msg) except httpx.RequestError: return False + status_code = response.status_code + + if status_code == HTTP_200_OK: + return True + if status_code in {HTTP_401_UNAUTHORIZED, HTTP_403_FORBIDDEN, HTTP_404_NOT_FOUND}: + return False + msg = f"Unexpected HTTP status {status_code} for {url}" + raise RuntimeError(msg) + def _parse_github_url(url: str) -> tuple[str, str, str]: """Parse a GitHub URL and return (hostname, owner, repo). diff --git a/src/server/query_processor.py b/src/server/query_processor.py index 3a3d7909..c5a15e8e 100644 --- a/src/server/query_processor.py +++ b/src/server/query_processor.py @@ -10,7 +10,6 @@ from gitingest.query_parser import IngestionQuery, parse_query from gitingest.utils.git_utils import validate_github_token from server.models import IngestErrorResponse, IngestResponse, IngestSuccessResponse - from server.server_config import MAX_DISPLAY_SIZE from server.server_utils import Colors, log_slider_to_size diff --git a/tests/test_clone.py b/tests/test_clone.py index 9ffaa376..4048543c 100644 --- a/tests/test_clone.py +++ b/tests/test_clone.py @@ -9,8 +9,10 @@ from pathlib import Path from unittest.mock import AsyncMock +import httpx import pytest from pytest_mock import MockerFixture +from starlette.status import HTTP_200_OK, HTTP_401_UNAUTHORIZED, HTTP_403_FORBIDDEN, HTTP_404_NOT_FOUND from gitingest.clone import clone_repo from gitingest.schemas import CloneConfig @@ -87,35 +89,24 @@ async def test_clone_nonexistent_repository(repo_exists_true: AsyncMock) -> None @pytest.mark.asyncio @pytest.mark.parametrize( - ("mock_stdout", "return_code", "expected"), + ("status_code", "expected"), [ - (b"200\n", 0, True), # Existing repo - (b"404\n", 0, False), # Non-existing repo - (b"200\n", 1, False), # Failed request + (HTTP_200_OK, True), + (HTTP_401_UNAUTHORIZED, False), + (HTTP_403_FORBIDDEN, False), + (HTTP_404_NOT_FOUND, False), ], ) -async def test_check_repo_exists( - mock_stdout: bytes, - *, - return_code: int, - expected: bool, - mocker: MockerFixture, -) -> None: - """Test the ``check_repo_exists`` function with different Git HTTP responses. - - Given various stdout lines and return codes: - When ``check_repo_exists`` is called, - Then it should correctly indicate whether the repository exists. - """ - mock_exec = mocker.patch("asyncio.create_subprocess_exec", new_callable=AsyncMock) - mock_process = AsyncMock() - mock_process.communicate.return_value = (mock_stdout, b"") - mock_process.returncode = return_code - mock_exec.return_value = mock_process +async def test_check_repo_exists(status_code: int, *, expected: bool, mocker: MockerFixture) -> None: + """Verify that ``check_repo_exists`` interprets httpx results correctly.""" + mock_client = AsyncMock() + mock_client.__aenter__.return_value = mock_client # context-manager protocol + mock_client.head.return_value = httpx.Response(status_code=status_code) + mocker.patch("httpx.AsyncClient", return_value=mock_client) - repo_exists = await check_repo_exists(DEMO_URL) + result = await check_repo_exists(DEMO_URL) - assert repo_exists is expected + assert result is expected @pytest.mark.asyncio @@ -218,25 +209,6 @@ async def test_check_repo_exists_with_redirect(mocker: MockerFixture) -> None: assert repo_exists is False -@pytest.mark.asyncio -async def test_check_repo_exists_with_permanent_redirect(mocker: MockerFixture) -> None: - """Test ``check_repo_exists`` when a permanent redirect (301) is returned. - - Given a URL that responds with "301 Found": - When ``check_repo_exists`` is called, - Then it should return ``True``, indicating the repo may exist at the new location. - """ - mock_exec = mocker.patch("asyncio.create_subprocess_exec", new_callable=AsyncMock) - mock_process = AsyncMock() - mock_process.communicate.return_value = (b"301\n", b"") - mock_process.returncode = 0 # Simulate successful request - mock_exec.return_value = mock_process - - repo_exists = await check_repo_exists(DEMO_URL) - - assert repo_exists - - @pytest.mark.asyncio async def test_clone_with_timeout(run_command_mock: AsyncMock) -> None: """Test cloning a repository when a timeout occurs.