diff --git a/src/gitingest/schemas/ingestion.py b/src/gitingest/schemas/ingestion.py index 92572aeb..21369075 100644 --- a/src/gitingest/schemas/ingestion.py +++ b/src/gitingest/schemas/ingestion.py @@ -41,7 +41,7 @@ class IngestionQuery(BaseModel): # pylint: disable=too-many-instance-attributes tag : str | None The tag of the repository. max_file_size : int - The maximum file size to ingest (default: 10 MB). + The maximum file size to ingest in bytes (default: 10 MB). ignore_patterns : set[str] The patterns to ignore (default: ``set()``). include_patterns : set[str] | None diff --git a/src/server/models.py b/src/server/models.py index a1aed314..533da611 100644 --- a/src/server/models.py +++ b/src/server/models.py @@ -3,14 +3,16 @@ from __future__ import annotations from enum import Enum -from typing import Union +from typing import TYPE_CHECKING, Union from pydantic import BaseModel, Field, field_validator from gitingest.utils.compat_func import removesuffix +from server.server_config import MAX_FILE_SIZE_KB # needed for type checking (pydantic) -from server.form_types import IntForm, OptStrForm, StrForm # noqa: TC001 (typing-only-first-party-import) +if TYPE_CHECKING: + from server.form_types import IntForm, OptStrForm, StrForm class PatternType(str, Enum): @@ -39,7 +41,7 @@ class IngestRequest(BaseModel): """ input_text: str = Field(..., description="Git repository URL or slug to ingest") - max_file_size: int = Field(..., ge=0, le=500, description="File size slider position (0-500)") + max_file_size: int = Field(..., ge=1, le=MAX_FILE_SIZE_KB, description="File size in KB") pattern_type: PatternType = Field(default=PatternType.EXCLUDE, description="Pattern type for file filtering") pattern: str = Field(default="", description="Glob/regex pattern for file filtering") token: str | None = Field(default=None, description="GitHub PAT for private repositories") diff --git a/src/server/query_processor.py b/src/server/query_processor.py index 88d7ff50..172330ac 100644 
--- a/src/server/query_processor.py +++ b/src/server/query_processor.py @@ -13,12 +13,12 @@ from server.models import IngestErrorResponse, IngestResponse, IngestSuccessResponse, PatternType from server.s3_utils import generate_s3_file_path, is_s3_enabled, upload_to_s3 from server.server_config import MAX_DISPLAY_SIZE -from server.server_utils import Colors, log_slider_to_size +from server.server_utils import Colors async def process_query( input_text: str, - slider_position: int, + max_file_size: int, pattern_type: PatternType, pattern: str, token: str | None = None, @@ -32,8 +32,8 @@ async def process_query( ---------- input_text : str Input text provided by the user, typically a Git repository URL or slug. - slider_position : int - Position of the slider, representing the maximum file size in the query. + max_file_size : int + Max file size in KB to be included in the digest. pattern_type : PatternType Type of pattern to use (either "include" or "exclude") pattern : str @@ -55,8 +55,6 @@ async def process_query( if token: validate_github_token(token) - max_file_size = log_slider_to_size(slider_position) - try: query = await parse_remote_repo(input_text, token=token) except Exception as exc: @@ -65,7 +63,7 @@ async def process_query( return IngestErrorResponse(error=str(exc)) query.url = cast("str", query.url) - query.max_file_size = max_file_size + query.max_file_size = max_file_size * 1024 # Convert to bytes since we currently use KB in higher levels query.ignore_patterns, query.include_patterns = process_patterns( exclude_patterns=pattern if pattern_type == PatternType.EXCLUDE else None, include_patterns=pattern if pattern_type == PatternType.INCLUDE else None, @@ -142,7 +140,7 @@ async def process_query( digest_url=digest_url, tree=tree, content=content, - default_max_file_size=slider_position, + default_max_file_size=max_file_size, pattern_type=pattern_type, pattern=pattern, ) diff --git a/src/server/routers/ingest.py b/src/server/routers/ingest.py index 
42efefdf..ce9e6512 100644 --- a/src/server/routers/ingest.py +++ b/src/server/routers/ingest.py @@ -11,7 +11,7 @@ from server.models import IngestRequest from server.routers_utils import COMMON_INGEST_RESPONSES, _perform_ingestion from server.s3_utils import is_s3_enabled -from server.server_config import MAX_DISPLAY_SIZE +from server.server_config import DEFAULT_FILE_SIZE_KB from server.server_utils import limiter ingest_counter = Counter("gitingest_ingest_total", "Number of ingests", ["status", "url"]) @@ -58,7 +58,7 @@ async def api_ingest_get( request: Request, # noqa: ARG001 (unused-function-argument) # pylint: disable=unused-argument user: str, repository: str, - max_file_size: int = MAX_DISPLAY_SIZE, + max_file_size: int = DEFAULT_FILE_SIZE_KB, pattern_type: str = "exclude", pattern: str = "", token: str = "", @@ -74,7 +74,7 @@ async def api_ingest_get( - **repository** (`str`): GitHub repository name **Query Parameters** - - **max_file_size** (`int`, optional): Maximum file size to include in the digest (default: 50 KB) + - **max_file_size** (`int`, optional): Maximum file size in KB to include in the digest (default: 5120 KB) - **pattern_type** (`str`, optional): Type of pattern to use ("include" or "exclude", default: "exclude") - **pattern** (`str`, optional): Pattern to include or exclude in the query (default: "") - **token** (`str`, optional): GitHub personal access token for private repositories (default: "") diff --git a/src/server/routers_utils.py b/src/server/routers_utils.py index 83242e26..3eaf0e59 100644 --- a/src/server/routers_utils.py +++ b/src/server/routers_utils.py @@ -33,7 +33,7 @@ async def _perform_ingestion( result = await process_query( input_text=input_text, - slider_position=max_file_size, + max_file_size=max_file_size, pattern_type=pattern_type, pattern=pattern, token=token, diff --git a/src/server/server_config.py b/src/server/server_config.py index 0257db8b..d0b51c4d 100644 --- a/src/server/server_config.py +++ 
b/src/server/server_config.py @@ -10,8 +10,8 @@ DELETE_REPO_AFTER: int = 60 * 60 # In seconds (1 hour) # Slider configuration (if updated, update the logSliderToSize function in src/static/js/utils.js) -MAX_FILE_SIZE_KB: int = 100 * 1024 # 100 MB -MAX_SLIDER_POSITION: int = 500 # Maximum slider position +DEFAULT_FILE_SIZE_KB: int = 5 * 1024 # 5 MB +MAX_FILE_SIZE_KB: int = 100 * 1024 # 100 MB EXAMPLE_REPOS: list[dict[str, str]] = [ {"name": "Gitingest", "url": "https://github.com/coderamp-labs/gitingest"}, diff --git a/src/server/server_utils.py b/src/server/server_utils.py index b0371661..ee6f9eca 100644 --- a/src/server/server_utils.py +++ b/src/server/server_utils.py @@ -1,7 +1,6 @@ """Utility functions for the server.""" import asyncio -import math import shutil import time from contextlib import asynccontextmanager, suppress @@ -15,7 +14,7 @@ from slowapi.util import get_remote_address from gitingest.config import TMP_BASE_PATH -from server.server_config import DELETE_REPO_AFTER, MAX_FILE_SIZE_KB, MAX_SLIDER_POSITION +from server.server_config import DELETE_REPO_AFTER # Initialize a rate limiter limiter = Limiter(key_func=get_remote_address) @@ -161,24 +160,6 @@ def _append_line(path: Path, line: str) -> None: fp.write(f"{line}\n") -def log_slider_to_size(position: int) -> int: - """Convert a slider position to a file size in bytes using a logarithmic scale. - - Parameters - ---------- - position : int - Slider position ranging from 0 to 500. - - Returns - ------- - int - File size in bytes corresponding to the slider position. 
- - """ - maxv = math.log(MAX_FILE_SIZE_KB) - return round(math.exp(maxv * pow(position / MAX_SLIDER_POSITION, 1.5))) * 1024 - - ## Color printing utility class Colors: """ANSI color codes.""" diff --git a/src/server/templates/components/git_form.jinja b/src/server/templates/components/git_form.jinja index 8ea0821f..e2e7c91c 100644 --- a/src/server/templates/components/git_form.jinja +++ b/src/server/templates/components/git_form.jinja @@ -76,12 +76,12 @@ +