diff --git a/src/gitingest/entrypoint.py b/src/gitingest/entrypoint.py index 0af4a4ba..b9a50477 100644 --- a/src/gitingest/entrypoint.py +++ b/src/gitingest/entrypoint.py @@ -13,7 +13,7 @@ async def ingest_async( source: str, - max_file_size: int = 10 * 1024 * 1024, # 10 MB + max_file_size: float = 10 * 1024 * 1024, # 10 MB include_patterns: Optional[Union[str, Set[str]]] = None, exclude_patterns: Optional[Union[str, Set[str]]] = None, branch: Optional[str] = None, @@ -30,7 +30,7 @@ async def ingest_async( ---------- source : str The source to analyze, which can be a URL (for a Git repository) or a local directory path. - max_file_size : int + max_file_size : float Maximum allowed file size for file ingestion. Files larger than this size are ignored, by default 10*1024*1024 (10 MB). include_patterns : Union[str, Set[str]], optional @@ -98,7 +98,7 @@ async def ingest_async( def ingest( source: str, - max_file_size: int = 10 * 1024 * 1024, # 10 MB + max_file_size: float = 10 * 1024 * 1024, # 10 MB include_patterns: Optional[Union[str, Set[str]]] = None, exclude_patterns: Optional[Union[str, Set[str]]] = None, branch: Optional[str] = None, @@ -115,7 +115,7 @@ def ingest( ---------- source : str The source to analyze, which can be a URL (for a Git repository) or a local directory path. - max_file_size : int + max_file_size : float Maximum allowed file size for file ingestion. Files larger than this size are ignored, by default 10*1024*1024 (10 MB). include_patterns : Union[str, Set[str]], optional diff --git a/src/gitingest/query_parsing.py b/src/gitingest/query_parsing.py index 5d547356..08f7cc45 100644 --- a/src/gitingest/query_parsing.py +++ b/src/gitingest/query_parsing.py @@ -25,7 +25,7 @@ async def parse_query( source: str, - max_file_size: int, + max_file_size: float, from_web: bool, include_patterns: Optional[Union[str, Set[str]]] = None, ignore_patterns: Optional[Union[str, Set[str]]] = None, @@ -41,7 +41,7 @@ async def parse_query( ---------- source : str The source URL or file path to parse. - max_file_size : int + max_file_size : float The maximum file size in bytes to include. from_web : bool Flag indicating whether the source is a web URL. diff --git a/src/gitingest/schemas/ingestion_schema.py b/src/gitingest/schemas/ingestion_schema.py index 02b1c678..edf484ab 100644 --- a/src/gitingest/schemas/ingestion_schema.py +++ b/src/gitingest/schemas/ingestion_schema.py @@ -54,7 +54,7 @@ class IngestionQuery(BaseModel): # pylint: disable=too-many-instance-attributes type: Optional[str] = None branch: Optional[str] = None commit: Optional[str] = None - max_file_size: int = Field(default=MAX_FILE_SIZE) + max_file_size: float = Field(default=MAX_FILE_SIZE) ignore_patterns: Optional[Set[str]] = None include_patterns: Optional[Set[str]] = None diff --git a/src/server/query_processor.py b/src/server/query_processor.py index 00b1c640..1d5f08a7 100644 --- a/src/server/query_processor.py +++ b/src/server/query_processor.py @@ -1,6 +1,7 @@ """Process a query by parsing input, cloning a repository, and generating a summary.""" from functools import partial +from typing import Optional from fastapi import Request from starlette.templating import _TemplateResponse @@ -15,10 +16,11 @@ async def process_query( request: Request, input_text: str, - slider_position: int, + slider_position: float, pattern_type: str = "exclude", pattern: str = "", is_index: bool = False, + exact_file_size: Optional[str] = None, ) -> _TemplateResponse: """ Process a query by parsing input, cloning a repository, and generating a summary. @@ -32,7 +34,7 @@ async def process_query( The HTTP request object. input_text : str Input text provided by the user, typically a Git repository URL or slug. - slider_position : int + slider_position : float Position of the slider, representing the maximum file size in the query. pattern_type : str Type of pattern to use, either "include" or "exclude" (default is "exclude"). @@ -40,7 +42,8 @@ async def process_query( Pattern to include or exclude in the query, depending on the pattern type. is_index : bool Flag indicating whether the request is for the index page (default is False). - + exact_file_size : str + The exact file size value in KB, which preserves decimal precision. Returns ------- _TemplateResponse @@ -62,7 +65,17 @@ async def process_query( template = "index.jinja" if is_index else "git.jinja" template_response = partial(templates.TemplateResponse, name=template) - max_file_size = log_slider_to_size(slider_position) + + # Use exact_file_size if provided, otherwise use the slider position + if exact_file_size is not None and exact_file_size.strip(): + try: + # Convert exact_file_size from KB to bytes + max_file_size = float(exact_file_size) * 1024 + except ValueError: + # If conversion fails, fall back to slider position + max_file_size = log_slider_to_size(int(slider_position)) + else: + max_file_size = log_slider_to_size(int(slider_position)) context = { "request": request, @@ -125,13 +138,16 @@ async def process_query( "tree": tree, "content": content, "ingest_id": query.id, + "exact_file_size": ( + exact_file_size if max_file_size < 1024 else max_file_size / 1024 + ), # Pass the exact file size back to the template } ) return template_response(context=context) -def _print_query(url: str, max_file_size: int, pattern_type: str, pattern: str) -> None: +def _print_query(url: str, max_file_size: float, pattern_type: str, pattern: str) -> None: """ Print a formatted summary of the query details, including the URL, file size, and pattern information, for easier debugging or logging. @@ -140,7 +156,7 @@ def _print_query(url: str, max_file_size: int, pattern_type: str, pattern: str) ---------- url : str The URL associated with the query. - max_file_size : int + max_file_size : float The maximum file size allowed for the query, in bytes. pattern_type : str Specifies the type of pattern to use, either "include" or "exclude". @@ -148,15 +164,23 @@ def _print_query(url: str, max_file_size: int, pattern_type: str, pattern: str) The actual pattern string to include or exclude in the query. """ print(f"{Colors.WHITE}{url:<20}{Colors.END}", end="") - if int(max_file_size / 1024) != 50: - print(f" | {Colors.YELLOW}Size: {int(max_file_size/1024)}kb{Colors.END}", end="") + if max_file_size / 1024 != 50.00: + # Format with up to 2 decimal places for KB values + kb_value = max_file_size / 1024 + # If it's a whole number, display as integer + if kb_value == int(kb_value): + kb_display = f"{int(kb_value)}kb" + else: + # Otherwise show up to 2 decimal places, removing trailing zeros + kb_display = f"{kb_value:.2f}".rstrip("0").rstrip(".") + "kb" + print(f" | {Colors.YELLOW}Size: {kb_display}{Colors.END}", end="") if pattern_type == "include" and pattern != "": print(f" | {Colors.YELLOW}Include {pattern}{Colors.END}", end="") elif pattern_type == "exclude" and pattern != "": print(f" | {Colors.YELLOW}Exclude {pattern}{Colors.END}", end="") -def _print_error(url: str, e: Exception, max_file_size: int, pattern_type: str, pattern: str) -> None: +def _print_error(url: str, e: Exception, max_file_size: float, pattern_type: str, pattern: str) -> None: """ Print a formatted error message including the URL, file size, pattern details, and the exception encountered, for debugging or logging purposes. @@ -167,7 +191,7 @@ def _print_error(url: str, e: Exception, max_file_size: int, pattern_type: str, The URL associated with the query that caused the error. e : Exception The exception raised during the query or process. - max_file_size : int + max_file_size : float The maximum file size allowed for the query, in bytes. pattern_type : str Specifies the type of pattern to use, either "include" or "exclude". @@ -179,7 +203,7 @@ def _print_error(url: str, e: Exception, max_file_size: int, pattern_type: str, print(f" | {Colors.RED}{e}{Colors.END}") -def _print_success(url: str, max_file_size: int, pattern_type: str, pattern: str, summary: str) -> None: +def _print_success(url: str, max_file_size: float, pattern_type: str, pattern: str, summary: str) -> None: """ Print a formatted success message, including the URL, file size, pattern details, and a summary with estimated tokens, for debugging or logging purposes. @@ -188,7 +212,7 @@ def _print_success(url: str, max_file_size: int, pattern_type: str, pattern: str ---------- url : str The URL associated with the successful query. - max_file_size : int + max_file_size : float The maximum file size allowed for the query, in bytes. pattern_type : str Specifies the type of pattern to use, either "include" or "exclude". diff --git a/src/server/routers/dynamic.py b/src/server/routers/dynamic.py index bfa31f68..8cced93e 100644 --- a/src/server/routers/dynamic.py +++ b/src/server/routers/dynamic.py @@ -47,9 +47,10 @@ async def catch_all(request: Request, full_path: str) -> HTMLResponse: async def process_catch_all( request: Request, input_text: str = Form(...), - max_file_size: int = Form(...), + max_file_size: float = Form(...), pattern_type: str = Form(...), pattern: str = Form(...), + exact_file_size: str = Form(None), ) -> HTMLResponse: """ Process the form submission with user input for query parameters. @@ -63,12 +64,14 @@ async def process_catch_all( The incoming request object, which provides context for rendering the response. input_text : str The input text provided by the user for processing, by default taken from the form. - max_file_size : int + max_file_size : float The maximum allowed file size for the input, specified by the user. pattern_type : str The type of pattern used for the query, specified by the user. pattern : str The pattern string used in the query, specified by the user. + exact_file_size : str + The exact file size value in KB, which preserves decimal precision. Returns ------- @@ -83,4 +86,5 @@ async def process_catch_all( pattern_type, pattern, is_index=False, + exact_file_size=exact_file_size, ) diff --git a/src/server/routers/index.py b/src/server/routers/index.py index 01b84730..fbfbd6df 100644 --- a/src/server/routers/index.py +++ b/src/server/routers/index.py @@ -34,7 +34,7 @@ async def home(request: Request) -> HTMLResponse: { "request": request, "examples": EXAMPLE_REPOS, - "default_file_size": 243, + "default_file_size": 243.14, }, ) @@ -44,9 +44,10 @@ async def home(request: Request) -> HTMLResponse: async def index_post( request: Request, input_text: str = Form(...), - max_file_size: int = Form(...), + max_file_size: float = Form(...), pattern_type: str = Form(...), pattern: str = Form(...), + exact_file_size: str = Form(None), ) -> HTMLResponse: """ Process the form submission with user input for query parameters. @@ -61,13 +62,14 @@ async def index_post( The incoming request object, which provides context for rendering the response. input_text : str The input text provided by the user for processing, by default taken from the form. - max_file_size : int + max_file_size : float The maximum allowed file size for the input, specified by the user. pattern_type : str The type of pattern used for the query, specified by the user. pattern : str The pattern string used in the query, specified by the user. - + exact_file_size : str + The exact file size value in KB, which preserves decimal precision. Returns ------- HTMLResponse @@ -81,4 +83,5 @@ async def index_post( pattern_type, pattern, is_index=True, + exact_file_size=exact_file_size, ) diff --git a/src/server/templates/components/git_form.jinja b/src/server/templates/components/git_form.jinja index 764fff70..c05c0d6d 100644 --- a/src/server/templates/components/git_form.jinja +++ b/src/server/templates/components/git_form.jinja @@ -84,12 +84,20 @@
-