diff --git a/src/gitingest/entrypoint.py b/src/gitingest/entrypoint.py
index 0af4a4ba..b9a50477 100644
--- a/src/gitingest/entrypoint.py
+++ b/src/gitingest/entrypoint.py
@@ -13,7 +13,7 @@
async def ingest_async(
source: str,
- max_file_size: int = 10 * 1024 * 1024, # 10 MB
+ max_file_size: float = 10 * 1024 * 1024, # 10 MB
include_patterns: Optional[Union[str, Set[str]]] = None,
exclude_patterns: Optional[Union[str, Set[str]]] = None,
branch: Optional[str] = None,
@@ -30,7 +30,7 @@ async def ingest_async(
----------
source : str
The source to analyze, which can be a URL (for a Git repository) or a local directory path.
- max_file_size : int
+ max_file_size : float
Maximum allowed file size for file ingestion. Files larger than this size are ignored, by default
10*1024*1024 (10 MB).
include_patterns : Union[str, Set[str]], optional
@@ -98,7 +98,7 @@ async def ingest_async(
def ingest(
source: str,
- max_file_size: int = 10 * 1024 * 1024, # 10 MB
+ max_file_size: float = 10 * 1024 * 1024, # 10 MB
include_patterns: Optional[Union[str, Set[str]]] = None,
exclude_patterns: Optional[Union[str, Set[str]]] = None,
branch: Optional[str] = None,
@@ -115,7 +115,7 @@ def ingest(
----------
source : str
The source to analyze, which can be a URL (for a Git repository) or a local directory path.
- max_file_size : int
+ max_file_size : float
Maximum allowed file size for file ingestion. Files larger than this size are ignored, by default
10*1024*1024 (10 MB).
include_patterns : Union[str, Set[str]], optional
diff --git a/src/gitingest/query_parsing.py b/src/gitingest/query_parsing.py
index 5d547356..08f7cc45 100644
--- a/src/gitingest/query_parsing.py
+++ b/src/gitingest/query_parsing.py
@@ -25,7 +25,7 @@
async def parse_query(
source: str,
- max_file_size: int,
+ max_file_size: float,
from_web: bool,
include_patterns: Optional[Union[str, Set[str]]] = None,
ignore_patterns: Optional[Union[str, Set[str]]] = None,
@@ -41,7 +41,7 @@ async def parse_query(
----------
source : str
The source URL or file path to parse.
- max_file_size : int
+ max_file_size : float
The maximum file size in bytes to include.
from_web : bool
Flag indicating whether the source is a web URL.
diff --git a/src/gitingest/schemas/ingestion_schema.py b/src/gitingest/schemas/ingestion_schema.py
index 02b1c678..edf484ab 100644
--- a/src/gitingest/schemas/ingestion_schema.py
+++ b/src/gitingest/schemas/ingestion_schema.py
@@ -54,7 +54,7 @@ class IngestionQuery(BaseModel): # pylint: disable=too-many-instance-attributes
type: Optional[str] = None
branch: Optional[str] = None
commit: Optional[str] = None
- max_file_size: int = Field(default=MAX_FILE_SIZE)
+ max_file_size: float = Field(default=MAX_FILE_SIZE)
ignore_patterns: Optional[Set[str]] = None
include_patterns: Optional[Set[str]] = None
diff --git a/src/server/query_processor.py b/src/server/query_processor.py
index 00b1c640..1d5f08a7 100644
--- a/src/server/query_processor.py
+++ b/src/server/query_processor.py
@@ -1,6 +1,7 @@
"""Process a query by parsing input, cloning a repository, and generating a summary."""
from functools import partial
+from typing import Optional
from fastapi import Request
from starlette.templating import _TemplateResponse
@@ -15,10 +16,11 @@
async def process_query(
request: Request,
input_text: str,
- slider_position: int,
+ slider_position: float,
pattern_type: str = "exclude",
pattern: str = "",
is_index: bool = False,
+ exact_file_size: Optional[str] = None,
) -> _TemplateResponse:
"""
Process a query by parsing input, cloning a repository, and generating a summary.
@@ -32,7 +34,7 @@ async def process_query(
The HTTP request object.
input_text : str
Input text provided by the user, typically a Git repository URL or slug.
- slider_position : int
+ slider_position : float
Position of the slider, representing the maximum file size in the query.
pattern_type : str
Type of pattern to use, either "include" or "exclude" (default is "exclude").
@@ -40,7 +42,8 @@ async def process_query(
Pattern to include or exclude in the query, depending on the pattern type.
is_index : bool
Flag indicating whether the request is for the index page (default is False).
-
+ exact_file_size : str, optional
+ Exact file size in KB (keeps decimals); if None, blank, or non-numeric, the slider position is used.
Returns
-------
_TemplateResponse
@@ -62,7 +65,17 @@ async def process_query(
template = "index.jinja" if is_index else "git.jinja"
template_response = partial(templates.TemplateResponse, name=template)
- max_file_size = log_slider_to_size(slider_position)
+
+ # Use exact_file_size if provided, otherwise use the slider position
+ if exact_file_size is not None and exact_file_size.strip():
+ try:
+ # Convert exact_file_size from KB to bytes
+ max_file_size = float(exact_file_size) * 1024
+ except ValueError:
+ # If conversion fails, fall back to slider position
+ max_file_size = log_slider_to_size(int(slider_position))
+ else:
+ max_file_size = log_slider_to_size(int(slider_position))
context = {
"request": request,
@@ -125,13 +138,16 @@ async def process_query(
"tree": tree,
"content": content,
"ingest_id": query.id,
+ "exact_file_size": (
+ exact_file_size if max_file_size < 1024 else max_file_size / 1024
+ ), # Pass the exact file size back to the template
}
)
return template_response(context=context)
-def _print_query(url: str, max_file_size: int, pattern_type: str, pattern: str) -> None:
+def _print_query(url: str, max_file_size: float, pattern_type: str, pattern: str) -> None:
"""
Print a formatted summary of the query details, including the URL, file size,
and pattern information, for easier debugging or logging.
@@ -140,7 +156,7 @@ def _print_query(url: str, max_file_size: int, pattern_type: str, pattern: str)
----------
url : str
The URL associated with the query.
- max_file_size : int
+ max_file_size : float
The maximum file size allowed for the query, in bytes.
pattern_type : str
Specifies the type of pattern to use, either "include" or "exclude".
@@ -148,15 +164,23 @@ def _print_query(url: str, max_file_size: int, pattern_type: str, pattern: str)
The actual pattern string to include or exclude in the query.
"""
print(f"{Colors.WHITE}{url:<20}{Colors.END}", end="")
- if int(max_file_size / 1024) != 50:
- print(f" | {Colors.YELLOW}Size: {int(max_file_size/1024)}kb{Colors.END}", end="")
+ if max_file_size / 1024 != 50.00:
+ # Format with up to 2 decimal places for KB values
+ kb_value = max_file_size / 1024
+ # If it's a whole number, display as integer
+ if kb_value == int(kb_value):
+ kb_display = f"{int(kb_value)}kb"
+ else:
+ # Otherwise show up to 2 decimal places, removing trailing zeros
+ kb_display = f"{kb_value:.2f}".rstrip("0").rstrip(".") + "kb"
+ print(f" | {Colors.YELLOW}Size: {kb_display}{Colors.END}", end="")
if pattern_type == "include" and pattern != "":
print(f" | {Colors.YELLOW}Include {pattern}{Colors.END}", end="")
elif pattern_type == "exclude" and pattern != "":
print(f" | {Colors.YELLOW}Exclude {pattern}{Colors.END}", end="")
-def _print_error(url: str, e: Exception, max_file_size: int, pattern_type: str, pattern: str) -> None:
+def _print_error(url: str, e: Exception, max_file_size: float, pattern_type: str, pattern: str) -> None:
"""
Print a formatted error message including the URL, file size, pattern details, and the exception encountered,
for debugging or logging purposes.
@@ -167,7 +191,7 @@ def _print_error(url: str, e: Exception, max_file_size: int, pattern_type: str,
The URL associated with the query that caused the error.
e : Exception
The exception raised during the query or process.
- max_file_size : int
+ max_file_size : float
The maximum file size allowed for the query, in bytes.
pattern_type : str
Specifies the type of pattern to use, either "include" or "exclude".
@@ -179,7 +203,7 @@ def _print_error(url: str, e: Exception, max_file_size: int, pattern_type: str,
print(f" | {Colors.RED}{e}{Colors.END}")
-def _print_success(url: str, max_file_size: int, pattern_type: str, pattern: str, summary: str) -> None:
+def _print_success(url: str, max_file_size: float, pattern_type: str, pattern: str, summary: str) -> None:
"""
Print a formatted success message, including the URL, file size, pattern details, and a summary with estimated
tokens, for debugging or logging purposes.
@@ -188,7 +212,7 @@ def _print_success(url: str, max_file_size: int, pattern_type: str, pattern: str
----------
url : str
The URL associated with the successful query.
- max_file_size : int
+ max_file_size : float
The maximum file size allowed for the query, in bytes.
pattern_type : str
Specifies the type of pattern to use, either "include" or "exclude".
diff --git a/src/server/routers/dynamic.py b/src/server/routers/dynamic.py
index bfa31f68..8cced93e 100644
--- a/src/server/routers/dynamic.py
+++ b/src/server/routers/dynamic.py
@@ -47,9 +47,10 @@ async def catch_all(request: Request, full_path: str) -> HTMLResponse:
async def process_catch_all(
request: Request,
input_text: str = Form(...),
- max_file_size: int = Form(...),
+ max_file_size: float = Form(...),
pattern_type: str = Form(...),
pattern: str = Form(...),
+ exact_file_size: str = Form(None),
) -> HTMLResponse:
"""
Process the form submission with user input for query parameters.
@@ -63,12 +64,14 @@ async def process_catch_all(
The incoming request object, which provides context for rendering the response.
input_text : str
The input text provided by the user for processing, by default taken from the form.
- max_file_size : int
+ max_file_size : float
The maximum allowed file size for the input, specified by the user.
pattern_type : str
The type of pattern used for the query, specified by the user.
pattern : str
The pattern string used in the query, specified by the user.
+ exact_file_size : str, optional
+ The exact file size value in KB, which preserves decimal precision (None when the form omits it).
Returns
-------
@@ -83,4 +86,5 @@ async def process_catch_all(
pattern_type,
pattern,
is_index=False,
+ exact_file_size=exact_file_size,
)
diff --git a/src/server/routers/index.py b/src/server/routers/index.py
index 01b84730..fbfbd6df 100644
--- a/src/server/routers/index.py
+++ b/src/server/routers/index.py
@@ -34,7 +34,7 @@ async def home(request: Request) -> HTMLResponse:
{
"request": request,
"examples": EXAMPLE_REPOS,
- "default_file_size": 243,
+ "default_file_size": 243.14,
},
)
@@ -44,9 +44,10 @@ async def home(request: Request) -> HTMLResponse:
async def index_post(
request: Request,
input_text: str = Form(...),
- max_file_size: int = Form(...),
+ max_file_size: float = Form(...),
pattern_type: str = Form(...),
pattern: str = Form(...),
+ exact_file_size: str = Form(None),
) -> HTMLResponse:
"""
Process the form submission with user input for query parameters.
@@ -61,13 +62,14 @@ async def index_post(
The incoming request object, which provides context for rendering the response.
input_text : str
The input text provided by the user for processing, by default taken from the form.
- max_file_size : int
+ max_file_size : float
The maximum allowed file size for the input, specified by the user.
pattern_type : str
The type of pattern used for the query, specified by the user.
pattern : str
The pattern string used in the query, specified by the user.
-
+ exact_file_size : str, optional
+ The exact file size value in KB, which preserves decimal precision (None when the form omits it).
Returns
-------
HTMLResponse
@@ -81,4 +83,5 @@ async def index_post(
pattern_type,
pattern,
is_index=True,
+ exact_file_size=exact_file_size,
)
diff --git a/src/server/templates/components/git_form.jinja b/src/server/templates/components/git_form.jinja
index 764fff70..c05c0d6d 100644
--- a/src/server/templates/components/git_form.jinja
+++ b/src/server/templates/components/git_form.jinja
@@ -84,12 +84,20 @@