Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions src/gitingest/entrypoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

async def ingest_async(
source: str,
max_file_size: int = 10 * 1024 * 1024, # 10 MB
max_file_size: float = 10 * 1024 * 1024, # 10 MB
include_patterns: Optional[Union[str, Set[str]]] = None,
exclude_patterns: Optional[Union[str, Set[str]]] = None,
branch: Optional[str] = None,
Expand All @@ -30,7 +30,7 @@ async def ingest_async(
----------
source : str
The source to analyze, which can be a URL (for a Git repository) or a local directory path.
max_file_size : int
max_file_size : float
Maximum allowed file size for file ingestion. Files larger than this size are ignored, by default
10*1024*1024 (10 MB).
include_patterns : Union[str, Set[str]], optional
Expand Down Expand Up @@ -98,7 +98,7 @@ async def ingest_async(

def ingest(
source: str,
max_file_size: int = 10 * 1024 * 1024, # 10 MB
max_file_size: float = 10 * 1024 * 1024, # 10 MB
include_patterns: Optional[Union[str, Set[str]]] = None,
exclude_patterns: Optional[Union[str, Set[str]]] = None,
branch: Optional[str] = None,
Expand All @@ -115,7 +115,7 @@ def ingest(
----------
source : str
The source to analyze, which can be a URL (for a Git repository) or a local directory path.
max_file_size : int
max_file_size : float
Maximum allowed file size for file ingestion. Files larger than this size are ignored, by default
10*1024*1024 (10 MB).
include_patterns : Union[str, Set[str]], optional
Expand Down
4 changes: 2 additions & 2 deletions src/gitingest/query_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

async def parse_query(
source: str,
max_file_size: int,
max_file_size: float,
from_web: bool,
include_patterns: Optional[Union[str, Set[str]]] = None,
ignore_patterns: Optional[Union[str, Set[str]]] = None,
Expand All @@ -41,7 +41,7 @@ async def parse_query(
----------
source : str
The source URL or file path to parse.
max_file_size : int
max_file_size : float
The maximum file size in bytes to include.
from_web : bool
Flag indicating whether the source is a web URL.
Expand Down
2 changes: 1 addition & 1 deletion src/gitingest/schemas/ingestion_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ class IngestionQuery(BaseModel): # pylint: disable=too-many-instance-attributes
type: Optional[str] = None
branch: Optional[str] = None
commit: Optional[str] = None
max_file_size: int = Field(default=MAX_FILE_SIZE)
max_file_size: float = Field(default=MAX_FILE_SIZE)
ignore_patterns: Optional[Set[str]] = None
include_patterns: Optional[Set[str]] = None

Expand Down
48 changes: 36 additions & 12 deletions src/server/query_processor.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Process a query by parsing input, cloning a repository, and generating a summary."""

from functools import partial
from typing import Optional

from fastapi import Request
from starlette.templating import _TemplateResponse
Expand All @@ -15,10 +16,11 @@
async def process_query(
request: Request,
input_text: str,
slider_position: int,
slider_position: float,
pattern_type: str = "exclude",
pattern: str = "",
is_index: bool = False,
exact_file_size: Optional[str] = None,
) -> _TemplateResponse:
"""
Process a query by parsing input, cloning a repository, and generating a summary.
Expand All @@ -32,15 +34,16 @@ async def process_query(
The HTTP request object.
input_text : str
Input text provided by the user, typically a Git repository URL or slug.
slider_position : int
slider_position : float
Position of the slider, representing the maximum file size in the query.
pattern_type : str
Type of pattern to use, either "include" or "exclude" (default is "exclude").
pattern : str
Pattern to include or exclude in the query, depending on the pattern type.
is_index : bool
Flag indicating whether the request is for the index page (default is False).

exact_file_size : str, optional
The exact file size value in KB, which preserves decimal precision. If omitted, blank, or not a valid number,
the slider position is used instead.

Returns
-------
_TemplateResponse
Expand All @@ -62,7 +65,17 @@ async def process_query(

template = "index.jinja" if is_index else "git.jinja"
template_response = partial(templates.TemplateResponse, name=template)
max_file_size = log_slider_to_size(slider_position)

# Use exact_file_size if provided, otherwise use the slider position
if exact_file_size is not None and exact_file_size.strip():
try:
# Convert exact_file_size from KB to bytes
max_file_size = float(exact_file_size) * 1024
except ValueError:
# If conversion fails, fall back to slider position
max_file_size = log_slider_to_size(int(slider_position))
else:
max_file_size = log_slider_to_size(int(slider_position))

context = {
"request": request,
Expand Down Expand Up @@ -125,13 +138,16 @@ async def process_query(
"tree": tree,
"content": content,
"ingest_id": query.id,
"exact_file_size": (
exact_file_size if max_file_size < 1024 else max_file_size / 1024
), # Pass the exact file size back to the template
}
)

return template_response(context=context)


def _print_query(url: str, max_file_size: int, pattern_type: str, pattern: str) -> None:
def _print_query(url: str, max_file_size: float, pattern_type: str, pattern: str) -> None:
"""
Print a formatted summary of the query details, including the URL, file size,
and pattern information, for easier debugging or logging.
Expand All @@ -140,23 +156,31 @@ def _print_query(url: str, max_file_size: int, pattern_type: str, pattern: str)
----------
url : str
The URL associated with the query.
max_file_size : int
max_file_size : float
The maximum file size allowed for the query, in bytes.
pattern_type : str
Specifies the type of pattern to use, either "include" or "exclude".
pattern : str
The actual pattern string to include or exclude in the query.
"""
print(f"{Colors.WHITE}{url:<20}{Colors.END}", end="")
if int(max_file_size / 1024) != 50:
print(f" | {Colors.YELLOW}Size: {int(max_file_size/1024)}kb{Colors.END}", end="")
if max_file_size / 1024 != 50.00:
# Format with up to 2 decimal places for KB values
kb_value = max_file_size / 1024
# If it's a whole number, display as integer
if kb_value == int(kb_value):
kb_display = f"{int(kb_value)}kb"
else:
# Otherwise show up to 2 decimal places, removing trailing zeros
kb_display = f"{kb_value:.2f}".rstrip("0").rstrip(".") + "kb"
print(f" | {Colors.YELLOW}Size: {kb_display}{Colors.END}", end="")
if pattern_type == "include" and pattern != "":
print(f" | {Colors.YELLOW}Include {pattern}{Colors.END}", end="")
elif pattern_type == "exclude" and pattern != "":
print(f" | {Colors.YELLOW}Exclude {pattern}{Colors.END}", end="")


def _print_error(url: str, e: Exception, max_file_size: int, pattern_type: str, pattern: str) -> None:
def _print_error(url: str, e: Exception, max_file_size: float, pattern_type: str, pattern: str) -> None:
"""
Print a formatted error message including the URL, file size, pattern details, and the exception encountered,
for debugging or logging purposes.
Expand All @@ -167,7 +191,7 @@ def _print_error(url: str, e: Exception, max_file_size: int, pattern_type: str,
The URL associated with the query that caused the error.
e : Exception
The exception raised during the query or process.
max_file_size : int
max_file_size : float
The maximum file size allowed for the query, in bytes.
pattern_type : str
Specifies the type of pattern to use, either "include" or "exclude".
Expand All @@ -179,7 +203,7 @@ def _print_error(url: str, e: Exception, max_file_size: int, pattern_type: str,
print(f" | {Colors.RED}{e}{Colors.END}")


def _print_success(url: str, max_file_size: int, pattern_type: str, pattern: str, summary: str) -> None:
def _print_success(url: str, max_file_size: float, pattern_type: str, pattern: str, summary: str) -> None:
"""
Print a formatted success message, including the URL, file size, pattern details, and a summary with estimated
tokens, for debugging or logging purposes.
Expand All @@ -188,7 +212,7 @@ def _print_success(url: str, max_file_size: int, pattern_type: str, pattern: str
----------
url : str
The URL associated with the successful query.
max_file_size : int
max_file_size : float
The maximum file size allowed for the query, in bytes.
pattern_type : str
Specifies the type of pattern to use, either "include" or "exclude".
Expand Down
8 changes: 6 additions & 2 deletions src/server/routers/dynamic.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,10 @@ async def catch_all(request: Request, full_path: str) -> HTMLResponse:
async def process_catch_all(
request: Request,
input_text: str = Form(...),
max_file_size: int = Form(...),
max_file_size: float = Form(...),
pattern_type: str = Form(...),
pattern: str = Form(...),
exact_file_size: str = Form(None),
) -> HTMLResponse:
"""
Process the form submission with user input for query parameters.
Expand All @@ -63,12 +64,14 @@ async def process_catch_all(
The incoming request object, which provides context for rendering the response.
input_text : str
The input text provided by the user for processing, by default taken from the form.
max_file_size : int
max_file_size : float
The maximum allowed file size for the input, specified by the user.
pattern_type : str
The type of pattern used for the query, specified by the user.
pattern : str
The pattern string used in the query, specified by the user.
exact_file_size : str, optional
The exact file size value in KB, which preserves decimal precision, by default None.

Returns
-------
Expand All @@ -83,4 +86,5 @@ async def process_catch_all(
pattern_type,
pattern,
is_index=False,
exact_file_size=exact_file_size,
)
11 changes: 7 additions & 4 deletions src/server/routers/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ async def home(request: Request) -> HTMLResponse:
{
"request": request,
"examples": EXAMPLE_REPOS,
"default_file_size": 243,
"default_file_size": 243.14,
},
)

Expand All @@ -44,9 +44,10 @@ async def home(request: Request) -> HTMLResponse:
async def index_post(
request: Request,
input_text: str = Form(...),
max_file_size: int = Form(...),
max_file_size: float = Form(...),
pattern_type: str = Form(...),
pattern: str = Form(...),
exact_file_size: str = Form(None),
) -> HTMLResponse:
"""
Process the form submission with user input for query parameters.
Expand All @@ -61,13 +62,14 @@ async def index_post(
The incoming request object, which provides context for rendering the response.
input_text : str
The input text provided by the user for processing, by default taken from the form.
max_file_size : int
max_file_size : float
The maximum allowed file size for the input, specified by the user.
pattern_type : str
The type of pattern used for the query, specified by the user.
pattern : str
The pattern string used in the query, specified by the user.

exact_file_size : str, optional
The exact file size value in KB, which preserves decimal precision, by default None.

Returns
-------
HTMLResponse
Expand All @@ -81,4 +83,5 @@ async def index_post(
pattern_type,
pattern,
is_index=True,
exact_file_size=exact_file_size,
)
12 changes: 10 additions & 2 deletions src/server/templates/components/git_form.jinja
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,20 @@
</div>
</div>
<div class="w-[200px] sm:w-[200px] mt-3">
<label for="file_size" class="block text-gray-700 mb-1">
Include files under: <span id="size_value" class="font-bold">50kb</span>
<label for="file_size" class="flex gap-2 text-gray-700 mb-1 text-nowrap">
Include files under:
<div class="inline-block relative">
<input type="text"
id="size_value_input"
class="w-24 font-bold bg-transparent border-b-2 border-gray-900 focus:outline-none text-center"
value="{{ exact_file_size ~ 'kb' }}"
data-exact-value="{{ exact_file_size }}">
</div>
</label>
<input type="range"
id="file_size"
name="max_file_size"
step="any"
min="0"
max="500"
required
Expand Down
Loading