Skip to content

Commit a2ae334

Browse files
Refactor code for lifespan, template usage, and improved tests
- Move background tasks and rate-limit handler into utils.py - Reference TEMPLATES from config instead of inline Jinja2Templates - Adopt Given/When/Then docstrings for test clarity - Parametrize some tests and consolidate code across query_parser tests - Add pytest.warns context handler to test_parse_repo_source_with_failed_git_command
1 parent d721b00 commit a2ae334

File tree

14 files changed

+743
-408
lines changed

14 files changed

+743
-408
lines changed

src/config.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
from pathlib import Path
44

5+
from fastapi.templating import Jinja2Templates
6+
57
MAX_FILE_SIZE = 10 * 1024 * 1024 # 10 MB
68
MAX_DIRECTORY_DEPTH = 20 # Maximum depth of directory traversal
79
MAX_FILES = 10_000 # Maximum number of files to process
@@ -18,3 +20,5 @@
1820
{"name": "Tldraw", "url": "https://github.com/tldraw/tldraw"},
1921
{"name": "ApiAnalytics", "url": "https://github.com/tom-draper/api-analytics"},
2022
]
23+
24+
TEMPLATES = Jinja2Templates(directory="templates")

src/gitingest/cli.py

Lines changed: 36 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,51 @@
22

33
# pylint: disable=no-value-for-parameter
44

5+
import asyncio
6+
57
import click
68

79
from config import MAX_FILE_SIZE
810
from gitingest.repository_ingest import ingest
911

1012

1113
@click.command()
12-
@click.argument("source", type=str, required=True)
14+
@click.argument("source", type=str, default=".")
1315
@click.option("--output", "-o", default=None, help="Output file path (default: <repo_name>.txt in current directory)")
1416
@click.option("--max-size", "-s", default=MAX_FILE_SIZE, help="Maximum file size to process in bytes")
1517
@click.option("--exclude-pattern", "-e", multiple=True, help="Patterns to exclude")
1618
@click.option("--include-pattern", "-i", multiple=True, help="Patterns to include")
17-
async def main(
19+
def main(
20+
source: str,
21+
output: str | None,
22+
max_size: int,
23+
exclude_pattern: tuple[str, ...],
24+
include_pattern: tuple[str, ...],
25+
):
26+
"""
27+
Main entry point for the CLI. This function is called when the CLI is run as a script.
28+
29+
It calls the async main function to run the command.
30+
31+
Parameters
32+
----------
33+
source : str
34+
The source directory or repository to analyze.
35+
output : str | None
36+
The path where the output file will be written. If not specified, the output will be written
37+
to a file named `<repo_name>.txt` in the current directory.
38+
max_size : int
39+
The maximum file size to process, in bytes. Files larger than this size will be ignored.
40+
exclude_pattern : tuple[str, ...]
41+
A tuple of patterns to exclude during the analysis. Files matching these patterns will be ignored.
42+
include_pattern : tuple[str, ...]
43+
A tuple of patterns to include during the analysis. Only files matching these patterns will be processed.
44+
"""
45+
# Main entry point for the CLI. This function is called when the CLI is run as a script.
46+
asyncio.run(_async_main(source, output, max_size, exclude_pattern, include_pattern))
47+
48+
49+
async def _async_main(
1850
source: str,
1951
output: str | None,
2052
max_size: int,
@@ -24,9 +56,8 @@ async def main(
2456
"""
2557
Analyze a directory or repository and create a text dump of its contents.
2658
27-
This command analyzes the contents of a specified source directory or repository,
28-
applies custom include and exclude patterns, and generates a text summary of the analysis
29-
which is then written to an output file.
59+
This command analyzes the contents of a specified source directory or repository, applies custom include and
60+
exclude patterns, and generates a text summary of the analysis which is then written to an output file.
3061
3162
Parameters
3263
----------

src/gitingest/query_parser.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,7 @@ async def _configure_branch_and_subpath(remaining_parts: list[str], url: str) ->
227227
# Fetch the list of branches from the remote repository
228228
branches: list[str] = await fetch_remote_branch_list(url)
229229
except RuntimeError as e:
230-
warnings.warn(f"Warning: Failed to fetch branch list: {e}")
230+
warnings.warn(f"Warning: Failed to fetch branch list: {e}", RuntimeWarning)
231231
return remaining_parts.pop(0)
232232

233233
branch = []

src/main.py

Lines changed: 4 additions & 137 deletions
Original file line numberDiff line numberDiff line change
@@ -1,157 +1,27 @@
11
""" Main module for the FastAPI application. """
22

3-
import asyncio
43
import os
5-
import shutil
6-
import time
7-
from contextlib import asynccontextmanager
8-
from pathlib import Path
94

105
from api_analytics.fastapi import Analytics
116
from dotenv import load_dotenv
127
from fastapi import FastAPI, Request
13-
from fastapi.responses import FileResponse, HTMLResponse, Response
8+
from fastapi.responses import FileResponse, HTMLResponse
149
from fastapi.staticfiles import StaticFiles
15-
from fastapi.templating import Jinja2Templates
16-
from slowapi import _rate_limit_exceeded_handler
1710
from slowapi.errors import RateLimitExceeded
1811
from starlette.middleware.trustedhost import TrustedHostMiddleware
1912

20-
from config import DELETE_REPO_AFTER, TMP_BASE_PATH
13+
from config import TEMPLATES
2114
from routers import download, dynamic, index
2215
from server_utils import limiter
16+
from utils import lifespan, rate_limit_exception_handler
2317

2418
# Load environment variables from .env file
2519
load_dotenv()
2620

27-
28-
async def remove_old_repositories():
29-
"""
30-
Background task that runs periodically to clean up old repository directories.
31-
32-
This task:
33-
- Scans the TMP_BASE_PATH directory every 60 seconds
34-
- Removes directories older than DELETE_REPO_AFTER seconds
35-
- Before deletion, logs repository URLs to history.txt if a matching .txt file exists
36-
- Handles errors gracefully if deletion fails
37-
38-
The repository URL is extracted from the first .txt file in each directory,
39-
assuming the filename format: "owner-repository.txt"
40-
"""
41-
while True:
42-
try:
43-
if not TMP_BASE_PATH.exists():
44-
await asyncio.sleep(60)
45-
continue
46-
47-
current_time = time.time()
48-
49-
for folder in TMP_BASE_PATH.iterdir():
50-
if not folder.is_dir():
51-
continue
52-
53-
# Skip if folder is not old enough
54-
if current_time - folder.stat().st_ctime <= DELETE_REPO_AFTER:
55-
continue
56-
57-
await process_folder(folder)
58-
59-
except Exception as e:
60-
print(f"Error in remove_old_repositories: {e}")
61-
62-
await asyncio.sleep(60)
63-
64-
65-
async def process_folder(folder: Path) -> None:
66-
"""
67-
Process a single folder for deletion and logging.
68-
69-
Parameters
70-
----------
71-
folder : Path
72-
The path to the folder to be processed.
73-
"""
74-
# Try to log repository URL before deletion
75-
try:
76-
txt_files = [f for f in folder.iterdir() if f.suffix == ".txt"]
77-
78-
# Extract owner and repository name from the filename
79-
if txt_files and "-" in (filename := txt_files[0].stem):
80-
owner, repo = filename.split("-", 1)
81-
repo_url = f"{owner}/{repo}"
82-
with open("history.txt", mode="a", encoding="utf-8") as history:
83-
history.write(f"{repo_url}\n")
84-
85-
except Exception as e:
86-
print(f"Error logging repository URL for {folder}: {e}")
87-
88-
# Delete the folder
89-
try:
90-
shutil.rmtree(folder)
91-
except Exception as e:
92-
print(f"Error deleting {folder}: {e}")
93-
94-
95-
@asynccontextmanager
96-
async def lifespan(_: FastAPI):
97-
"""
98-
Lifecycle manager for the FastAPI application.
99-
Handles startup and shutdown events.
100-
101-
Parameters
102-
----------
103-
_ : FastAPI
104-
The FastAPI application instance (unused).
105-
106-
Yields
107-
-------
108-
None
109-
Yields control back to the FastAPI application while the background task runs.
110-
"""
111-
task = asyncio.create_task(remove_old_repositories())
112-
113-
yield
114-
# Cancel the background task on shutdown
115-
task.cancel()
116-
try:
117-
await task
118-
except asyncio.CancelledError:
119-
pass
120-
121-
12221
# Initialize the FastAPI application with lifespan
12322
app = FastAPI(lifespan=lifespan)
12423
app.state.limiter = limiter
12524

126-
127-
async def rate_limit_exception_handler(request: Request, exc: Exception) -> Response:
128-
"""
129-
Custom exception handler for rate-limiting errors.
130-
131-
Parameters
132-
----------
133-
request : Request
134-
The incoming HTTP request.
135-
exc : Exception
136-
The exception raised, expected to be RateLimitExceeded.
137-
138-
Returns
139-
-------
140-
Response
141-
A response indicating that the rate limit has been exceeded.
142-
143-
Raises
144-
------
145-
exc
146-
If the exception is not a RateLimitExceeded error, it is re-raised.
147-
"""
148-
if isinstance(exc, RateLimitExceeded):
149-
# Delegate to the default rate limit handler
150-
return _rate_limit_exceeded_handler(request, exc)
151-
# Re-raise other exceptions
152-
raise exc
153-
154-
15525
# Register the custom exception handler for rate limits
15626
app.add_exception_handler(RateLimitExceeded, rate_limit_exception_handler)
15727

@@ -174,9 +44,6 @@ async def rate_limit_exception_handler(request: Request, exc: Exception) -> Resp
17444
# Add middleware to enforce allowed hosts
17545
app.add_middleware(TrustedHostMiddleware, allowed_hosts=allowed_hosts)
17646

177-
# Set up template rendering
178-
templates = Jinja2Templates(directory="templates")
179-
18047

18148
@app.get("/health")
18249
async def health_check() -> dict[str, str]:
@@ -222,7 +89,7 @@ async def api_docs(request: Request) -> HTMLResponse:
22289
HTMLResponse
22390
A rendered HTML page displaying API documentation.
22491
"""
225-
return templates.TemplateResponse("api.jinja", {"request": request})
92+
return TEMPLATES.TemplateResponse("api.jinja", {"request": request})
22693

22794

22895
@app.get("/robots.txt")

src/query_processor.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,14 @@
33
from functools import partial
44

55
from fastapi import Request
6-
from fastapi.templating import Jinja2Templates
76
from starlette.templating import _TemplateResponse
87

9-
from config import EXAMPLE_REPOS, MAX_DISPLAY_SIZE
8+
from config import EXAMPLE_REPOS, MAX_DISPLAY_SIZE, TEMPLATES
109
from gitingest.query_ingestion import run_ingest_query
1110
from gitingest.query_parser import ParsedQuery, parse_query
1211
from gitingest.repository_clone import CloneConfig, clone_repo
1312
from server_utils import Colors, log_slider_to_size
1413

15-
templates = Jinja2Templates(directory="templates")
16-
1714

1815
async def process_query(
1916
request: Request,
@@ -64,7 +61,7 @@ async def process_query(
6461
raise ValueError(f"Invalid pattern type: {pattern_type}")
6562

6663
template = "index.jinja" if is_index else "git.jinja"
67-
template_response = partial(templates.TemplateResponse, name=template)
64+
template_response = partial(TEMPLATES.TemplateResponse, name=template)
6865
max_file_size = log_slider_to_size(slider_position)
6966

7067
context = {

src/routers/dynamic.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,12 @@
22

33
from fastapi import APIRouter, Form, Request
44
from fastapi.responses import HTMLResponse
5-
from fastapi.templating import Jinja2Templates
65

6+
from config import TEMPLATES
77
from query_processor import process_query
88
from server_utils import limiter
99

1010
router = APIRouter()
11-
templates = Jinja2Templates(directory="templates")
1211

1312

1413
@router.get("/{full_path:path}")
@@ -32,7 +31,7 @@ async def catch_all(request: Request, full_path: str) -> HTMLResponse:
3231
An HTML response containing the rendered template, with the Git URL
3332
and other default parameters such as loading state and file size.
3433
"""
35-
return templates.TemplateResponse(
34+
return TEMPLATES.TemplateResponse(
3635
"git.jinja",
3736
{
3837
"request": request,

src/routers/index.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,12 @@
22

33
from fastapi import APIRouter, Form, Request
44
from fastapi.responses import HTMLResponse
5-
from fastapi.templating import Jinja2Templates
65

7-
from config import EXAMPLE_REPOS
6+
from config import EXAMPLE_REPOS, TEMPLATES
87
from query_processor import process_query
98
from server_utils import limiter
109

1110
router = APIRouter()
12-
templates = Jinja2Templates(directory="templates")
1311

1412

1513
@router.get("/", response_class=HTMLResponse)
@@ -31,7 +29,7 @@ async def home(request: Request) -> HTMLResponse:
3129
An HTML response containing the rendered home page template, with example repositories
3230
and other default parameters such as file size.
3331
"""
34-
return templates.TemplateResponse(
32+
return TEMPLATES.TemplateResponse(
3533
"index.jinja",
3634
{
3735
"request": request,

0 commit comments

Comments
 (0)