Skip to content

Commit c055398

Browse files
committed
Merge branch 'main' into pr/85
2 parents 2c66967 + dc9031d commit c055398

File tree

4 files changed

+92
-3
lines changed

4 files changed

+92
-3
lines changed

src/config.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22

33
MAX_DISPLAY_SIZE: int = 300_000
44
TMP_BASE_PATH: str = "/tmp/gitingest"
5+
DELETE_REPO_AFTER: int = 60 * 60 # In seconds
56

67
EXAMPLE_REPOS: list[dict[str, str]] = [
78
{"name": "GitIngest", "url": "https://github.com/cyclotruc/gitingest"},

src/gitingest/parse_query.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
import string
66
import uuid
77
from typing import Any
8-
from urllib.parse import unquote
8+
from urllib.parse import unquote, urlparse
99

1010
from gitingest.exceptions import InvalidPatternError
1111
from gitingest.ignore_patterns import DEFAULT_IGNORE_PATTERNS
@@ -105,6 +105,10 @@ def _parse_url(url: str) -> dict[str, Any]:
105105
if not url.startswith(("https://", "http://")):
106106
url = "https://" + url
107107

108+
# Parse URL and reconstruct it without query parameters and fragments
109+
parsed_url = urlparse(url)
110+
url = f"{parsed_url.scheme}://{parsed_url.netloc}{parsed_url.path}"
111+
108112
# Extract domain and path
109113
url_parts = url.split("/")
110114
domain = url_parts[2]

src/main.py

Lines changed: 78 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,10 @@
11
""" Main module for the FastAPI application. """
22

3+
import asyncio
34
import os
5+
import shutil
6+
import time
7+
from contextlib import asynccontextmanager
48

59
from api_analytics.fastapi import Analytics
610
from dotenv import load_dotenv
@@ -12,14 +16,86 @@
1216
from slowapi.errors import RateLimitExceeded
1317
from starlette.middleware.trustedhost import TrustedHostMiddleware
1418

19+
from config import DELETE_REPO_AFTER, TMP_BASE_PATH
1520
from routers import download, dynamic, index
1621
from server_utils import limiter
1722

1823
# Load environment variables from .env file
1924
load_dotenv()
2025

21-
# Initialize the FastAPI application
22-
app = FastAPI()
26+
27+
def _log_repository_url(folder_path):
    """
    Append the repository URL inferred from a folder's first .txt file to history.txt.

    The file name is expected to look like "owner-repository.txt". Nothing is
    written when the folder holds no .txt file or the name contains no "-".
    """
    txt_files = [f for f in os.listdir(folder_path) if f.endswith(".txt")]
    if not txt_files:
        return

    # Strip only the trailing extension; str.replace would also remove a
    # ".txt" that happens to occur in the middle of the name.
    filename = txt_files[0][: -len(".txt")]
    if "-" not in filename:
        return

    owner, repo = filename.split("-", 1)
    repo_url = f"https://github.com/{owner}/{repo}"
    with open("history.txt", "a", encoding="utf-8") as history:
        history.write(f"{repo_url}\n")


def _remove_expired_entries():
    """
    Run one synchronous sweep over TMP_BASE_PATH.

    Every directory whose ctime is more than DELETE_REPO_AFTER seconds in the
    past is logged to history.txt (best effort) and then deleted. Problems with
    a single entry are printed and do not stop the sweep.
    """
    if not os.path.exists(TMP_BASE_PATH):
        return

    current_time = time.time()

    for folder in os.listdir(TMP_BASE_PATH):
        folder_path = os.path.join(TMP_BASE_PATH, folder)

        # NOTE: getctime is the creation time on Windows but the last
        # metadata-change time on Unix.
        try:
            age = current_time - os.path.getctime(folder_path)
        except OSError as e:
            # The entry may have been removed since listdir(); keep scanning
            # the remaining entries instead of aborting the whole sweep.
            print(f"Error reading age of {folder_path}: {str(e)}")
            continue

        # Skip if folder is not old enough
        if age <= DELETE_REPO_AFTER:
            continue

        # Only repository directories are cleaned up; a stray file would make
        # both the logging step and rmtree fail on every sweep.
        if not os.path.isdir(folder_path):
            continue

        # Try to log repository URL before deletion
        try:
            _log_repository_url(folder_path)
        except Exception as e:
            print(f"Error logging repository URL for {folder_path}: {str(e)}")

        # Delete the folder
        try:
            shutil.rmtree(folder_path)
        except Exception as e:
            print(f"Error deleting {folder_path}: {str(e)}")


async def remove_old_repositories():
    """
    Background task that runs periodically to clean up old repository directories.

    This task:
    - Scans the TMP_BASE_PATH directory every 60 seconds
    - Removes directories older than DELETE_REPO_AFTER seconds
    - Before deletion, logs repository URLs to history.txt if a matching .txt file exists
    - Handles errors gracefully if deletion fails

    The repository URL is extracted from the first .txt file in each directory,
    assuming the filename format: "owner-repository.txt"

    The filesystem work is blocking, so each sweep runs in the event loop's
    default executor; the loop stays free to serve requests while large
    directory trees are being removed. The coroutine never returns on its own
    and is expected to be stopped by cancelling its task.
    """
    while True:
        try:
            loop = asyncio.get_running_loop()
            await loop.run_in_executor(None, _remove_expired_entries)
        except asyncio.CancelledError:
            # Never swallow cancellation, whatever the interpreter version.
            raise
        except Exception as e:
            print(f"Error in remove_old_repositories: {str(e)}")

        await asyncio.sleep(60)
78+
79+
80+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    Application lifespan hook.

    On startup the repository clean-up coroutine is scheduled as a background
    task; on shutdown that task is cancelled and awaited so it is finished
    before the application exits.
    """
    cleanup_task = asyncio.create_task(remove_old_repositories())

    # Control returns to the application here; everything below runs at shutdown.
    yield

    cleanup_task.cancel()
    try:
        await cleanup_task
    except asyncio.CancelledError:
        # Expected outcome of the cancel() call above.
        pass
95+
96+
97+
# Initialize the FastAPI application with lifespan
98+
app = FastAPI(lifespan=lifespan)
2399
app.state.limiter = limiter
24100

25101

tests/test_parse_query.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -155,3 +155,11 @@ def test_parse_query_uuid_uniqueness() -> None:
155155
result1 = parse_query(path, max_file_size=100, from_web=False)
156156
result2 = parse_query(path, max_file_size=100, from_web=False)
157157
assert result1["id"] != result2["id"]
158+
159+
160+
def test_parse_url_with_query_and_fragment() -> None:
    """Query parameters and fragments are dropped before the URL is split."""
    parsed = _parse_url("https://github.com/user/repo?arg=value#fragment")

    assert parsed["user_name"] == "user"
    assert parsed["repo_name"] == "repo"
    # The stored URL must be the cleaned form, without "?arg=value#fragment".
    assert parsed["url"] == "https://github.com/user/repo"

0 commit comments

Comments
 (0)