Skip to content

Commit 93d789e

Browse files
committed
Add cleanup periodic task in the web server
1 parent caacc86 commit 93d789e

File tree

2 files changed

+79
-2
lines changed

2 files changed

+79
-2
lines changed

src/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# NOTE(review): presumably an upper bound on how much content is displayed;
# the unit (characters vs. bytes) is not visible here — confirm at the usage site.
MAX_DISPLAY_SIZE: int = 300_000
22
# Base directory whose sub-folders are scanned and removed by the periodic
# cleanup task in src/main.py (remove_old_repositories).
TMP_BASE_PATH: str = "/tmp/gitingest"
3+
# Minimum age a folder under TMP_BASE_PATH must reach before the cleanup task
# in src/main.py deletes it. The task only scans every 60 seconds, so a folder
# can outlive this threshold by up to one scan interval.
DELETE_REPO_AFTER: int = 30 # In seconds
34

45
EXAMPLE_REPOS: list[dict[str, str]] = [
56
{"name": "Gitingest", "url": "https://github.com/cyclotruc/gitingest"},

src/main.py

Lines changed: 78 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
1+
import asyncio
12
import os
3+
import shutil
4+
import time
5+
from contextlib import asynccontextmanager
26

37
from api_analytics.fastapi import Analytics
48
from dotenv import load_dotenv
@@ -10,14 +14,86 @@
1014
from slowapi.errors import RateLimitExceeded
1115
from starlette.middleware.trustedhost import TrustedHostMiddleware
1216

17+
from config import DELETE_REPO_AFTER, TMP_BASE_PATH
1318
from routers import download, dynamic, index
1419
from server_utils import limiter
1520

1621
# Load environment variables from .env file
1722
load_dotenv()
1823

19-
# Initialize the FastAPI application
20-
app = FastAPI()
24+
25+
def _log_repository_url(folder_path):
    """
    Append the repository URL inferred from ``folder_path`` to history.txt.

    The URL is derived from the first ``.txt`` file found in the folder,
    assuming the filename format "owner-repository.txt". Nothing is written
    when the folder has no ``.txt`` file or the name contains no "-".
    """
    txt_files = [f for f in os.listdir(folder_path) if f.endswith(".txt")]
    if not txt_files:
        return

    # Strip only the trailing extension; str.replace would also remove a
    # ".txt" that happens to appear in the middle of the name.
    filename = txt_files[0][: -len(".txt")]
    if "-" not in filename:
        return

    owner, repo = filename.split("-", 1)
    repo_url = f"https://github.com/{owner}/{repo}"
    with open("history.txt", "a", encoding="utf-8") as history:
        history.write(f"{repo_url}\n")


def _remove_expired_folders():
    """
    Run a single blocking cleanup pass over TMP_BASE_PATH.

    Every real directory (symlinks and plain files are skipped) whose
    ``os.path.getctime`` is more than DELETE_REPO_AFTER seconds in the past
    is logged to history.txt on a best-effort basis and then deleted.
    A failure on one entry never prevents the remaining entries from being
    processed.
    """
    if not os.path.exists(TMP_BASE_PATH):
        return

    current_time = time.time()

    for folder in os.listdir(TMP_BASE_PATH):
        folder_path = os.path.join(TMP_BASE_PATH, folder)

        try:
            # shutil.rmtree only works on real directories; anything else
            # would just fail again on every pass.
            if os.path.islink(folder_path) or not os.path.isdir(folder_path):
                continue

            # Skip if folder is not old enough
            if current_time - os.path.getctime(folder_path) <= DELETE_REPO_AFTER:
                continue
        except OSError as e:
            # The entry may have disappeared since listdir(); keep scanning
            # the remaining folders instead of aborting the whole pass.
            print(f"Error in remove_old_repositories: {str(e)}")
            continue

        # Try to log repository URL before deletion
        try:
            _log_repository_url(folder_path)
        except Exception as e:
            print(f"Error logging repository URL for {folder_path}: {str(e)}")

        # Delete the folder
        try:
            shutil.rmtree(folder_path)
        except Exception as e:
            print(f"Error deleting {folder_path}: {str(e)}")


async def remove_old_repositories():
    """
    Background task that runs periodically to clean up old repository directories.

    This task:
    - Scans the TMP_BASE_PATH directory every 60 seconds
    - Removes directories older than DELETE_REPO_AFTER seconds
    - Before deletion, logs repository URLs to history.txt if a matching .txt file exists
    - Handles errors gracefully if deletion fails

    The repository URL is extracted from the first .txt file in each directory,
    assuming the filename format: "owner-repository.txt"

    The coroutine never returns on its own; it runs until it is cancelled.
    """
    while True:
        try:
            # Directory listing and rmtree are blocking calls; run the pass in
            # a worker thread so the event loop keeps serving requests.
            await asyncio.to_thread(_remove_expired_folders)
        except Exception as e:
            print(f"Error in remove_old_repositories: {str(e)}")

        await asyncio.sleep(60)
76+
77+
78+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    Lifecycle manager for the FastAPI application.
    Handles startup and shutdown events.

    On startup the remove_old_repositories() coroutine is scheduled as a
    background task. On shutdown the task is cancelled and awaited so it is
    fully finished before the application exits. The ``app`` argument is
    part of the lifespan signature but is not used here.
    """
    task = asyncio.create_task(remove_old_repositories())

    try:
        yield
    finally:
        # Cancel the background task on shutdown. Using finally guarantees the
        # task is also stopped when an exception is thrown in at the yield.
        task.cancel()
        try:
            await task
        except asyncio.CancelledError:
            # Expected outcome of cancelling the task above.
            pass
93+
94+
95+
# Initialize the FastAPI application with lifespan
96+
app = FastAPI(lifespan=lifespan)
2197
app.state.limiter = limiter
2298

2399

0 commit comments

Comments
 (0)