diff --git a/.env.example b/.env.example
index 72351421..35525aad 100644
--- a/.env.example
+++ b/.env.example
@@ -16,6 +16,58 @@
# 'Authorization' header as a Bearer token (e.g., "Authorization: Bearer YOUR_PROXY_API_KEY").
#PROXY_API_KEY="YOUR_PROXY_API_KEY"
+# App runtime mode.
+# dev = insecure default secrets are tolerated (ALLOW_INSECURE_DEFAULTS defaults to true)
+# prod = refuses startup when critical secrets are default/missing
+#APP_ENV="dev"
+
+# Override safety guard for local experimentation only.
+# In prod, keep this false and set strong SESSION_SECRET/API_TOKEN_PEPPER.
+#ALLOW_INSECURE_DEFAULTS=true
+
+# Auth transition mode for /v1 API auth.
+# users = only database-backed user API keys
+# legacy = only PROXY_API_KEY
+# both = accepts both user keys and PROXY_API_KEY
+# Default is "both" for migration compatibility.
+#AUTH_MODE="both"
+
+# Pepper used when hashing user API tokens at rest:
+# token_hash = SHA256(API_TOKEN_PEPPER + plaintext_token)
+# Keep this stable after issuing user API keys, or existing keys will no longer match.
+#API_TOKEN_PEPPER="replace-with-random-token-pepper"
+
+# Initial admin bootstrap user (created once on first startup).
+# Required when AUTH_MODE includes "users" and you need dashboard/admin access.
+#INITIAL_ADMIN_USERNAME="admin"
+#INITIAL_ADMIN_PASSWORD="change-me"
+
+# Dashboard session cookie settings.
+# SESSION_SECRET should be a long random value in production.
+#SESSION_SECRET="replace-with-random-session-secret"
+#SESSION_TTL_SECONDS=86400
+#SESSION_COOKIE_SECURE=false
+#SESSION_COOKIE_SAMESITE=lax
+
+# Reminder: API_TOKEN_PEPPER is defined above -- set it once and keep it stable.
+# Rotating the pepper invalidates all previously issued user API keys.
+
+# CORS settings for browser clients.
+# Comma-separated origins, e.g. "http://localhost:3000,https://admin.example.com"
+#CORS_ALLOW_ORIGINS=""
+#CORS_ALLOW_CREDENTIALS=false
+#CORS_ALLOW_METHODS="GET,POST,PUT,PATCH,DELETE,OPTIONS"
+#CORS_ALLOW_HEADERS="Authorization,Content-Type,X-API-Key,X-Requested-With,X-CSRF-Token"
+# SESSION_COOKIE_SAMESITE (set above) supports: lax, strict, none.
+# When set to "none", the session cookie is always marked Secure.
+
+# CSRF protection for UI forms is enabled by default.
+# Cookie/form field names are fixed internally: proxy_csrf / csrf_token.
+# CSRF cookie security follows SESSION_COOKIE_SECURE + SESSION_COOKIE_SAMESITE.
+
+# Optional DB override. By default uses sqlite file: data/proxy.db
+#DATABASE_URL="sqlite+aiosqlite:///data/proxy.db"
+
# ------------------------------------------------------------------------------
# | [API KEYS] Provider API Keys |
@@ -418,4 +470,4 @@
#
# Set to "0" to show these warnings (useful for debugging).
# Default: "1" (suppress warnings)
-# SUPPRESS_LITELLM_SERIALIZATION_WARNINGS=1
\ No newline at end of file
+# SUPPRESS_LITELLM_SERIALIZATION_WARNINGS=1
diff --git a/DOCUMENTATION.md b/DOCUMENTATION.md
index 905ab4b0..491a5372 100644
--- a/DOCUMENTATION.md
+++ b/DOCUMENTATION.md
@@ -919,6 +919,18 @@ The proxy accepts both Anthropic and OpenAI authentication styles:
- `x-api-key` header (Anthropic style)
- `Authorization: Bearer` header (OpenAI style)
+For `/v1/*` token validation, behavior is controlled by `AUTH_MODE`:
+
+| AUTH_MODE | Accepted tokens | Migration intent |
+|-----------|-----------------|------------------|
+| `users` | Database-backed user API keys | Target steady-state |
+| `legacy` | `PROXY_API_KEY` only | Legacy compatibility mode |
+| `both` | User API keys + `PROXY_API_KEY` | Default migration mode |
+
+Legacy compatibility statement: existing clients that still send `PROXY_API_KEY` remain supported when `AUTH_MODE=legacy` or `AUTH_MODE=both`.
+
+Recommended rollout: bootstrap admin via `INITIAL_ADMIN_USERNAME`/`INITIAL_ADMIN_PASSWORD`, run with `AUTH_MODE=both` during client migration, then move to `AUTH_MODE=users`.
+
### 3.5. Antigravity (`antigravity_provider.py`)
The most sophisticated provider implementation, supporting Google's internal Antigravity API for Gemini 3 and Claude models (including **Claude Opus 4.5**, Anthropic's most powerful model).
@@ -1925,4 +1937,3 @@ The GUI modifies the same environment variables that the `RotatingClient` reads:
3. **Proxy applies rules** → `get_available_models()` filters based on rules
**Note**: The proxy must be restarted to pick up rule changes made via the GUI (or use the Launcher TUI's reload functionality if available).
-
diff --git a/Dockerfile b/Dockerfile
index fe209886..782fafaa 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -34,8 +34,8 @@ ENV PATH=/root/.local/bin:$PATH
# Copy application code
COPY src/ ./src/
-# Create directories for logs and oauth credentials
-RUN mkdir -p logs oauth_creds
+# Create runtime directories for persisted state
+RUN mkdir -p logs oauth_creds usage data
# Expose the default port
EXPOSE 8000
diff --git a/README.md b/README.md
index c15ed094..41b42f88 100644
--- a/README.md
+++ b/README.md
@@ -54,6 +54,7 @@ docker run -d \
-v $(pwd)/oauth_creds:/app/oauth_creds \
-v $(pwd)/logs:/app/logs \
-v $(pwd)/usage:/app/usage \
+ -v $(pwd)/data:/app/data \
-e SKIP_OAUTH_INIT_CHECK=true \
ghcr.io/mirrowel/llm-api-key-proxy:latest
```
@@ -61,13 +62,13 @@ docker run -d \
**Using Docker Compose:**
```bash
-# Create your .env file and usage directory first, then:
+# Create your .env file and runtime data directories first, then:
cp .env.example .env
-mkdir usage
+mkdir -p usage data
docker compose up -d
```
-> **Important:** Create the `usage/` directory before running Docker Compose so usage stats persist on the host.
+> **Important:** Create `usage/` and `data/` before running Docker Compose so usage stats and SQLite DB data persist on the host.
> **Note:** For OAuth providers, complete authentication locally first using the credential tool, then mount the `oauth_creds/` directory or export credentials to environment variables.
@@ -93,7 +94,15 @@ Once the proxy is running, configure your application with these settings:
| Setting | Value |
|---------|-------|
| **Base URL / API Endpoint** | `http://127.0.0.1:8000/v1` |
-| **API Key** | Your `PROXY_API_KEY` |
+| **API Key** | Depends on `AUTH_MODE` (`PROXY_API_KEY`, user API key, or both) |
+
+### Auth Transition Modes
+
+- `AUTH_MODE=users`: only per-user API keys are accepted for `/v1/*`
+- `AUTH_MODE=legacy`: only `PROXY_API_KEY` is accepted
+- `AUTH_MODE=both`: accepts either user API keys or `PROXY_API_KEY` (recommended migration mode)
+
+Recommended rollout: start with `AUTH_MODE=both`, move clients to user keys, then switch to `AUTH_MODE=users`.
### Model Format: `provider/model_name`
@@ -241,6 +250,27 @@ print(response.content[0].text)
---
+## AUTH_MODE Migration (MVP)
+
+`AUTH_MODE` controls which tokens are accepted on `/v1/*` endpoints.
+
+| `AUTH_MODE` | Accepted token(s) | Intended phase |
+|-------------|-------------------|----------------|
+| `users` | User API keys created in the dashboard/API | Final state after migration |
+| `legacy` | `PROXY_API_KEY` only | Backward-compatible legacy-only mode |
+| `both` (default) | User API keys and `PROXY_API_KEY` | Transition window for gradual rollout |
+
+Recommended transition path:
+
+1. Start on `AUTH_MODE=both`
+2. Bootstrap admin with `INITIAL_ADMIN_USERNAME` + `INITIAL_ADMIN_PASSWORD`
+3. Create user API keys and rotate clients over to those keys
+4. Switch to `AUTH_MODE=users` once all clients are migrated
+
+Legacy compatibility statement: `PROXY_API_KEY` remains fully supported when `AUTH_MODE` is set to `legacy` or `both`.
+
+---
+
## Managing Credentials
The proxy includes an interactive tool for managing all your API keys and OAuth credentials.
@@ -469,10 +499,25 @@ The proxy includes a powerful text-based UI for configuration and management.
| Variable | Description | Default |
|----------|-------------|---------|
-| `PROXY_API_KEY` | Authentication key for your proxy | Required |
+| `PROXY_API_KEY` | Legacy `/v1/*` key used in `legacy` or `both` mode | Optional |
+| `AUTH_MODE` | `/v1/*` auth mode: `users`, `legacy`, `both` | `both` |
+| `APP_ENV` | Runtime environment (`dev` or `prod`) | `dev` |
+| `ALLOW_INSECURE_DEFAULTS` | Allow startup with default secrets | `true` in dev, `false` in prod |
+| `SESSION_SECRET` | Dashboard session signing secret | Required in prod |
+| `API_TOKEN_PEPPER` | HMAC key for API token hashes | Required in prod |
+| `CORS_ALLOW_ORIGINS` | Comma-separated browser origins | empty |
+| `CORS_ALLOW_CREDENTIALS` | Allow credentialed CORS requests | false (unless origins configured) |
| `OAUTH_REFRESH_INTERVAL` | Token refresh check interval (seconds) | `600` |
| `SKIP_OAUTH_INIT_CHECK` | Skip interactive OAuth setup on startup | `false` |
+### Production Checklist
+
+- Set `APP_ENV=prod`
+- Set strong `SESSION_SECRET` and `API_TOKEN_PEPPER`
+- Keep `ALLOW_INSECURE_DEFAULTS=false`
+- Set explicit `CORS_ALLOW_ORIGINS` (do not use `*` with credentials)
+- Keep persistent mounts for `/app/data`, `/app/logs`, `/app/oauth_creds`, and `/app/usage`
+
### Per-Provider Settings
| Pattern | Description | Example |
@@ -859,8 +904,8 @@ The proxy is available as a multi-architecture Docker image (amd64/arm64) from G
cp .env.example .env
nano .env
-# 2. Create usage directory (usage_*.json files are created automatically)
-mkdir usage
+# 2. Create runtime data directories
+mkdir -p usage data
# 3. Start the proxy
docker compose up -d
@@ -869,13 +914,13 @@ docker compose up -d
docker compose logs -f
```
-> **Important:** Create the `usage/` directory before running Docker Compose so usage stats persist on the host.
+> **Important:** Create `usage/` and `data/` before running Docker Compose so usage stats and SQLite data persist on the host.
**Manual Docker Run:**
```bash
-# Create usage directory if it doesn't exist
-mkdir usage
+# Create runtime data directories if they don't exist
+mkdir -p usage data
docker run -d \
--name llm-api-proxy \
@@ -885,6 +930,7 @@ docker run -d \
-v $(pwd)/oauth_creds:/app/oauth_creds \
-v $(pwd)/logs:/app/logs \
-v $(pwd)/usage:/app/usage \
+ -v $(pwd)/data:/app/data \
-e SKIP_OAUTH_INIT_CHECK=true \
-e PYTHONUNBUFFERED=1 \
ghcr.io/mirrowel/llm-api-key-proxy:latest
@@ -905,6 +951,7 @@ docker compose -f docker-compose.dev.yml up -d --build
| `oauth_creds/` | OAuth credential files (persistent) |
| `logs/` | Request logs and detailed logging |
| `usage/` | Usage statistics persistence (`usage_*.json`) |
+| `data/` | SQLite database persistence (`data/proxy.db`) |
**Image Tags:**
diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml
index becc2606..380a00e7 100644
--- a/docker-compose.dev.yml
+++ b/docker-compose.dev.yml
@@ -21,6 +21,8 @@ services:
- ./logs:/app/logs
# Mount usage directory for usage statistics persistence
- ./usage:/app/usage
+ # Mount data directory for sqlite persistence (data/proxy.db)
+ - ./data:/app/data
# Optionally mount additional .env files (e.g., combined credential files)
# - ./antigravity_all_combined.env:/app/antigravity_all_combined.env:ro
environment:
diff --git a/requirements.txt b/requirements.txt
index 1f5d4985..11d70de6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,7 @@
# FastAPI framework for building the proxy server
fastapi
+jinja2
+python-multipart
# ASGI server for running the FastAPI application
uvicorn
# For loading environment variables from a .env file
@@ -20,8 +22,15 @@ colorlog
rich
+sqlalchemy
+aiosqlite
+
# GUI for model filter configuration
customtkinter
# For building the executable
pyinstaller
+
+# Test dependencies
+pytest
+pytest-asyncio
diff --git a/src/proxy_app/api_token_auth.py b/src/proxy_app/api_token_auth.py
new file mode 100644
index 00000000..2d428a5a
--- /dev/null
+++ b/src/proxy_app/api_token_auth.py
@@ -0,0 +1,206 @@
+import hashlib
+import hmac
+import os
+from dataclasses import dataclass
+from datetime import datetime
+
+from fastapi import Depends, HTTPException, status
+from fastapi.security import APIKeyHeader
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from proxy_app.auth import get_db_session
+from proxy_app.db_models import ApiKey, User
+from proxy_app.security_config import get_api_token_pepper
+
+AUTH_MODE_USERS = "users"
+AUTH_MODE_LEGACY = "legacy"
+AUTH_MODE_BOTH = "both"
+DEFAULT_AUTH_MODE = AUTH_MODE_BOTH
+
+AUTH_SOURCE_USER_API_KEY = "user_api_key"
+AUTH_SOURCE_LEGACY_MASTER = "legacy_master"
+
+HASH_SCHEME_HMAC_SHA256 = "hmac_sha256_v1"
+HASH_SCHEME_LEGACY_SHA256_PREFIX = "sha256_prefix_v1"
+
+api_key_header = APIKeyHeader(name="Authorization", auto_error=False)
+anthropic_api_key_header = APIKeyHeader(name="x-api-key", auto_error=False)
+
+
+@dataclass
+class ApiActor:
+ user_id: int | None
+ api_key_id: int | None
+ role: str
+ auth_source: str
+
+
+def get_auth_mode() -> str:
+ mode = (os.getenv("AUTH_MODE") or DEFAULT_AUTH_MODE).strip().lower()
+ return normalize_auth_mode(mode)
+
+
+def normalize_auth_mode(mode: str) -> str:
+ if mode in {AUTH_MODE_USERS, AUTH_MODE_LEGACY, AUTH_MODE_BOTH}:
+ return mode
+ return DEFAULT_AUTH_MODE
+
+
+def get_legacy_master_key() -> str:
+ return os.getenv("PROXY_API_KEY") or ""
+
+
+def parse_bearer_token(authorization: str | None) -> str | None:
+ if not authorization:
+ return None
+ value = authorization.strip()
+ if not value:
+ return None
+ parts = value.split(" ", 1)
+ if len(parts) != 2:
+ return None
+ scheme, token = parts
+ if scheme.lower() != "bearer":
+ return None
+ token = token.strip()
+ return token or None
+
+
+def extract_api_token_from_headers(
+ *, x_api_key: str | None, authorization: str | None
+) -> str | None:
+ if x_api_key and x_api_key.strip():
+ return x_api_key.strip()
+ return parse_bearer_token(authorization)
+
+
+def hash_api_token(token: str, *, pepper: str | None = None) -> str:
+ active_pepper = pepper if pepper is not None else get_api_token_pepper()
+ digest = hmac.new(
+ active_pepper.encode("utf-8"),
+ token.encode("utf-8"),
+ hashlib.sha256,
+ ).hexdigest()
+ return digest
+
+
+def hash_api_token_legacy(token: str, *, pepper: str | None = None) -> str:
+ active_pepper = pepper if pepper is not None else get_api_token_pepper()
+ payload = f"{active_pepper}{token}".encode("utf-8")
+ return hashlib.sha256(payload).hexdigest()
+
+
+def _active_user_api_key_query(token_hashes: list[str]):
+ now = datetime.utcnow()
+ return (
+ select(ApiKey, User)
+ .join(User, ApiKey.user_id == User.id)
+ .where(ApiKey.token_hash.in_(token_hashes))
+ .where(ApiKey.revoked_at.is_(None))
+ .where((ApiKey.expires_at.is_(None)) | (ApiKey.expires_at > now))
+ .where(User.is_active.is_(True))
+ )
+
+
+async def _lookup_user_actor(
+ *,
+ session: AsyncSession,
+ token: str,
+ token_pepper: str | None,
+) -> ApiActor | None:
+ new_hash = hash_api_token(token, pepper=token_pepper)
+ legacy_hash = hash_api_token_legacy(token, pepper=token_pepper)
+ lookup_hashes = [new_hash] if legacy_hash == new_hash else [new_hash, legacy_hash]
+
+ rows = (await session.execute(_active_user_api_key_query(lookup_hashes))).all()
+ if not rows:
+ return None
+
+ api_key: ApiKey
+ user: User
+ api_key, user = rows[0]
+ for row in rows:
+ candidate_key, candidate_user = row
+ if candidate_key.token_hash == new_hash:
+ api_key, user = candidate_key, candidate_user
+ break
+
+ if api_key.token_hash == legacy_hash and api_key.token_hash != new_hash:
+ try:
+ api_key.token_hash = new_hash
+ await session.commit()
+ except Exception:
+ await session.rollback()
+
+ return ApiActor(
+ user_id=user.id,
+ api_key_id=api_key.id,
+ role=user.role,
+ auth_source=AUTH_SOURCE_USER_API_KEY,
+ )
+
+
+async def resolve_api_actor_from_token(
+ *,
+ session: AsyncSession,
+ token: str,
+ auth_mode: str | None = None,
+ legacy_master_key: str | None = None,
+ token_pepper: str | None = None,
+) -> ApiActor | None:
+ mode = normalize_auth_mode(auth_mode) if auth_mode else get_auth_mode()
+
+ if mode in {AUTH_MODE_USERS, AUTH_MODE_BOTH}:
+ actor = await _lookup_user_actor(
+ session=session,
+ token=token,
+ token_pepper=token_pepper,
+ )
+ if actor:
+ return actor
+
+ if mode in {AUTH_MODE_LEGACY, AUTH_MODE_BOTH}:
+ legacy_key = (
+ get_legacy_master_key() if legacy_master_key is None else legacy_master_key
+ )
+ if legacy_key and token == legacy_key:
+ return ApiActor(
+ user_id=None,
+ api_key_id=None,
+ role="admin",
+ auth_source=AUTH_SOURCE_LEGACY_MASTER,
+ )
+
+ return None
+
+
+async def get_api_actor(
+ session: AsyncSession = Depends(get_db_session),
+ x_api_key: str | None = Depends(anthropic_api_key_header),
+ authorization: str | None = Depends(api_key_header),
+) -> ApiActor:
+ token = extract_api_token_from_headers(x_api_key=x_api_key, authorization=authorization)
+ if not token:
+ raise HTTPException(
+ status_code=status.HTTP_401_UNAUTHORIZED,
+ detail="Invalid or missing API Key",
+ )
+
+ actor = await resolve_api_actor_from_token(session=session, token=token)
+ if actor:
+ return actor
+
+ raise HTTPException(
+ status_code=status.HTTP_401_UNAUTHORIZED,
+ detail="Invalid or missing API Key",
+ )
+
+
+async def require_admin_api_actor(actor: ApiActor = Depends(get_api_actor)) -> ApiActor:
+ if actor.role != "admin":
+ raise HTTPException(
+ status_code=status.HTTP_403_FORBIDDEN,
+ detail="Admin access required",
+ )
+ return actor
diff --git a/src/proxy_app/auth.py b/src/proxy_app/auth.py
new file mode 100644
index 00000000..12f07c63
--- /dev/null
+++ b/src/proxy_app/auth.py
@@ -0,0 +1,256 @@
+import base64
+import hashlib
+import hmac
+import json
+import os
+import secrets
+import time
+from dataclasses import dataclass
+from typing import AsyncGenerator
+
+from fastapi import Depends, Form, HTTPException, Request, Response, status
+from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
+
+from proxy_app.db_models import User
+from proxy_app.security_config import get_session_secret
+
+SESSION_COOKIE_NAME = "proxy_session"
+SESSION_COOKIE_PATH = "/"
+SESSION_COOKIE_SAMESITE = "lax"
+CSRF_COOKIE_NAME = "proxy_csrf"
+CSRF_COOKIE_PATH = "/ui"
+CSRF_FORM_FIELD = "csrf_token"
+CSRF_TOKEN_BYTES = 32
+DEFAULT_SESSION_TTL_SECONDS = 60 * 60 * 24
+
+
+def _b64url_encode(data: bytes) -> str:
+ return base64.urlsafe_b64encode(data).decode("ascii").rstrip("=")
+
+
+def _b64url_decode(data: str) -> bytes:
+ padding = "=" * (-len(data) % 4)
+ return base64.urlsafe_b64decode(data + padding)
+
+def _get_session_ttl_seconds() -> int:
+ raw = os.getenv("SESSION_TTL_SECONDS", str(DEFAULT_SESSION_TTL_SECONDS))
+ try:
+ ttl = int(raw)
+ except ValueError:
+ ttl = DEFAULT_SESSION_TTL_SECONDS
+ return max(60, ttl)
+
+
+def _get_cookie_secure() -> bool:
+ return os.getenv("SESSION_COOKIE_SECURE", "false").lower() in {
+ "1",
+ "true",
+ "yes",
+ "on",
+ }
+
+
+def _get_cookie_samesite() -> str:
+ configured = os.getenv("SESSION_COOKIE_SAMESITE", SESSION_COOKIE_SAMESITE).strip().lower()
+ if configured not in {"lax", "strict", "none"}:
+ configured = SESSION_COOKIE_SAMESITE
+ return configured
+
+
+def _resolve_cookie_secure(samesite: str) -> bool:
+ configured_secure = _get_cookie_secure()
+ if samesite == "none":
+ return True
+ return configured_secure
+
+
+def _is_valid_csrf_token(token: str | None) -> bool:
+ if not token:
+ return False
+ if len(token) < 20 or len(token) > 256:
+ return False
+ return True
+
+
+def _generate_csrf_token() -> str:
+ return secrets.token_urlsafe(CSRF_TOKEN_BYTES)
+
+
+def get_csrf_token_for_request(request: Request) -> str:
+ token = request.cookies.get(CSRF_COOKIE_NAME)
+ if _is_valid_csrf_token(token):
+ return token
+ return _generate_csrf_token()
+
+
+def verify_password(password: str, password_hash: str) -> bool:
+ try:
+ algorithm, iterations_raw, salt_b64, digest_b64 = password_hash.split("$", 3)
+ if algorithm != "pbkdf2_sha256":
+ return False
+ iterations = int(iterations_raw)
+ salt = base64.b64decode(salt_b64)
+ expected = base64.b64decode(digest_b64)
+ except Exception:
+ return False
+
+ candidate = hashlib.pbkdf2_hmac(
+ "sha256", password.encode("utf-8"), salt, iterations
+ )
+ return hmac.compare_digest(candidate, expected)
+
+
+def create_session_token(*, user_id: int, username: str, role: str) -> str:
+ now = int(time.time())
+ payload = {
+ "uid": user_id,
+ "usr": username,
+ "rol": role,
+ "iat": now,
+ "exp": now + _get_session_ttl_seconds(),
+ }
+ payload_json = json.dumps(payload, separators=(",", ":"), sort_keys=True).encode(
+ "utf-8"
+ )
+ payload_b64 = _b64url_encode(payload_json)
+ signature = hmac.new(
+ get_session_secret().encode("utf-8"),
+ payload_b64.encode("ascii"),
+ hashlib.sha256,
+ ).digest()
+ return f"{payload_b64}.{_b64url_encode(signature)}"
+
+
+def decode_session_token(token: str) -> dict | None:
+ try:
+ payload_b64, sig_b64 = token.split(".", 1)
+ expected_sig = hmac.new(
+ get_session_secret().encode("utf-8"),
+ payload_b64.encode("ascii"),
+ hashlib.sha256,
+ ).digest()
+ actual_sig = _b64url_decode(sig_b64)
+ if not hmac.compare_digest(expected_sig, actual_sig):
+ return None
+
+ payload = json.loads(_b64url_decode(payload_b64).decode("utf-8"))
+ exp = int(payload["exp"])
+ if exp < int(time.time()):
+ return None
+ return payload
+ except Exception:
+ return None
+
+
+def set_session_cookie(response: Response, token: str) -> None:
+ same_site = _get_cookie_samesite()
+ response.set_cookie(
+ key=SESSION_COOKIE_NAME,
+ value=token,
+ httponly=True,
+ secure=_resolve_cookie_secure(same_site),
+ samesite=same_site,
+ max_age=_get_session_ttl_seconds(),
+ path=SESSION_COOKIE_PATH,
+ )
+
+
+def clear_session_cookie(response: Response) -> None:
+ same_site = _get_cookie_samesite()
+ response.delete_cookie(
+ key=SESSION_COOKIE_NAME,
+ path=SESSION_COOKIE_PATH,
+ secure=_resolve_cookie_secure(same_site),
+ httponly=True,
+ samesite=same_site,
+ )
+
+
+def attach_csrf_cookie(request: Request, response: Response, *, token: str | None = None) -> str:
+ token_value = token if _is_valid_csrf_token(token) else get_csrf_token_for_request(request)
+
+ same_site = _get_cookie_samesite()
+ current_cookie = request.cookies.get(CSRF_COOKIE_NAME)
+ if current_cookie != token_value:
+ response.set_cookie(
+ key=CSRF_COOKIE_NAME,
+ value=token_value,
+ httponly=True,
+ secure=_resolve_cookie_secure(same_site),
+ samesite=same_site,
+ max_age=_get_session_ttl_seconds(),
+ path=CSRF_COOKIE_PATH,
+ )
+ return token_value
+
+
+def clear_csrf_cookie(response: Response) -> None:
+ same_site = _get_cookie_samesite()
+ response.delete_cookie(
+ key=CSRF_COOKIE_NAME,
+ path=CSRF_COOKIE_PATH,
+ secure=_resolve_cookie_secure(same_site),
+ httponly=True,
+ samesite=same_site,
+ )
+
+
+async def require_csrf(
+ request: Request,
+ csrf_token: str = Form(default="", alias=CSRF_FORM_FIELD),
+) -> None:
+ cookie_token = request.cookies.get(CSRF_COOKIE_NAME)
+ if not (_is_valid_csrf_token(cookie_token) and _is_valid_csrf_token(csrf_token)):
+ raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="CSRF validation failed")
+ if not hmac.compare_digest(cookie_token, csrf_token):
+ raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="CSRF validation failed")
+
+
+@dataclass
+class SessionUser:
+ id: int
+ username: str
+ role: str
+
+
+async def get_db_session(request: Request) -> AsyncGenerator[AsyncSession, None]:
+ session_maker: async_sessionmaker[AsyncSession] = request.app.state.db_session_maker
+ async with session_maker() as session:
+ yield session
+
+
+async def require_user(
+ request: Request,
+ session: AsyncSession = Depends(get_db_session),
+) -> SessionUser:
+ token = request.cookies.get(SESSION_COOKIE_NAME)
+ if not token:
+ raise HTTPException(
+ status_code=status.HTTP_401_UNAUTHORIZED,
+ detail="Authentication required",
+ )
+
+ payload = decode_session_token(token)
+ if not payload:
+ raise HTTPException(
+ status_code=status.HTTP_401_UNAUTHORIZED,
+ detail="Invalid session",
+ )
+
+ user = await session.get(User, int(payload.get("uid", 0)))
+ if not user or not user.is_active:
+ raise HTTPException(
+ status_code=status.HTTP_401_UNAUTHORIZED,
+ detail="Invalid session",
+ )
+
+ return SessionUser(id=user.id, username=user.username, role=user.role)
+
+
+async def require_admin(current_user: SessionUser = Depends(require_user)) -> SessionUser:
+ if current_user.role != "admin":
+ raise HTTPException(
+ status_code=status.HTTP_403_FORBIDDEN,
+ detail="Admin access required",
+ )
+ return current_user
diff --git a/src/proxy_app/db.py b/src/proxy_app/db.py
new file mode 100644
index 00000000..b5cd0720
--- /dev/null
+++ b/src/proxy_app/db.py
@@ -0,0 +1,70 @@
+import base64
+import hashlib
+import logging
+import os
+from pathlib import Path
+
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
+
+from proxy_app.db_models import Base, User
+
+PBKDF2_ITERATIONS = 200000
+
+
+def hash_password(password: str) -> str:
+ salt = os.urandom(16)
+ digest = hashlib.pbkdf2_hmac(
+ "sha256", password.encode("utf-8"), salt, PBKDF2_ITERATIONS
+ )
+ return "pbkdf2_sha256${}${}${}".format(
+ PBKDF2_ITERATIONS,
+ base64.b64encode(salt).decode("ascii"),
+ base64.b64encode(digest).decode("ascii"),
+ )
+
+
+def get_database_url(root_dir: Path) -> str:
+ configured = os.getenv("DATABASE_URL")
+ if configured:
+ return configured
+ db_dir = root_dir / "data"
+ db_dir.mkdir(parents=True, exist_ok=True)
+ return f"sqlite+aiosqlite:///{db_dir / 'proxy.db'}"
+
+
+async def _bootstrap_initial_admin(session: AsyncSession) -> bool:
+ username = (os.getenv("INITIAL_ADMIN_USERNAME") or "").strip()
+ password = os.getenv("INITIAL_ADMIN_PASSWORD") or ""
+ if not username or not password:
+ logging.info("INITIAL_ADMIN_USERNAME/PASSWORD not set, skipping bootstrap")
+ return False
+
+ existing = await session.scalar(select(User).where(User.username == username))
+ if existing:
+ return False
+
+ admin = User(
+ username=username,
+ password_hash=hash_password(password),
+ role="admin",
+ is_active=True,
+ )
+ session.add(admin)
+ await session.commit()
+ logging.info("Bootstrapped initial admin user '%s'", username)
+ return True
+
+
+async def init_db(root_dir: Path) -> async_sessionmaker[AsyncSession]:
+ database_url = get_database_url(root_dir)
+ engine = create_async_engine(database_url, future=True)
+ session_maker = async_sessionmaker(engine, expire_on_commit=False)
+
+ async with engine.begin() as conn:
+ await conn.run_sync(Base.metadata.create_all)
+
+ async with session_maker() as session:
+ await _bootstrap_initial_admin(session)
+
+ return session_maker
diff --git a/src/proxy_app/db_models.py b/src/proxy_app/db_models.py
new file mode 100644
index 00000000..90d7d23e
--- /dev/null
+++ b/src/proxy_app/db_models.py
@@ -0,0 +1,56 @@
+from datetime import datetime
+
+from sqlalchemy import Boolean, DateTime, Float, ForeignKey, Integer, String, Text
+from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
+
+
+class Base(DeclarativeBase):
+ pass
+
+
+class User(Base):
+ __tablename__ = "users"
+
+ id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
+ username: Mapped[str] = mapped_column(String(128), unique=True, index=True)
+ password_hash: Mapped[str] = mapped_column(Text)
+ role: Mapped[str] = mapped_column(String(16), default="user")
+ is_active: Mapped[bool] = mapped_column(Boolean, default=True)
+ created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
+ last_login_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
+
+
+class ApiKey(Base):
+ __tablename__ = "api_keys"
+
+ id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
+ user_id: Mapped[int] = mapped_column(ForeignKey("users.id"), index=True)
+ name: Mapped[str] = mapped_column(String(128))
+ token_prefix: Mapped[str] = mapped_column(String(24), index=True)
+ token_hash: Mapped[str] = mapped_column(String(128), unique=True, index=True)
+ created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
+ last_used_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
+ revoked_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
+ expires_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
+
+
+class UsageEvent(Base):
+ __tablename__ = "usage_events"
+
+ id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
+ timestamp: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow, index=True)
+ user_id: Mapped[int | None] = mapped_column(ForeignKey("users.id"), nullable=True, index=True)
+ api_key_id: Mapped[int | None] = mapped_column(
+ ForeignKey("api_keys.id"), nullable=True, index=True
+ )
+ endpoint: Mapped[str] = mapped_column(String(128), index=True)
+ provider: Mapped[str | None] = mapped_column(String(64), nullable=True, index=True)
+ model: Mapped[str | None] = mapped_column(String(256), nullable=True, index=True)
+ request_id: Mapped[str] = mapped_column(String(128), index=True)
+ status_code: Mapped[int] = mapped_column(Integer)
+ prompt_tokens: Mapped[int | None] = mapped_column(Integer, nullable=True)
+ completion_tokens: Mapped[int | None] = mapped_column(Integer, nullable=True)
+ total_tokens: Mapped[int | None] = mapped_column(Integer, nullable=True)
+ cost_usd: Mapped[float | None] = mapped_column(Float, nullable=True)
+ error_type: Mapped[str | None] = mapped_column(String(64), nullable=True)
+ error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
diff --git a/src/proxy_app/detailed_logger.py b/src/proxy_app/detailed_logger.py
index 07d346ff..9723e623 100644
--- a/src/proxy_app/detailed_logger.py
+++ b/src/proxy_app/detailed_logger.py
@@ -38,6 +38,7 @@
safe_mkdir,
)
from rotator_library.utils.paths import get_logs_dir
+from proxy_app.request_logger import redact_sensitive_data, redact_sensitive_headers
def _get_raw_io_logs_dir() -> Path:
@@ -96,8 +97,8 @@ def log_request(self, headers: Dict[str, Any], body: Dict[str, Any]):
request_data = {
"request_id": self.request_id,
"timestamp_utc": datetime.utcnow().isoformat(),
- "headers": dict(headers),
- "body": body,
+ "headers": redact_sensitive_headers(dict(headers)),
+ "body": redact_sensitive_data(body),
}
self._write_json("request.json", request_data)
@@ -106,7 +107,10 @@ def log_stream_chunk(self, chunk: Dict[str, Any]):
if not self._dir_available:
return
- log_entry = {"timestamp_utc": datetime.utcnow().isoformat(), "chunk": chunk}
+ log_entry = {
+ "timestamp_utc": datetime.utcnow().isoformat(),
+ "chunk": redact_sensitive_data(chunk),
+ }
content = json.dumps(log_entry, ensure_ascii=False) + "\n"
safe_log_write(self.log_dir / "streaming_chunks.jsonl", content, logging)
@@ -122,8 +126,8 @@ def log_final_response(
"timestamp_utc": datetime.utcnow().isoformat(),
"status_code": status_code,
"duration_ms": round(duration_ms),
- "headers": dict(headers) if headers else None,
- "body": body,
+ "headers": redact_sensitive_headers(dict(headers)) if headers else None,
+ "body": redact_sensitive_data(body),
}
self._write_json("final_response.json", response_data)
self._log_metadata(response_data)
diff --git a/src/proxy_app/main.py b/src/proxy_app/main.py
index 3e4bbbbc..fcd85b55 100644
--- a/src/proxy_app/main.py
+++ b/src/proxy_app/main.py
@@ -111,7 +111,7 @@
from fastapi import FastAPI, Request, HTTPException, Depends
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse, JSONResponse
- from fastapi.security import APIKeyHeader
+ from fastapi.staticfiles import StaticFiles
print(" → Loading core dependencies...")
with _console.status("[dim]Loading core dependencies...", spinner="dots"):
@@ -135,9 +135,18 @@
from rotator_library.credential_manager import CredentialManager
from rotator_library.background_refresher import BackgroundRefresher
from rotator_library.model_info_service import init_model_info_service
- from proxy_app.request_logger import log_request_to_console
+ from proxy_app.request_logger import log_request_to_console, redact_sensitive_data
+ from proxy_app.security_config import get_cors_settings, validate_secret_settings
from proxy_app.batch_manager import EmbeddingBatcher
+ from proxy_app.api_token_auth import ApiActor, get_api_actor, require_admin_api_actor
from proxy_app.detailed_logger import RawIOLogger
+ from proxy_app.db import init_db
+ from proxy_app.routers import admin_router, auth_router, ui_router, user_router
+ from proxy_app.usage_recorder import (
+ record_usage_event as record_usage_event_async,
+ start_usage_recorder,
+ stop_usage_recorder,
+ )
print(" → Discovering provider plugins...")
# Provider lazy loading happens during import, so time it here
@@ -351,6 +360,8 @@ def filter(self, record):
# Load environment variables from .env file
load_dotenv(_root_dir / ".env")
+validate_secret_settings()
+
# --- Configuration ---
USE_EMBEDDING_BATCHER = False
ENABLE_REQUEST_LOGGING = args.enable_request_logging
@@ -425,6 +436,9 @@ def filter(self, record):
@asynccontextmanager
async def lifespan(app: FastAPI):
"""Manage the RotatingClient's lifecycle with the app's lifespan."""
+ app.state.db_session_maker = await init_db(_root_dir)
+ app.state.usage_recorder = await start_usage_recorder(app.state.db_session_maker)
+
# [MODIFIED] Perform skippable OAuth initialization at startup
skip_oauth_init = os.getenv("SKIP_OAUTH_INIT_CHECK", "false").lower() == "true"
@@ -577,7 +591,11 @@ async def process_credential(provider: str, path: str, provider_instance):
json.dump(data, f, indent=2)
f.truncate()
except Exception as e:
- logging.error(f"Failed to update metadata for '{path}': {e}")
+ logging.error(
+ "Failed to update metadata for '%s': %s",
+ path,
+ redact_sensitive_data(str(e), key_hint="error"),
+ )
logging.info("OAuth credential processing complete.")
oauth_credentials = final_oauth_credentials
@@ -644,6 +662,7 @@ async def process_credential(provider: str, path: str, provider_instance):
yield
+ await stop_usage_recorder()
await client.background_refresher.stop() # Stop the background task on shutdown
if app.state.embedding_batcher:
await app.state.embedding_batcher.stop()
@@ -661,18 +680,25 @@ async def process_credential(provider: str, path: str, provider_instance):
# --- FastAPI App Setup ---
app = FastAPI(lifespan=lifespan)
+app.include_router(auth_router)
+app.include_router(user_router)
+app.include_router(admin_router)
+app.include_router(ui_router)
+app.mount(
+ "/static",
+ StaticFiles(directory=str(Path(__file__).resolve().parent / "static")),
+ name="static",
+)
-# Add CORS middleware to allow all origins, methods, and headers
+# Add CORS middleware with secure defaults.
+_cors_settings = get_cors_settings()
app.add_middleware(
CORSMiddleware,
- allow_origins=["*"], # Allows all origins
- allow_credentials=True,
- allow_methods=["*"], # Allows all methods
- allow_headers=["*"], # Allows all headers
+ allow_origins=_cors_settings.allow_origins,
+ allow_credentials=_cors_settings.allow_credentials,
+ allow_methods=_cors_settings.allow_methods,
+ allow_headers=_cors_settings.allow_headers,
)
-api_key_header = APIKeyHeader(name="Authorization", auto_error=False)
-
-
def get_rotating_client(request: Request) -> RotatingClient:
"""Dependency to get the rotating client instance from the app state."""
return request.app.state.rotating_client
@@ -683,41 +709,89 @@ def get_embedding_batcher(request: Request) -> EmbeddingBatcher:
return request.app.state.embedding_batcher
-async def verify_api_key(auth: str = Depends(api_key_header)):
- """Dependency to verify the proxy API key."""
- # If PROXY_API_KEY is not set or empty, skip verification (open access)
- if not PROXY_API_KEY:
- return auth
- if not auth or auth != f"Bearer {PROXY_API_KEY}":
- raise HTTPException(status_code=401, detail="Invalid or missing API Key")
- return auth
-
-
-# --- Anthropic API Key Header ---
-anthropic_api_key_header = APIKeyHeader(name="x-api-key", auto_error=False)
-
-
-async def verify_anthropic_api_key(
- x_api_key: str = Depends(anthropic_api_key_header),
- auth: str = Depends(api_key_header),
-):
- """
- Dependency to verify API key for Anthropic endpoints.
- Accepts either x-api-key header (Anthropic style) or Authorization Bearer (OpenAI style).
- """
- # Check x-api-key first (Anthropic style)
- if x_api_key and x_api_key == PROXY_API_KEY:
- return x_api_key
- # Fall back to Bearer token (OpenAI style)
- if auth and auth == f"Bearer {PROXY_API_KEY}":
- return auth
- raise HTTPException(status_code=401, detail="Invalid or missing API Key")
+def get_usage_recorder(request: Request):
+ """Dependency to get the usage recorder instance from the app state."""
+ return getattr(request.app.state, "usage_recorder", None)
+
+
+def _resolve_request_id(request: Request, fallback_id: str | None = None) -> str:
+ request_id = request.headers.get("x-request-id") or request.headers.get("request-id")
+ if request_id:
+ return request_id
+ if fallback_id:
+ return fallback_id
+ return str(uuid.uuid4())
+
+
+def _extract_usage_payload(payload: Any) -> dict[str, Any] | None:
+ if payload is None:
+ return None
+ usage = None
+ if isinstance(payload, dict):
+ usage = payload.get("usage")
+ elif hasattr(payload, "usage"):
+ usage = getattr(payload, "usage")
+
+ if usage is None and hasattr(payload, "model_dump"):
+ dumped = payload.model_dump(exclude_none=True)
+ if isinstance(dumped, dict):
+ usage = dumped.get("usage")
+
+ if usage is None:
+ return None
+
+ if hasattr(usage, "model_dump"):
+ usage = usage.model_dump(exclude_none=True)
+
+ if isinstance(usage, dict):
+ return usage
+ return None
+
+
+_ERROR_MESSAGE_MAX_CHARS = max(64, int(os.getenv("ERROR_MESSAGE_MAX_CHARS", "500")))
+
+
+def _safe_error_message(error: Exception | str | None, *, default: str) -> str:
+ raw = str(error).strip() if error is not None else ""
+ if not raw:
+ raw = default
+ redacted = redact_sensitive_data(raw, key_hint="error")
+ if not isinstance(redacted, str):
+ redacted = default
+ if len(redacted) > _ERROR_MESSAGE_MAX_CHARS:
+ return f"{redacted[:_ERROR_MESSAGE_MAX_CHARS]}...[truncated]"
+ return redacted
+
+
+async def _record_usage(
+ *,
+ actor: ApiActor,
+ endpoint: str,
+ model: str | None,
+ request_id: str,
+ status_code: int,
+ usage: dict[str, Any] | None,
+ error: Exception | None = None,
+) -> None:
+ await record_usage_event_async(
+ actor=actor,
+ endpoint=endpoint,
+ model=model,
+ status_code=status_code,
+ usage=usage,
+ request_id=request_id,
+ error_type=type(error).__name__ if error else None,
+ error_message=_safe_error_message(error, default="Internal server error") if error else None,
+ )
async def streaming_response_wrapper(
request: Request,
request_data: dict,
response_stream: AsyncGenerator[str, None],
+ actor: ApiActor,
+ endpoint: str,
+ request_id: str,
logger: Optional[RawIOLogger] = None,
) -> AsyncGenerator[str, None]:
"""
@@ -726,6 +800,10 @@ async def streaming_response_wrapper(
"""
response_chunks = []
full_response = {}
+ usage_data = None
+ status_code = 200
+ stream_error: Exception | None = None
+ model = request_data.get("model")
try:
async for chunk_str in response_stream:
@@ -744,11 +822,14 @@ async def streaming_response_wrapper(
except json.JSONDecodeError:
pass
except Exception as e:
- logging.error(f"An error occurred during the response stream: {e}")
+ status_code = 500
+ stream_error = e
+ safe_message = _safe_error_message(e, default="Unexpected streaming error")
+ logging.error("An error occurred during the response stream: %s", safe_message)
# Yield a final error message to the client to ensure they are not left hanging.
error_payload = {
"error": {
- "message": f"An unexpected error occurred during the stream: {str(e)}",
+ "message": f"An unexpected error occurred during the stream: {safe_message}",
"type": "proxy_internal_error",
"code": 500,
}
@@ -758,7 +839,7 @@ async def streaming_response_wrapper(
# Also log this as a failed request
if logger:
logger.log_final_response(
- status_code=500, headers=None, body={"error": str(e)}
+ status_code=500, headers=None, body={"error": safe_message}
)
return # Stop further processing
finally:
@@ -877,20 +958,32 @@ async def streaming_response_wrapper(
"choices": [final_choice],
"usage": usage_data,
}
+ model = full_response.get("model") or model
+ request_id = _resolve_request_id(request, full_response.get("id") or request_id)
if logger:
logger.log_final_response(
- status_code=200,
+ status_code=status_code,
headers=None, # Headers are not available at this stage
body=full_response,
)
+ await _record_usage(
+ actor=actor,
+ endpoint=endpoint,
+ model=model,
+ request_id=request_id,
+ status_code=status_code,
+ usage=usage_data,
+ error=stream_error,
+ )
+
@app.post("/v1/chat/completions")
async def chat_completions(
request: Request,
client: RotatingClient = Depends(get_rotating_client),
- _=Depends(verify_api_key),
+ actor: ApiActor = Depends(get_api_actor),
):
"""
OpenAI-compatible endpoint powered by the RotatingClient.
@@ -898,11 +991,24 @@ async def chat_completions(
"""
# Raw I/O logger captures unmodified HTTP data at proxy boundary (disabled by default)
raw_logger = RawIOLogger() if ENABLE_RAW_LOGGING else None
+ endpoint = "/v1/chat/completions"
+ request_id = _resolve_request_id(request)
+ model = None
try:
# Read and parse the request body only once at the beginning.
try:
request_data = await request.json()
except json.JSONDecodeError:
+ error = ValueError("Invalid JSON in request body.")
+ await _record_usage(
+ actor=actor,
+ endpoint=endpoint,
+ model=model,
+ request_id=request_id,
+ status_code=400,
+ usage=None,
+ error=error,
+ )
raise HTTPException(status_code=400, detail="Invalid JSON in request body.")
# Global temperature=0 override (controlled by .env variable, default: OFF)
@@ -963,12 +1069,26 @@ async def chat_completions(
)
return StreamingResponse(
streaming_response_wrapper(
- request, request_data, response_generator, raw_logger
+ request,
+ request_data,
+ response_generator,
+ actor,
+ endpoint,
+ request_id,
+ raw_logger,
),
media_type="text/event-stream",
)
else:
response = await client.acompletion(request=request, **request_data)
+ response_dump = (
+ response.model_dump(exclude_none=True)
+ if hasattr(response, "model_dump")
+ else response
+ )
+ if isinstance(response_dump, dict):
+ request_id = _resolve_request_id(request, response_dump.get("id") or request_id)
+ model = response_dump.get("model") or model
if raw_logger:
# Assuming response has status_code and headers attributes
# This might need adjustment based on the actual response object
@@ -981,8 +1101,17 @@ async def chat_completions(
raw_logger.log_final_response(
status_code=status_code,
headers=response_headers,
- body=response.model_dump(),
+ body=response_dump,
)
+
+ await _record_usage(
+ actor=actor,
+ endpoint=endpoint,
+ model=model,
+ request_id=request_id,
+ status_code=200,
+ usage=_extract_usage_payload(response_dump),
+ )
return response
except (
@@ -990,19 +1119,83 @@ async def chat_completions(
ValueError,
litellm.ContextWindowExceededError,
) as e:
- raise HTTPException(status_code=400, detail=f"Invalid Request: {str(e)}")
+ await _record_usage(
+ actor=actor,
+ endpoint=endpoint,
+ model=model,
+ request_id=request_id,
+ status_code=400,
+ usage=None,
+ error=e,
+ )
+ raise HTTPException(status_code=400, detail=f"Invalid Request: {_safe_error_message(e, default='Invalid request')}")
except litellm.AuthenticationError as e:
- raise HTTPException(status_code=401, detail=f"Authentication Error: {str(e)}")
+ await _record_usage(
+ actor=actor,
+ endpoint=endpoint,
+ model=model,
+ request_id=request_id,
+ status_code=401,
+ usage=None,
+ error=e,
+ )
+ raise HTTPException(status_code=401, detail=f"Authentication Error: {_safe_error_message(e, default='Authentication failed')}")
except litellm.RateLimitError as e:
- raise HTTPException(status_code=429, detail=f"Rate Limit Exceeded: {str(e)}")
+ await _record_usage(
+ actor=actor,
+ endpoint=endpoint,
+ model=model,
+ request_id=request_id,
+ status_code=429,
+ usage=None,
+ error=e,
+ )
+ raise HTTPException(status_code=429, detail=f"Rate Limit Exceeded: {_safe_error_message(e, default='Rate limit exceeded')}")
except (litellm.ServiceUnavailableError, litellm.APIConnectionError) as e:
- raise HTTPException(status_code=503, detail=f"Service Unavailable: {str(e)}")
+ await _record_usage(
+ actor=actor,
+ endpoint=endpoint,
+ model=model,
+ request_id=request_id,
+ status_code=503,
+ usage=None,
+ error=e,
+ )
+ raise HTTPException(status_code=503, detail=f"Service Unavailable: {_safe_error_message(e, default='Service unavailable')}")
except litellm.Timeout as e:
- raise HTTPException(status_code=504, detail=f"Gateway Timeout: {str(e)}")
+ await _record_usage(
+ actor=actor,
+ endpoint=endpoint,
+ model=model,
+ request_id=request_id,
+ status_code=504,
+ usage=None,
+ error=e,
+ )
+ raise HTTPException(status_code=504, detail=f"Gateway Timeout: {_safe_error_message(e, default='Gateway timeout')}")
except (litellm.InternalServerError, litellm.OpenAIError) as e:
- raise HTTPException(status_code=502, detail=f"Bad Gateway: {str(e)}")
+ await _record_usage(
+ actor=actor,
+ endpoint=endpoint,
+ model=model,
+ request_id=request_id,
+ status_code=502,
+ usage=None,
+ error=e,
+ )
+ raise HTTPException(status_code=502, detail=f"Bad Gateway: {_safe_error_message(e, default='Upstream error')}")
except Exception as e:
- logging.error(f"Request failed after all retries: {e}")
+ safe_message = _safe_error_message(e, default="Internal server error")
+ logging.error("Request failed after all retries: %s", safe_message)
+ await _record_usage(
+ actor=actor,
+ endpoint=endpoint,
+ model=model,
+ request_id=request_id,
+ status_code=500,
+ usage=None,
+ error=e,
+ )
# Optionally log the failed request
if ENABLE_REQUEST_LOGGING:
try:
@@ -1011,9 +1204,9 @@ async def chat_completions(
request_data = {"error": "Could not parse request body"}
if raw_logger:
raw_logger.log_final_response(
- status_code=500, headers=None, body={"error": str(e)}
+ status_code=500, headers=None, body={"error": safe_message}
)
- raise HTTPException(status_code=500, detail=str(e))
+ raise HTTPException(status_code=500, detail=safe_message)
# --- Anthropic Messages API Endpoint ---
@@ -1022,7 +1215,7 @@ async def anthropic_messages(
request: Request,
body: AnthropicMessagesRequest,
client: RotatingClient = Depends(get_rotating_client),
- _=Depends(verify_anthropic_api_key),
+ actor: ApiActor = Depends(get_api_actor),
):
"""
Anthropic-compatible Messages API endpoint.
@@ -1034,6 +1227,9 @@ async def anthropic_messages(
"""
# Initialize raw I/O logger if enabled (for debugging proxy boundary)
logger = RawIOLogger() if ENABLE_RAW_LOGGING else None
+ endpoint = "/v1/messages"
+ model = body.model
+ request_id = _resolve_request_id(request)
# Log raw Anthropic request if raw logging is enabled
if logger:
@@ -1058,6 +1254,14 @@ async def anthropic_messages(
result = await client.anthropic_messages(body, raw_request=request)
if body.stream:
+ await _record_usage(
+ actor=actor,
+ endpoint=endpoint,
+ model=model,
+ request_id=request_id,
+ status_code=200,
+ usage=None,
+ )
# Streaming response
return StreamingResponse(
result,
@@ -1069,6 +1273,8 @@ async def anthropic_messages(
},
)
else:
+ request_id = _resolve_request_id(request, result.get("id") if isinstance(result, dict) else request_id)
+ model = result.get("model") if isinstance(result, dict) else model
# Non-streaming response
if logger:
logger.log_final_response(
@@ -1076,6 +1282,14 @@ async def anthropic_messages(
headers=None,
body=result,
)
+ await _record_usage(
+ actor=actor,
+ endpoint=endpoint,
+ model=model,
+ request_id=request_id,
+ status_code=200,
+ usage=_extract_usage_payload(result),
+ )
return JSONResponse(content=result)
except (
@@ -1083,46 +1297,116 @@ async def anthropic_messages(
ValueError,
litellm.ContextWindowExceededError,
) as e:
+ await _record_usage(
+ actor=actor,
+ endpoint=endpoint,
+ model=model,
+ request_id=request_id,
+ status_code=400,
+ usage=None,
+ error=e,
+ )
error_response = {
"type": "error",
- "error": {"type": "invalid_request_error", "message": str(e)},
+ "error": {
+ "type": "invalid_request_error",
+ "message": _safe_error_message(e, default="Invalid request"),
+ },
}
raise HTTPException(status_code=400, detail=error_response)
except litellm.AuthenticationError as e:
+ await _record_usage(
+ actor=actor,
+ endpoint=endpoint,
+ model=model,
+ request_id=request_id,
+ status_code=401,
+ usage=None,
+ error=e,
+ )
error_response = {
"type": "error",
- "error": {"type": "authentication_error", "message": str(e)},
+ "error": {
+ "type": "authentication_error",
+ "message": _safe_error_message(e, default="Authentication failed"),
+ },
}
raise HTTPException(status_code=401, detail=error_response)
except litellm.RateLimitError as e:
+ await _record_usage(
+ actor=actor,
+ endpoint=endpoint,
+ model=model,
+ request_id=request_id,
+ status_code=429,
+ usage=None,
+ error=e,
+ )
error_response = {
"type": "error",
- "error": {"type": "rate_limit_error", "message": str(e)},
+ "error": {
+ "type": "rate_limit_error",
+ "message": _safe_error_message(e, default="Rate limit exceeded"),
+ },
}
raise HTTPException(status_code=429, detail=error_response)
except (litellm.ServiceUnavailableError, litellm.APIConnectionError) as e:
+ await _record_usage(
+ actor=actor,
+ endpoint=endpoint,
+ model=model,
+ request_id=request_id,
+ status_code=503,
+ usage=None,
+ error=e,
+ )
error_response = {
"type": "error",
- "error": {"type": "api_error", "message": str(e)},
+ "error": {
+ "type": "api_error",
+ "message": _safe_error_message(e, default="Service unavailable"),
+ },
}
raise HTTPException(status_code=503, detail=error_response)
except litellm.Timeout as e:
+ await _record_usage(
+ actor=actor,
+ endpoint=endpoint,
+ model=model,
+ request_id=request_id,
+ status_code=504,
+ usage=None,
+ error=e,
+ )
error_response = {
"type": "error",
- "error": {"type": "api_error", "message": f"Request timed out: {str(e)}"},
+ "error": {
+ "type": "api_error",
+ "message": f"Request timed out: {_safe_error_message(e, default='Gateway timeout')}",
+ },
}
raise HTTPException(status_code=504, detail=error_response)
except Exception as e:
- logging.error(f"Anthropic messages endpoint error: {e}")
+ safe_message = _safe_error_message(e, default="Internal server error")
+ logging.error("Anthropic messages endpoint error: %s", safe_message)
+ await _record_usage(
+ actor=actor,
+ endpoint=endpoint,
+ model=model,
+ request_id=request_id,
+ status_code=500,
+ usage=None,
+ error=e,
+ )
if logger:
logger.log_final_response(
status_code=500,
headers=None,
- body={"error": str(e)},
+ body={"error": safe_message},
)
error_response = {
"type": "error",
- "error": {"type": "api_error", "message": str(e)},
+ "error": {"type": "api_error", "message": safe_message},
}
raise HTTPException(status_code=500, detail=error_response)
@@ -1133,7 +1417,7 @@ async def anthropic_count_tokens(
request: Request,
body: AnthropicCountTokensRequest,
client: RotatingClient = Depends(get_rotating_client),
- _=Depends(verify_anthropic_api_key),
+ _=Depends(get_api_actor),
):
"""
Anthropic-compatible count_tokens endpoint.
@@ -1155,20 +1439,27 @@ async def anthropic_count_tokens(
) as e:
error_response = {
"type": "error",
- "error": {"type": "invalid_request_error", "message": str(e)},
+ "error": {
+ "type": "invalid_request_error",
+ "message": _safe_error_message(e, default="Invalid request"),
+ },
}
raise HTTPException(status_code=400, detail=error_response)
except litellm.AuthenticationError as e:
error_response = {
"type": "error",
- "error": {"type": "authentication_error", "message": str(e)},
+ "error": {
+ "type": "authentication_error",
+ "message": _safe_error_message(e, default="Authentication failed"),
+ },
}
raise HTTPException(status_code=401, detail=error_response)
except Exception as e:
- logging.error(f"Anthropic count_tokens endpoint error: {e}")
+ safe_message = _safe_error_message(e, default="Internal server error")
+ logging.error("Anthropic count_tokens endpoint error: %s", safe_message)
error_response = {
"type": "error",
- "error": {"type": "api_error", "message": str(e)},
+ "error": {"type": "api_error", "message": safe_message},
}
raise HTTPException(status_code=500, detail=error_response)
@@ -1179,7 +1470,7 @@ async def embeddings(
body: EmbeddingRequest,
client: RotatingClient = Depends(get_rotating_client),
batcher: Optional[EmbeddingBatcher] = Depends(get_embedding_batcher),
- _=Depends(verify_api_key),
+ actor: ApiActor = Depends(get_api_actor),
):
"""
OpenAI-compatible endpoint for creating embeddings.
@@ -1187,6 +1478,9 @@ async def embeddings(
- True: Uses a server-side batcher for high throughput.
- False: Passes requests directly to the provider.
"""
+ endpoint = "/v1/embeddings"
+ model = body.model
+ request_id = _resolve_request_id(request)
try:
request_data = body.model_dump(exclude_none=True)
log_request_to_console(
@@ -1238,6 +1532,22 @@ async def embeddings(
response = await client.aembedding(request=request, **request_data)
+ response_dump = (
+ response.model_dump(exclude_none=True)
+ if hasattr(response, "model_dump")
+ else response
+ )
+ if isinstance(response_dump, dict):
+ request_id = _resolve_request_id(request, response_dump.get("id") or request_id)
+ model = response_dump.get("model") or model
+ await _record_usage(
+ actor=actor,
+ endpoint=endpoint,
+ model=model,
+ request_id=request_id,
+ status_code=200,
+ usage=_extract_usage_payload(response_dump),
+ )
return response
except HTTPException as e:
@@ -1248,20 +1558,84 @@ async def embeddings(
ValueError,
litellm.ContextWindowExceededError,
) as e:
- raise HTTPException(status_code=400, detail=f"Invalid Request: {str(e)}")
+ await _record_usage(
+ actor=actor,
+ endpoint=endpoint,
+ model=model,
+ request_id=request_id,
+ status_code=400,
+ usage=None,
+ error=e,
+ )
+ raise HTTPException(status_code=400, detail=f"Invalid Request: {_safe_error_message(e, default='Invalid request')}")
except litellm.AuthenticationError as e:
- raise HTTPException(status_code=401, detail=f"Authentication Error: {str(e)}")
+ await _record_usage(
+ actor=actor,
+ endpoint=endpoint,
+ model=model,
+ request_id=request_id,
+ status_code=401,
+ usage=None,
+ error=e,
+ )
+ raise HTTPException(status_code=401, detail=f"Authentication Error: {_safe_error_message(e, default='Authentication failed')}")
except litellm.RateLimitError as e:
- raise HTTPException(status_code=429, detail=f"Rate Limit Exceeded: {str(e)}")
+ await _record_usage(
+ actor=actor,
+ endpoint=endpoint,
+ model=model,
+ request_id=request_id,
+ status_code=429,
+ usage=None,
+ error=e,
+ )
+ raise HTTPException(status_code=429, detail=f"Rate Limit Exceeded: {_safe_error_message(e, default='Rate limit exceeded')}")
except (litellm.ServiceUnavailableError, litellm.APIConnectionError) as e:
- raise HTTPException(status_code=503, detail=f"Service Unavailable: {str(e)}")
+ await _record_usage(
+ actor=actor,
+ endpoint=endpoint,
+ model=model,
+ request_id=request_id,
+ status_code=503,
+ usage=None,
+ error=e,
+ )
+ raise HTTPException(status_code=503, detail=f"Service Unavailable: {_safe_error_message(e, default='Service unavailable')}")
except litellm.Timeout as e:
- raise HTTPException(status_code=504, detail=f"Gateway Timeout: {str(e)}")
+ await _record_usage(
+ actor=actor,
+ endpoint=endpoint,
+ model=model,
+ request_id=request_id,
+ status_code=504,
+ usage=None,
+ error=e,
+ )
+ raise HTTPException(status_code=504, detail=f"Gateway Timeout: {_safe_error_message(e, default='Gateway timeout')}")
except (litellm.InternalServerError, litellm.OpenAIError) as e:
- raise HTTPException(status_code=502, detail=f"Bad Gateway: {str(e)}")
+ await _record_usage(
+ actor=actor,
+ endpoint=endpoint,
+ model=model,
+ request_id=request_id,
+ status_code=502,
+ usage=None,
+ error=e,
+ )
+ raise HTTPException(status_code=502, detail=f"Bad Gateway: {_safe_error_message(e, default='Upstream error')}")
except Exception as e:
- logging.error(f"Embedding request failed: {e}")
- raise HTTPException(status_code=500, detail=str(e))
+ safe_message = _safe_error_message(e, default="Internal server error")
+ logging.error("Embedding request failed: %s", safe_message)
+ await _record_usage(
+ actor=actor,
+ endpoint=endpoint,
+ model=model,
+ request_id=request_id,
+ status_code=500,
+ usage=None,
+ error=e,
+ )
+ raise HTTPException(status_code=500, detail=safe_message)
@app.get("/")
@@ -1273,7 +1647,7 @@ def read_root():
async def list_models(
request: Request,
client: RotatingClient = Depends(get_rotating_client),
- _=Depends(verify_api_key),
+ _=Depends(get_api_actor),
enriched: bool = True,
):
"""
@@ -1309,7 +1683,7 @@ async def list_models(
async def get_model(
model_id: str,
request: Request,
- _=Depends(verify_api_key),
+ _=Depends(get_api_actor),
):
"""
Returns detailed information about a specific model.
@@ -1336,7 +1710,7 @@ async def get_model(
@app.get("/v1/model-info/stats")
async def model_info_stats(
request: Request,
- _=Depends(verify_api_key),
+ _=Depends(get_api_actor),
):
"""
Returns statistics about the model info service (for monitoring/debugging).
@@ -1347,7 +1721,7 @@ async def model_info_stats(
@app.get("/v1/providers")
-async def list_providers(_=Depends(verify_api_key)):
+async def list_providers(_=Depends(get_api_actor)):
"""
Returns a list of all available providers.
"""
@@ -1358,7 +1732,7 @@ async def list_providers(_=Depends(verify_api_key)):
async def get_quota_stats(
request: Request,
client: RotatingClient = Depends(get_rotating_client),
- _=Depends(verify_api_key),
+ _=Depends(require_admin_api_actor),
provider: str = None,
):
"""
@@ -1394,15 +1768,16 @@ async def get_quota_stats(
stats = await client.get_quota_stats(provider_filter=provider)
return stats
except Exception as e:
- logging.error(f"Failed to get quota stats: {e}")
- raise HTTPException(status_code=500, detail=str(e))
+ safe_message = _safe_error_message(e, default="Internal server error")
+ logging.error("Failed to get quota stats: %s", safe_message)
+ raise HTTPException(status_code=500, detail=safe_message)
@app.post("/v1/quota-stats")
async def refresh_quota_stats(
request: Request,
client: RotatingClient = Depends(get_rotating_client),
- _=Depends(verify_api_key),
+ _=Depends(require_admin_api_actor),
):
"""
Refresh quota and usage statistics.
@@ -1489,15 +1864,16 @@ async def refresh_quota_stats(
except HTTPException:
raise
except Exception as e:
- logging.error(f"Failed to refresh quota stats: {e}")
- raise HTTPException(status_code=500, detail=str(e))
+ safe_message = _safe_error_message(e, default="Internal server error")
+ logging.error("Failed to refresh quota stats: %s", safe_message)
+ raise HTTPException(status_code=500, detail=safe_message)
@app.post("/v1/token-count")
async def token_count(
request: Request,
client: RotatingClient = Depends(get_rotating_client),
- _=Depends(verify_api_key),
+ _=Depends(get_api_actor),
):
"""
Calculates the token count for a given list of messages and a model.
@@ -1516,12 +1892,13 @@ async def token_count(
return {"token_count": count}
except Exception as e:
- logging.error(f"Token count failed: {e}")
- raise HTTPException(status_code=500, detail=str(e))
+ safe_message = _safe_error_message(e, default="Internal server error")
+ logging.error("Token count failed: %s", safe_message)
+ raise HTTPException(status_code=500, detail=safe_message)
@app.post("/v1/cost-estimate")
-async def cost_estimate(request: Request, _=Depends(verify_api_key)):
+async def cost_estimate(request: Request, _=Depends(get_api_actor)):
"""
Estimates the cost for a request based on token counts and model pricing.
@@ -1611,8 +1988,9 @@ async def cost_estimate(request: Request, _=Depends(verify_api_key)):
except HTTPException:
raise
except Exception as e:
- logging.error(f"Cost estimate failed: {e}")
- raise HTTPException(status_code=500, detail=str(e))
+ safe_message = _safe_error_message(e, default="Internal server error")
+ logging.error("Cost estimate failed: %s", safe_message)
+ raise HTTPException(status_code=500, detail=safe_message)
if __name__ == "__main__":
diff --git a/src/proxy_app/request_logger.py b/src/proxy_app/request_logger.py
index a5e9074e..3dd7e0f9 100644
--- a/src/proxy_app/request_logger.py
+++ b/src/proxy_app/request_logger.py
@@ -1,16 +1,94 @@
# SPDX-License-Identifier: MIT
# Copyright (c) 2026 Mirrowel
-import json
-import os
from datetime import datetime
-from pathlib import Path
-import uuid
-from typing import Literal, Dict
import logging
+import re
+from typing import Any
from .provider_urls import get_provider_endpoint
+REDACTED = "[REDACTED]"
+_SENSITIVE_HEADER_NAMES = {
+ "authorization",
+ "proxy-authorization",
+ "x-api-key",
+ "api-key",
+ "cookie",
+ "set-cookie",
+}
+_SENSITIVE_KEY_FRAGMENTS = {
+ "authorization",
+ "api_key",
+ "apikey",
+ "access_token",
+ "refresh_token",
+ "id_token",
+ "session",
+ "cookie",
+ "secret",
+ "password",
+ "token",
+}
+_TOKEN_PATTERNS = [
+ re.compile(r"(?i)^bearer\s+.+$"),
+ re.compile(r"\bsk-[A-Za-z0-9_-]{16,}\b"),
+ re.compile(r"\bpk_[A-Za-z0-9_-]{16,}\b"),
+ re.compile(r"\b[A-Za-z0-9_-]{20,}\.[A-Za-z0-9_-]{20,}\.[A-Za-z0-9_-]{20,}\b"),
+]
+
+
+def _looks_like_token(value: str) -> bool:
+ for pattern in _TOKEN_PATTERNS:
+ if pattern.search(value):
+ return True
+ return False
+
+
+def _is_sensitive_key(key: str | None) -> bool:
+ if not key:
+ return False
+ normalized = key.lower().replace("-", "_")
+ return any(fragment in normalized for fragment in _SENSITIVE_KEY_FRAGMENTS)
+
+
+def redact_sensitive_value(value: Any, *, key_hint: str | None = None) -> Any:
+ if value is None:
+ return None
+ if isinstance(value, str):
+ if _is_sensitive_key(key_hint) or _looks_like_token(value):
+ return REDACTED
+ return value
+ if isinstance(value, bytes):
+ return REDACTED
+ return value
+
+
+def redact_sensitive_headers(headers: dict[str, Any] | None) -> dict[str, Any] | None:
+ if headers is None:
+ return None
+ redacted: dict[str, Any] = {}
+ for key, value in headers.items():
+ if key.lower() in _SENSITIVE_HEADER_NAMES:
+ redacted[key] = REDACTED
+ else:
+ redacted[key] = redact_sensitive_value(value, key_hint=key)
+ return redacted
+
+
+def redact_sensitive_data(value: Any, *, key_hint: str | None = None) -> Any:
+ if isinstance(value, dict):
+ redacted_dict = {}
+ for key, nested_value in value.items():
+ if _is_sensitive_key(key):
+ redacted_dict[key] = REDACTED
+ else:
+ redacted_dict[key] = redact_sensitive_data(nested_value, key_hint=key)
+ return redacted_dict
+ if isinstance(value, list):
+ return [redact_sensitive_data(item, key_hint=key_hint) for item in value]
+ return redact_sensitive_value(value, key_hint=key_hint)
+
def log_request_to_console(url: str, headers: dict, client_info: tuple, request_data: dict):
"""
Logs a concise, single-line summary of an incoming request to the console.
@@ -31,4 +109,3 @@ def log_request_to_console(url: str, headers: dict, client_info: tuple, request_
log_message = f"{time_str} - {client_info[0]}:{client_info[1]} - provider: {provider}, model: {model_name} - {endpoint_url}"
logging.info(log_message)
-
diff --git a/src/proxy_app/routers/__init__.py b/src/proxy_app/routers/__init__.py
new file mode 100644
index 00000000..22cd7641
--- /dev/null
+++ b/src/proxy_app/routers/__init__.py
@@ -0,0 +1,6 @@
+from proxy_app.routers.auth_api import router as auth_router
+from proxy_app.routers.admin_api import router as admin_router
+from proxy_app.routers.user_api import router as user_router
+from proxy_app.routers.ui import router as ui_router
+
+__all__ = ["auth_router", "user_router", "admin_router", "ui_router"]
diff --git a/src/proxy_app/routers/admin_api.py b/src/proxy_app/routers/admin_api.py
new file mode 100644
index 00000000..35188e02
--- /dev/null
+++ b/src/proxy_app/routers/admin_api.py
@@ -0,0 +1,245 @@
+import secrets
+from datetime import datetime
+from typing import Literal
+
+from fastapi import APIRouter, Depends, HTTPException, Query, status
+from pydantic import BaseModel
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from proxy_app.auth import SessionUser, get_db_session, require_admin
+from proxy_app.db import hash_password
+from proxy_app.db_models import User
+from proxy_app.usage_queries import (
+ fetch_usage_by_day,
+ fetch_usage_by_model,
+ fetch_usage_summary,
+)
+
+router = APIRouter(prefix="/api/admin", tags=["admin"])
+
+
class AdminUserItem(BaseModel):
    # Public projection of a User row for admin listings (no password hash).
    id: int
    username: str
    role: str
    is_active: bool
    created_at: datetime
    last_login_at: datetime | None


class AdminUserListResponse(BaseModel):
    users: list[AdminUserItem]


class CreateAdminUserRequest(BaseModel):
    # role is constrained to the two known roles; new accounts default to active.
    username: str
    password: str
    role: Literal["admin", "user"] = "user"
    is_active: bool = True


class ResetPasswordRequest(BaseModel):
    # password is optional; when omitted a random one is generated server-side.
    password: str | None = None


class ResetPasswordResponse(BaseModel):
    # NOTE(review): echoes the new plaintext password back to the admin caller;
    # confirm response bodies are never captured by logging.
    id: int
    username: str
    password: str


class UsageTotals(BaseModel):
    request_count: int
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
    cost_usd: float | None


class UsageByDayItem(UsageTotals):
    day: str


class UsageByDayResponse(BaseModel):
    days: int
    rows: list[UsageByDayItem]


class UsageByModelItem(UsageTotals):
    model: str | None


class UsageByModelResponse(BaseModel):
    days: int
    rows: list[UsageByModelItem]


def _serialize_user(user: User) -> AdminUserItem:
    """Map a User ORM row to its admin API representation."""
    return AdminUserItem(
        id=user.id,
        username=user.username,
        role=user.role,
        is_active=user.is_active,
        created_at=user.created_at,
        last_login_at=user.last_login_at,
    )


async def _require_target_user(session: AsyncSession, user_id: int) -> User:
    """Fetch a user by primary key or raise 404."""
    user = await session.get(User, user_id)
    if not user:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="User not found",
        )
    return user
+
+
+@router.get("/users", response_model=AdminUserListResponse)
+async def admin_list_users(
+ _: SessionUser = Depends(require_admin),
+ session: AsyncSession = Depends(get_db_session),
+) -> AdminUserListResponse:
+ rows = await session.scalars(select(User).order_by(User.created_at.asc()))
+ return AdminUserListResponse(users=[_serialize_user(row) for row in rows])
+
+
+@router.post("/users", response_model=AdminUserItem)
+async def admin_create_user(
+ payload: CreateAdminUserRequest,
+ _: SessionUser = Depends(require_admin),
+ session: AsyncSession = Depends(get_db_session),
+) -> AdminUserItem:
+ username = payload.username.strip()
+ if not username:
+ raise HTTPException(
+ status_code=status.HTTP_400_BAD_REQUEST,
+ detail="Username cannot be empty",
+ )
+
+ if not payload.password:
+ raise HTTPException(
+ status_code=status.HTTP_400_BAD_REQUEST,
+ detail="Password cannot be empty",
+ )
+
+ existing = await session.scalar(select(User).where(User.username == username))
+ if existing:
+ raise HTTPException(
+ status_code=status.HTTP_409_CONFLICT,
+ detail="Username already exists",
+ )
+
+ user = User(
+ username=username,
+ password_hash=hash_password(payload.password),
+ role=payload.role,
+ is_active=payload.is_active,
+ )
+ session.add(user)
+ await session.commit()
+ await session.refresh(user)
+ return _serialize_user(user)
+
+
+@router.post("/users/{id}/disable")
+async def admin_disable_user(
+ id: int,
+ current_admin: SessionUser = Depends(require_admin),
+ session: AsyncSession = Depends(get_db_session),
+) -> dict[str, bool]:
+ if current_admin.id == id:
+ raise HTTPException(
+ status_code=status.HTTP_400_BAD_REQUEST,
+ detail="Cannot disable your own account",
+ )
+
+ user = await session.get(User, id)
+ if not user:
+ raise HTTPException(
+ status_code=status.HTTP_404_NOT_FOUND,
+ detail="User not found",
+ )
+
+ if user.is_active:
+ user.is_active = False
+ await session.commit()
+ return {"ok": True}
+
+
+@router.post("/users/{id}/enable")
+async def admin_enable_user(
+ id: int,
+ _: SessionUser = Depends(require_admin),
+ session: AsyncSession = Depends(get_db_session),
+) -> dict[str, bool]:
+ user = await session.get(User, id)
+ if not user:
+ raise HTTPException(
+ status_code=status.HTTP_404_NOT_FOUND,
+ detail="User not found",
+ )
+
+ if not user.is_active:
+ user.is_active = True
+ await session.commit()
+ return {"ok": True}
+
+
+@router.post("/users/{id}/reset-password", response_model=ResetPasswordResponse)
+async def admin_reset_password(
+ id: int,
+ payload: ResetPasswordRequest,
+ _: SessionUser = Depends(require_admin),
+ session: AsyncSession = Depends(get_db_session),
+) -> ResetPasswordResponse:
+ user = await session.get(User, id)
+ if not user:
+ raise HTTPException(
+ status_code=status.HTTP_404_NOT_FOUND,
+ detail="User not found",
+ )
+
+ new_password = payload.password or secrets.token_urlsafe(12)
+ user.password_hash = hash_password(new_password)
+ await session.commit()
+
+ return ResetPasswordResponse(id=user.id, username=user.username, password=new_password)
+
+
+@router.get("/users/{id}/usage/summary", response_model=UsageTotals)
+async def admin_user_usage_summary(
+ id: int,
+ _: SessionUser = Depends(require_admin),
+ session: AsyncSession = Depends(get_db_session),
+) -> UsageTotals:
+ await _require_target_user(session, id)
+ return UsageTotals(**(await fetch_usage_summary(session, user_id=id)))
+
+
+@router.get("/users/{id}/usage/by-day", response_model=UsageByDayResponse)
+async def admin_user_usage_by_day(
+ id: int,
+ days: int = Query(default=30, ge=1, le=365),
+ _: SessionUser = Depends(require_admin),
+ session: AsyncSession = Depends(get_db_session),
+) -> UsageByDayResponse:
+ await _require_target_user(session, id)
+ rows = await fetch_usage_by_day(session, user_id=id, days=days)
+ return UsageByDayResponse(days=days, rows=[UsageByDayItem(**row) for row in rows])
+
+
+@router.get("/users/{id}/usage/by-model", response_model=UsageByModelResponse)
+async def admin_user_usage_by_model(
+ id: int,
+ days: int = Query(default=30, ge=1, le=365),
+ _: SessionUser = Depends(require_admin),
+ session: AsyncSession = Depends(get_db_session),
+) -> UsageByModelResponse:
+ await _require_target_user(session, id)
+ rows = await fetch_usage_by_model(session, user_id=id, days=days)
+ return UsageByModelResponse(
+ days=days,
+ rows=[UsageByModelItem(**row) for row in rows],
+ )
diff --git a/src/proxy_app/routers/auth_api.py b/src/proxy_app/routers/auth_api.py
new file mode 100644
index 00000000..2c9a088c
--- /dev/null
+++ b/src/proxy_app/routers/auth_api.py
@@ -0,0 +1,73 @@
+from datetime import datetime
+
+from fastapi import APIRouter, Depends, HTTPException, Response, status
+from pydantic import BaseModel
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from proxy_app.auth import (
+ SessionUser,
+ clear_session_cookie,
+ create_session_token,
+ get_db_session,
+ require_user,
+ set_session_cookie,
+ verify_password,
+)
+from proxy_app.db_models import User
+
+router = APIRouter(prefix="/auth", tags=["auth"])
+
+
class LoginRequest(BaseModel):
    # Credentials for password login; username is stripped before lookup.
    username: str
    password: str


class IdentityResponse(BaseModel):
    # Minimal identity payload returned by /login and /me.
    id: int
    username: str
    role: str
+
+
+@router.post("/login", response_model=IdentityResponse)
+async def login(
+ payload: LoginRequest,
+ response: Response,
+ session: AsyncSession = Depends(get_db_session),
+) -> IdentityResponse:
+ username = payload.username.strip()
+ user = await session.scalar(select(User).where(User.username == username))
+ if not user or not verify_password(payload.password, user.password_hash):
+ raise HTTPException(
+ status_code=status.HTTP_401_UNAUTHORIZED,
+ detail="Invalid username or password",
+ )
+ if not user.is_active:
+ raise HTTPException(
+ status_code=status.HTTP_403_FORBIDDEN,
+ detail="User is inactive",
+ )
+
+ user.last_login_at = datetime.utcnow()
+ await session.commit()
+
+ token = create_session_token(user_id=user.id, username=user.username, role=user.role)
+ set_session_cookie(response, token)
+
+ return IdentityResponse(id=user.id, username=user.username, role=user.role)
+
+
+@router.post("/logout")
+async def logout(response: Response) -> dict[str, bool]:
+ clear_session_cookie(response)
+ return {"ok": True}
+
+
+@router.get("/me", response_model=IdentityResponse)
+async def me(current_user: SessionUser = Depends(require_user)) -> IdentityResponse:
+ return IdentityResponse(
+ id=current_user.id,
+ username=current_user.username,
+ role=current_user.role,
+ )
diff --git a/src/proxy_app/routers/ui.py b/src/proxy_app/routers/ui.py
new file mode 100644
index 00000000..c4b50557
--- /dev/null
+++ b/src/proxy_app/routers/ui.py
@@ -0,0 +1,429 @@
+import secrets
+from datetime import datetime
+from pathlib import Path
+
+from fastapi import APIRouter, Depends, Form, HTTPException, Query, Request, status
+from fastapi.responses import HTMLResponse, RedirectResponse
+from fastapi.templating import Jinja2Templates
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from proxy_app.api_token_auth import hash_api_token
+from proxy_app.auth import (
+ CSRF_FORM_FIELD,
+ SESSION_COOKIE_NAME,
+ SessionUser,
+ attach_csrf_cookie,
+ clear_csrf_cookie,
+ clear_session_cookie,
+ create_session_token,
+ decode_session_token,
+ get_db_session,
+ get_csrf_token_for_request,
+ require_csrf,
+ require_admin,
+ require_user,
+ set_session_cookie,
+ verify_password,
+)
+from proxy_app.db import hash_password
+from proxy_app.db_models import ApiKey, User
+from proxy_app.usage_queries import (
+ fetch_usage_by_day,
+ fetch_usage_by_model,
+ fetch_usage_summary,
+)
+
+router = APIRouter(prefix="/ui", tags=["ui"])
+templates = Jinja2Templates(directory=str(Path(__file__).resolve().parent.parent / "templates"))
+
+
def _redirect(path: str) -> RedirectResponse:
    # 303 forces the follow-up request to be a GET (post/redirect/get).
    return RedirectResponse(url=path, status_code=status.HTTP_303_SEE_OTHER)


def _template_response(
    *,
    request: Request,
    name: str,
    context: dict,
    status_code: int = status.HTTP_200_OK,
) -> HTMLResponse:
    """Render a template with the CSRF token injected into the form context
    and mirrored onto the response as a cookie so form posts can be
    cross-checked against it."""
    merged_context = dict(context)
    merged_context["request"] = request
    csrf_token = get_csrf_token_for_request(request)
    merged_context[CSRF_FORM_FIELD] = csrf_token
    response = templates.TemplateResponse(
        request=request,
        name=name,
        context=merged_context,
        status_code=status_code,
    )
    # The same token rendered into the form must also travel in the cookie.
    attach_csrf_cookie(request, response, token=csrf_token)
    return response
+
+
+def _generate_api_token() -> str:
+ return f"pk_{secrets.token_urlsafe(32)}"
+
+
async def _optional_session_user(
    request: Request,
    session: AsyncSession,
) -> SessionUser | None:
    """Resolve the session cookie to an active user, or None.

    Returns None for missing cookies, invalid tokens, unknown user ids, and
    disabled accounts - callers treat all of these as "not signed in".
    """
    token = request.cookies.get(SESSION_COOKIE_NAME)
    if not token:
        return None

    payload = decode_session_token(token)
    if not payload:
        return None

    # Assumes decode_session_token yields an int-compatible "uid"; a
    # non-numeric uid would raise ValueError here - TODO confirm upstream.
    user = await session.get(User, int(payload.get("uid", 0)))
    if not user or not user.is_active:
        return None

    return SessionUser(id=user.id, username=user.username, role=user.role)


async def _load_me_context(
    session: AsyncSession,
    user_id: int,
    *,
    days: int,
) -> dict:
    """Build the template context for /ui/me: the user's API keys plus usage
    summary and per-day usage for the trailing *days* window."""
    rows = await session.scalars(
        select(ApiKey).where(ApiKey.user_id == user_id).order_by(ApiKey.created_at.desc())
    )
    api_keys = list(rows)
    usage_summary = await fetch_usage_summary(session, user_id=user_id)
    usage_by_day = await fetch_usage_by_day(session, user_id=user_id, days=days)
    return {
        "api_keys": api_keys,
        "usage_summary": usage_summary,
        "usage_by_day": usage_by_day,
        "days": days,
    }


async def _load_admin_user_detail_context(
    session: AsyncSession,
    user_id: int,
    *,
    days: int,
) -> dict:
    """Build the template context for the admin user-detail page.

    Raises 404 when the target user does not exist.
    """
    user = await session.get(User, user_id)
    if not user:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="User not found")

    usage_summary = await fetch_usage_summary(session, user_id=user_id)
    usage_by_day = await fetch_usage_by_day(session, user_id=user_id, days=days)
    usage_by_model = await fetch_usage_by_model(session, user_id=user_id, days=days)
    return {
        "target_user": user,
        "usage_summary": usage_summary,
        "usage_by_day": usage_by_day,
        "usage_by_model": usage_by_model,
        "days": days,
    }
+
+
+@router.get("/login", response_class=HTMLResponse)
+async def ui_login_page(
+ request: Request,
+ session: AsyncSession = Depends(get_db_session),
+) -> HTMLResponse:
+ session_user = await _optional_session_user(request, session)
+ if session_user:
+ return _redirect("/ui/me")
+
+ return _template_response(request=request, name="login.html", context={"error": None})
+
+
+@router.post("/login", response_class=HTMLResponse)
+async def ui_login_submit(
+ request: Request,
+ _: None = Depends(require_csrf),
+ username: str = Form(default=""),
+ password: str = Form(default=""),
+ session: AsyncSession = Depends(get_db_session),
+) -> HTMLResponse:
+ username = username.strip()
+ user = await session.scalar(select(User).where(User.username == username))
+ if not user or not verify_password(password, user.password_hash):
+ return _template_response(
+ request=request,
+ name="login.html",
+ status_code=status.HTTP_401_UNAUTHORIZED,
+ context={"error": "Invalid username or password"},
+ )
+
+ if not user.is_active:
+ return _template_response(
+ request=request,
+ name="login.html",
+ status_code=status.HTTP_403_FORBIDDEN,
+ context={"error": "User is inactive"},
+ )
+
+ user.last_login_at = datetime.utcnow()
+ await session.commit()
+
+ response = _redirect("/ui/me")
+ token = create_session_token(user_id=user.id, username=user.username, role=user.role)
+ set_session_cookie(response, token)
+ return response
+
+
+@router.post("/logout")
+async def ui_logout(_: None = Depends(require_csrf)) -> RedirectResponse:
+ response = _redirect("/ui/login")
+ clear_session_cookie(response)
+ clear_csrf_cookie(response)
+ return response
+
+
+@router.get("/me", response_class=HTMLResponse)
+async def ui_me_page(
+ request: Request,
+ days: int = Query(default=30, ge=1, le=365),
+ current_user: SessionUser = Depends(require_user),
+ session: AsyncSession = Depends(get_db_session),
+) -> HTMLResponse:
+ context = await _load_me_context(session, current_user.id, days=days)
+ context.update(
+ {
+ "current_user": current_user,
+ "new_api_key_token": None,
+ "error": None,
+ }
+ )
+ return _template_response(request=request, name="me.html", context=context)
+
+
+@router.post("/me/api-keys", response_class=HTMLResponse)
+async def ui_create_api_key(
+ request: Request,
+ _: None = Depends(require_csrf),
+ name: str = Form(default=""),
+ days: int = Query(default=30, ge=1, le=365),
+ current_user: SessionUser = Depends(require_user),
+ session: AsyncSession = Depends(get_db_session),
+) -> HTMLResponse:
+ key_name = name.strip()
+ if not key_name:
+ context = await _load_me_context(session, current_user.id, days=days)
+ context.update(
+ {
+ "current_user": current_user,
+ "new_api_key_token": None,
+ "error": "API key name cannot be empty",
+ }
+ )
+ return _template_response(
+ request=request,
+ name="me.html",
+ context=context,
+ status_code=status.HTTP_400_BAD_REQUEST,
+ )
+
+ token = _generate_api_token()
+ key = ApiKey(
+ user_id=current_user.id,
+ name=key_name,
+ token_prefix=token[:20],
+ token_hash=hash_api_token(token),
+ )
+ session.add(key)
+ await session.commit()
+
+ context = await _load_me_context(session, current_user.id, days=days)
+ context.update(
+ {
+ "current_user": current_user,
+ "new_api_key_token": token,
+ "error": None,
+ }
+ )
+ return _template_response(request=request, name="me.html", context=context)
+
+
+@router.post("/me/api-keys/{id}/revoke")
+async def ui_revoke_api_key(
+ request: Request,
+ id: int,
+ _: None = Depends(require_csrf),
+ current_user: SessionUser = Depends(require_user),
+ session: AsyncSession = Depends(get_db_session),
+) -> RedirectResponse:
+ key = await session.scalar(
+ select(ApiKey).where(ApiKey.id == id, ApiKey.user_id == current_user.id)
+ )
+ if not key:
+ raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="API key not found")
+
+ if not key.revoked_at:
+ key.revoked_at = datetime.utcnow()
+ await session.commit()
+
+ return _redirect("/ui/me")
+
+
+@router.get("/admin", response_class=HTMLResponse)
+async def ui_admin_page(
+ request: Request,
+ current_admin: SessionUser = Depends(require_admin),
+ session: AsyncSession = Depends(get_db_session),
+) -> HTMLResponse:
+ rows = await session.scalars(select(User).order_by(User.created_at.asc()))
+ return _template_response(
+ request=request,
+ name="admin.html",
+ context={
+ "current_admin": current_admin,
+ "users": list(rows),
+ "error": None,
+ },
+ )
+
+
+@router.post("/admin/users", response_class=HTMLResponse)
+async def ui_admin_create_user(
+ request: Request,
+ _: None = Depends(require_csrf),
+ username: str = Form(default=""),
+ password: str = Form(default=""),
+ role: str = Form(default="user"),
+ is_active: bool = Form(default=False),
+ current_admin: SessionUser = Depends(require_admin),
+ session: AsyncSession = Depends(get_db_session),
+) -> HTMLResponse:
+ username = username.strip()
+ error = None
+
+ if not username:
+ error = "Username cannot be empty"
+ elif not password:
+ error = "Password cannot be empty"
+ elif role not in {"admin", "user"}:
+ error = "Role must be admin or user"
+
+ if not error:
+ existing = await session.scalar(select(User).where(User.username == username))
+ if existing:
+ error = "Username already exists"
+
+ if error:
+ rows = await session.scalars(select(User).order_by(User.created_at.asc()))
+ return _template_response(
+ request=request,
+ name="admin.html",
+ status_code=status.HTTP_400_BAD_REQUEST,
+ context={
+ "current_admin": current_admin,
+ "users": list(rows),
+ "error": error,
+ },
+ )
+
+ user = User(
+ username=username,
+ password_hash=hash_password(password),
+ role=role,
+ is_active=is_active,
+ )
+ session.add(user)
+ await session.commit()
+ return _redirect("/ui/admin")
+
+
+@router.get("/admin/users/{id}", response_class=HTMLResponse)
+async def ui_admin_user_detail(
+ id: int,
+ request: Request,
+ days: int = Query(default=30, ge=1, le=365),
+ current_admin: SessionUser = Depends(require_admin),
+ session: AsyncSession = Depends(get_db_session),
+) -> HTMLResponse:
+ context = await _load_admin_user_detail_context(session, id, days=days)
+ context.update(
+ {
+ "current_admin": current_admin,
+ "error": None,
+ "new_password": None,
+ }
+ )
+ return _template_response(request=request, name="admin_user_detail.html", context=context)
+
+
+@router.post("/admin/users/{id}/disable")
+async def ui_admin_disable_user(
+ request: Request,
+ id: int,
+ _: None = Depends(require_csrf),
+ current_admin: SessionUser = Depends(require_admin),
+ session: AsyncSession = Depends(get_db_session),
+) -> RedirectResponse:
+ if current_admin.id == id:
+ raise HTTPException(
+ status_code=status.HTTP_400_BAD_REQUEST,
+ detail="Cannot disable your own account",
+ )
+
+ user = await session.get(User, id)
+ if not user:
+ raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="User not found")
+
+ if user.is_active:
+ user.is_active = False
+ await session.commit()
+
+ return _redirect(f"/ui/admin/users/{id}")
+
+
+@router.post("/admin/users/{id}/enable")
+async def ui_admin_enable_user(
+ request: Request,
+ id: int,
+ csrf_ok: None = Depends(require_csrf),
+ session: AsyncSession = Depends(get_db_session),
+ _: SessionUser = Depends(require_admin),
+) -> RedirectResponse:
+ user = await session.get(User, id)
+ if not user:
+ raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="User not found")
+
+ if not user.is_active:
+ user.is_active = True
+ await session.commit()
+
+ return _redirect(f"/ui/admin/users/{id}")
+
+
+@router.post("/admin/users/{id}/reset-password", response_class=HTMLResponse)
+async def ui_admin_reset_password(
+ id: int,
+ request: Request,
+ _: None = Depends(require_csrf),
+ password: str = Form(default=""),
+ days: int = Query(default=30, ge=1, le=365),
+ current_admin: SessionUser = Depends(require_admin),
+ session: AsyncSession = Depends(get_db_session),
+) -> HTMLResponse:
+ user = await session.get(User, id)
+ if not user:
+ raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="User not found")
+
+ new_password = password or secrets.token_urlsafe(12)
+ user.password_hash = hash_password(new_password)
+ await session.commit()
+
+ context = await _load_admin_user_detail_context(session, id, days=days)
+ context.update(
+ {
+ "current_admin": current_admin,
+ "error": None,
+ "new_password": new_password,
+ }
+ )
+ return _template_response(request=request, name="admin_user_detail.html", context=context)
diff --git a/src/proxy_app/routers/user_api.py b/src/proxy_app/routers/user_api.py
new file mode 100644
index 00000000..a435a891
--- /dev/null
+++ b/src/proxy_app/routers/user_api.py
@@ -0,0 +1,212 @@
+import secrets
+from datetime import datetime
+
+from fastapi import APIRouter, Depends, HTTPException, Query, status
+from pydantic import BaseModel
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from proxy_app.auth import SessionUser, get_db_session, require_user
+from proxy_app.api_token_auth import hash_api_token
+from proxy_app.db_models import ApiKey, User
+from proxy_app.usage_queries import (
+ fetch_usage_by_day,
+ fetch_usage_by_model,
+ fetch_usage_summary,
+)
+
+router = APIRouter(prefix="/api", tags=["user"])
+
def generate_api_token() -> str:
    """Create a new opaque API token carrying the recognizable "pk_" prefix.

    NOTE(review): duplicates _generate_api_token in routers/ui.py - consider
    a shared helper so prefix/length stay in sync.
    """
    return "pk_" + secrets.token_urlsafe(32)
+
+
class MeResponse(BaseModel):
    # Account info for the authenticated user (no secrets included).
    id: int
    username: str
    role: str
    is_active: bool


class ApiKeyItem(BaseModel):
    # API key metadata only - the plaintext token is never listed, just the
    # stored display prefix.
    id: int
    name: str
    token_prefix: str
    created_at: datetime
    last_used_at: datetime | None
    revoked_at: datetime | None
    expires_at: datetime | None


class ApiKeyListResponse(BaseModel):
    api_keys: list[ApiKeyItem]


class CreateApiKeyRequest(BaseModel):
    name: str


class CreateApiKeyResponse(BaseModel):
    # token is the one-time plaintext; it is only returned at creation.
    id: int
    name: str
    token: str
    token_prefix: str
    created_at: datetime


class UsageTotals(BaseModel):
    request_count: int
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
    cost_usd: float | None


class UsageByDayItem(UsageTotals):
    day: str


class UsageByDayResponse(BaseModel):
    days: int
    rows: list[UsageByDayItem]


class UsageByModelItem(UsageTotals):
    model: str | None


class UsageByModelResponse(BaseModel):
    days: int
    rows: list[UsageByModelItem]
+
+
+@router.get("/me", response_model=MeResponse)
+async def get_me(
+ current_user: SessionUser = Depends(require_user),
+ session: AsyncSession = Depends(get_db_session),
+) -> MeResponse:
+ user = await session.get(User, current_user.id)
+ if not user:
+ raise HTTPException(
+ status_code=status.HTTP_401_UNAUTHORIZED,
+ detail="Invalid session",
+ )
+
+ return MeResponse(
+ id=user.id,
+ username=user.username,
+ role=user.role,
+ is_active=user.is_active,
+ )
+
+
+@router.get("/me/api-keys", response_model=ApiKeyListResponse)
+async def list_my_api_keys(
+ current_user: SessionUser = Depends(require_user),
+ session: AsyncSession = Depends(get_db_session),
+) -> ApiKeyListResponse:
+ rows = await session.scalars(
+ select(ApiKey)
+ .where(ApiKey.user_id == current_user.id)
+ .order_by(ApiKey.created_at.desc())
+ )
+ return ApiKeyListResponse(
+ api_keys=[
+ ApiKeyItem(
+ id=row.id,
+ name=row.name,
+ token_prefix=row.token_prefix,
+ created_at=row.created_at,
+ last_used_at=row.last_used_at,
+ revoked_at=row.revoked_at,
+ expires_at=row.expires_at,
+ )
+ for row in rows
+ ]
+ )
+
+
+@router.post("/me/api-keys", response_model=CreateApiKeyResponse)
+async def create_my_api_key(
+ payload: CreateApiKeyRequest,
+ current_user: SessionUser = Depends(require_user),
+ session: AsyncSession = Depends(get_db_session),
+) -> CreateApiKeyResponse:
+ name = payload.name.strip()
+ if not name:
+ raise HTTPException(
+ status_code=status.HTTP_400_BAD_REQUEST,
+ detail="API key name cannot be empty",
+ )
+
+ token = generate_api_token()
+ key = ApiKey(
+ user_id=current_user.id,
+ name=name,
+ token_prefix=token[:20],
+ token_hash=hash_api_token(token),
+ )
+ session.add(key)
+ await session.commit()
+ await session.refresh(key)
+
+ return CreateApiKeyResponse(
+ id=key.id,
+ name=key.name,
+ token=token,
+ token_prefix=key.token_prefix,
+ created_at=key.created_at,
+ )
+
+
+@router.delete("/me/api-keys/{id}")
+async def revoke_my_api_key(
+ id: int,
+ current_user: SessionUser = Depends(require_user),
+ session: AsyncSession = Depends(get_db_session),
+) -> dict[str, bool]:
+ key = await session.scalar(
+ select(ApiKey).where(ApiKey.id == id, ApiKey.user_id == current_user.id)
+ )
+ if not key:
+ raise HTTPException(
+ status_code=status.HTTP_404_NOT_FOUND,
+ detail="API key not found",
+ )
+
+ if not key.revoked_at:
+ key.revoked_at = datetime.utcnow()
+ await session.commit()
+
+ return {"ok": True}
+
+
+@router.get("/me/usage/summary", response_model=UsageTotals)
+async def get_my_usage_summary(
+ current_user: SessionUser = Depends(require_user),
+ session: AsyncSession = Depends(get_db_session),
+) -> UsageTotals:
+ return UsageTotals(**(await fetch_usage_summary(session, user_id=current_user.id)))
+
+
+@router.get("/me/usage/by-day", response_model=UsageByDayResponse)
+async def get_my_usage_by_day(
+ days: int = Query(default=30, ge=1, le=365),
+ current_user: SessionUser = Depends(require_user),
+ session: AsyncSession = Depends(get_db_session),
+) -> UsageByDayResponse:
+ rows = await fetch_usage_by_day(session, user_id=current_user.id, days=days)
+ return UsageByDayResponse(days=days, rows=[UsageByDayItem(**row) for row in rows])
+
+
+@router.get("/me/usage/by-model", response_model=UsageByModelResponse)
+async def get_my_usage_by_model(
+ days: int = Query(default=30, ge=1, le=365),
+ current_user: SessionUser = Depends(require_user),
+ session: AsyncSession = Depends(get_db_session),
+) -> UsageByModelResponse:
+ rows = await fetch_usage_by_model(session, user_id=current_user.id, days=days)
+ return UsageByModelResponse(
+ days=days,
+ rows=[UsageByModelItem(**row) for row in rows],
+ )
diff --git a/src/proxy_app/security_config.py b/src/proxy_app/security_config.py
new file mode 100644
index 00000000..fa53d875
--- /dev/null
+++ b/src/proxy_app/security_config.py
@@ -0,0 +1,113 @@
+import logging
+import os
+from dataclasses import dataclass
+
+
# Placeholder values used when the corresponding env vars are unset; startup
# validation treats these exact strings as insecure defaults.
DEFAULT_SESSION_SECRET = "change-me-session-secret"
DEFAULT_API_TOKEN_PEPPER = "change-me-token-pepper"


class SecurityValidationError(RuntimeError):
    """Raised at startup when security-critical configuration is invalid."""

    pass
+
+
def parse_bool_env(name: str, default: bool) -> bool:
    """Read a boolean env var.

    Unset means *default*; otherwise "1"/"true"/"yes"/"on" (case-insensitive,
    surrounding whitespace ignored) count as True and everything else as False.
    """
    value = os.getenv(name)
    if value is None:
        return default
    return value.strip().lower() in ("1", "true", "yes", "on")
+
+
def get_app_env() -> str:
    """Normalize APP_ENV to "prod" or "dev"; anything unrecognized is dev."""
    raw = (os.getenv("APP_ENV") or "dev").strip().lower()
    return "prod" if raw in ("prod", "production") else "dev"
+
+
def is_prod() -> bool:
    """True when APP_ENV resolves to "prod"."""
    return get_app_env() == "prod"


def allow_insecure_defaults() -> bool:
    """Whether startup may proceed with default secrets.

    Defaults to True in dev and False in prod; the ALLOW_INSECURE_DEFAULTS
    env var overrides either way.
    """
    default = not is_prod()
    return parse_bool_env("ALLOW_INSECURE_DEFAULTS", default)
+
+
def get_session_secret() -> str:
    """SESSION_SECRET env var, falling back to the known-insecure default."""
    return (os.getenv("SESSION_SECRET") or "").strip() or DEFAULT_SESSION_SECRET


def get_api_token_pepper() -> str:
    """API_TOKEN_PEPPER env var, falling back to the known-insecure default.

    Changing this after user API keys exist invalidates every stored token
    hash (the pepper is folded into the hash).
    """
    return (
        (os.getenv("API_TOKEN_PEPPER") or "").strip() or DEFAULT_API_TOKEN_PEPPER
    )
+
+
def validate_secret_settings() -> None:
    """Enforce secret configuration at startup.

    When insecure defaults are allowed (dev, or explicit override) this only
    logs warnings; otherwise any default/missing secret aborts startup by
    raising SecurityValidationError.
    """
    if allow_insecure_defaults():
        if get_session_secret() == DEFAULT_SESSION_SECRET:
            logging.warning(
                "SECURITY WARNING: SESSION_SECRET is using default value. "
                "Set SESSION_SECRET for non-local usage."
            )
        if get_api_token_pepper() == DEFAULT_API_TOKEN_PEPPER:
            logging.warning(
                "SECURITY WARNING: API_TOKEN_PEPPER is using default value. "
                "Set API_TOKEN_PEPPER for non-local usage."
            )
        return

    # Collect every problem so the operator sees the full list at once.
    invalid_reasons: list[str] = []
    if get_session_secret() == DEFAULT_SESSION_SECRET:
        invalid_reasons.append("SESSION_SECRET is missing or default")
    if get_api_token_pepper() == DEFAULT_API_TOKEN_PEPPER:
        invalid_reasons.append("API_TOKEN_PEPPER is missing or default")

    if invalid_reasons:
        raise SecurityValidationError(
            "Refusing startup due to insecure defaults: "
            + "; ".join(invalid_reasons)
            + ". Set secure secrets or ALLOW_INSECURE_DEFAULTS=true explicitly."
        )
+
+
@dataclass(frozen=True)
class CORSSettings:
    """Resolved CORS configuration built by get_cors_settings()."""

    allow_origins: list[str]
    allow_credentials: bool
    allow_methods: list[str]
    allow_headers: list[str]
+
+
+def _split_csv_env(name: str, default: str) -> list[str]:
+ raw = os.getenv(name, default)
+ values = [value.strip() for value in raw.split(",")]
+ return [value for value in values if value]
+
+
def get_cors_settings() -> CORSSettings:
    """Build CORS settings from environment variables.

    Credentials are an explicit opt-in: CORS_ALLOW_CREDENTIALS defaults to
    False even when origins are configured, matching the documented default
    in .env.example (previously it silently defaulted to True whenever any
    origin was set).
    Raises SecurityValidationError for the wildcard-origin + credentials
    combination, which browsers reject and which would mask a misconfiguration.
    """
    origins = _split_csv_env("CORS_ALLOW_ORIGINS", "")
    # Explicit opt-in only - do not infer credential support from origins.
    allow_credentials = parse_bool_env("CORS_ALLOW_CREDENTIALS", False)

    if not origins:
        # With no origin allowlist there is no cross-origin caller to trust.
        allow_credentials = False

    if allow_credentials and "*" in origins:
        raise SecurityValidationError(
            "Invalid CORS config: CORS_ALLOW_ORIGINS cannot include '*' when "
            "CORS_ALLOW_CREDENTIALS=true."
        )

    return CORSSettings(
        allow_origins=origins,
        allow_credentials=allow_credentials,
        allow_methods=_split_csv_env(
            "CORS_ALLOW_METHODS", "GET,POST,PUT,PATCH,DELETE,OPTIONS"
        ),
        allow_headers=_split_csv_env(
            "CORS_ALLOW_HEADERS",
            "Authorization,Content-Type,X-API-Key,X-Requested-With,X-CSRF-Token",
        ),
    )
diff --git a/src/proxy_app/static/ui.css b/src/proxy_app/static/ui.css
new file mode 100644
index 00000000..a461afa3
--- /dev/null
+++ b/src/proxy_app/static/ui.css
@@ -0,0 +1,156 @@
/* Shared dashboard stylesheet for the /ui pages. */

/* --- Base reset and page shell --- */
* {
  box-sizing: border-box;
}

body {
  margin: 0;
  font-family: "Segoe UI", Tahoma, sans-serif;
  background: #f5f6f8;
  color: #1f2937;
}

.wrap {
  max-width: 1040px;
  margin: 0 auto;
  padding: 0 16px;
}

/* --- Top navigation bar --- */
.topbar {
  background: #111827;
  color: #fff;
}

.topbar-inner {
  min-height: 56px;
  display: flex;
  align-items: center;
  justify-content: space-between;
}

.brand {
  color: #fff;
  text-decoration: none;
  font-weight: 700;
}

.nav {
  display: flex;
  align-items: center;
  gap: 12px;
}

.nav a {
  color: #e5e7eb;
  text-decoration: none;
}

/* --- Content panels and forms --- */
.main-content {
  padding-top: 20px;
  padding-bottom: 20px;
}

.panel {
  background: #fff;
  border: 1px solid #e5e7eb;
  border-radius: 10px;
  padding: 16px;
  margin-bottom: 16px;
}

.narrow {
  max-width: 420px;
}

.stacked-form {
  display: flex;
  flex-direction: column;
  gap: 8px;
}

.inline-form {
  display: flex;
  gap: 8px;
  align-items: center;
}

.checkbox-row {
  display: flex;
  align-items: center;
  gap: 8px;
}

input,
select,
button {
  border: 1px solid #d1d5db;
  border-radius: 6px;
  padding: 8px 10px;
  font: inherit;
}

button {
  background: #0f766e;
  color: #fff;
  border-color: #0f766e;
  cursor: pointer;
}

/* --- Tables --- */
table {
  width: 100%;
  border-collapse: collapse;
}

th,
td {
  border-bottom: 1px solid #e5e7eb;
  text-align: left;
  padding: 8px;
  vertical-align: top;
}

/* --- Text helpers --- */
.muted {
  color: #6b7280;
}

.error {
  color: #b91c1c;
  font-weight: 600;
}

.warn {
  color: #92400e;
  font-weight: 600;
}

/* One-time token display; wraps long opaque strings. */
.token {
  display: block;
  padding: 8px;
  border: 1px solid #d1d5db;
  border-radius: 6px;
  background: #f9fafb;
  overflow-wrap: anywhere;
}

/* --- Layout utilities --- */
.grid-two {
  display: grid;
  grid-template-columns: 1fr;
  gap: 16px;
}

.action-row {
  display: flex;
  flex-wrap: wrap;
  gap: 12px;
  align-items: center;
}

.stats {
  margin: 0;
  padding-left: 18px;
}

/* Two columns only on wider viewports. */
@media (min-width: 840px) {
  .grid-two {
    grid-template-columns: 1fr 1fr;
  }
}
diff --git a/src/proxy_app/templates/admin.html b/src/proxy_app/templates/admin.html
new file mode 100644
index 00000000..b9782488
--- /dev/null
+++ b/src/proxy_app/templates/admin.html
@@ -0,0 +1,73 @@
+{% extends "base.html" %}
+
+{% block title %}Admin{% endblock %}
+
+{% block content %}
+ Manage users and inspect usage. {{ error }} ID {{ target_user.id }} | Role: {{ target_user.role }} | Status: {% if target_user.is_active %}Active{% else %}Disabled{% endif %} {{ error }} Copy this now and deliver it securely.Admin Console
+ Create User
+
+Users
+
+
+
+
+
+
+
+ {% for user in users %}
+ ID
+ Username
+ Role
+ Status
+ Created
+ Last Login
+
+
+ {% else %}
+ {{ user.id }}
+ {{ user.username }}
+ {{ user.role }}
+ {% if user.is_active %}Active{% else %}Disabled{% endif %}
+ {{ user.created_at }}
+ {{ user.last_login_at if user.last_login_at else "-" }}
+
+
+ {% endfor %}
+
+ No users found.
+ User: {{ target_user.username }}
+ New Password
+ {{ new_password }}
+Usage Summary
+
+
+<h2>Usage By Day ({{ days }}d)</h2>
+<table>
+  <thead>
+    <tr>
+      <th>Day</th>
+      <th>Requests</th>
+      <th>Total Tokens</th>
+      <th>Cost USD</th>
+    </tr>
+  </thead>
+  <tbody>
+    {% for row in usage_by_day %}
+    <tr>
+      <td>{{ row.day }}</td>
+      <td>{{ row.request_count }}</td>
+      <td>{{ row.total_tokens }}</td>
+      <td>{{ row.cost_usd if row.cost_usd is not none else "-" }}</td>
+    </tr>
+    {% else %}
+    <tr>
+      <td colspan="4" class="muted">No usage yet.</td>
+    </tr>
+    {% endfor %}
+  </tbody>
+</table>
+<h2>Usage By Model ({{ days }}d)</h2>
+<table>
+  <thead>
+    <tr>
+      <th>Model</th>
+      <th>Requests</th>
+      <th>Total Tokens</th>
+      <th>Cost USD</th>
+    </tr>
+  </thead>
+  <tbody>
+    {% for row in usage_by_model %}
+    <tr>
+      <td>{{ row.model if row.model else "(unknown)" }}</td>
+      <td>{{ row.request_count }}</td>
+      <td>{{ row.total_tokens }}</td>
+      <td>{{ row.cost_usd if row.cost_usd is not none else "-" }}</td>
+    </tr>
+    {% else %}
+    <tr>
+      <td colspan="4" class="muted">No usage yet.</td>
+    </tr>
+    {% endfor %}
+  </tbody>
+</table>
+
Use your proxy user credentials.
+ + {% if error %} +{{ error }}
+ {% endif %} + + +Signed in as {{ current_user.username }} ({{ current_user.role }})
+{{ error }}
+Copy this now. You will not be able to view it again.
+{{ new_api_key_token }}
+| ID | +Name | +Prefix | +Created | +Status | +Action | +
|---|---|---|---|---|---|
| {{ key.id }} | +{{ key.name }} | +{{ key.token_prefix }} |
+ {{ key.created_at }} | +{% if key.revoked_at %}Revoked{% else %}Active{% endif %} | ++ {% if not key.revoked_at %} + + {% else %} + - + {% endif %} + | +
| No API keys yet. | +|||||
| Day | +Requests | +Total tokens | +Cost USD | +
|---|---|---|---|
| {{ row.day }} | +{{ row.request_count }} | +{{ row.total_tokens }} | +{{ row.cost_usd if row.cost_usd is not none else "-" }} | +
| No usage yet. | +|||