diff --git a/check-rustfs.sh b/check-rustfs.sh
index fe4deca..d093c1f 100755
--- a/check-rustfs.sh
+++ b/check-rustfs.sh
@@ -117,10 +117,11 @@
 echo "  Access RustFS"
 echo "========================================="
 echo ""
-# Check if Console is running locally
-if pgrep -f "target/release/operator.*console" >/dev/null; then
-    echo "✅ Operator Console (local):"
-    echo "   Running at: http://localhost:9090"
+# Operator Console (deployed in K8s)
+if kubectl get deployment rustfs-operator-console -n rustfs-system >/dev/null 2>&1; then
+    echo "✅ Operator Console (K8s Deployment):"
+    echo "   Port forward: kubectl port-forward -n rustfs-system svc/rustfs-operator-console 9090:9090"
+    echo "   Then access:  http://localhost:9090"
     echo "   Health check: curl http://localhost:9090/healthz"
     echo "   API docs: deploy/console/README.md"
     echo ""
@@ -128,8 +129,8 @@ if pgrep -f "target/release/operator.*console" >/dev/null; then
     echo "   Login: POST http://localhost:9090/api/v1/login"
     echo ""
 else
-    echo "⚠️  Operator Console not running locally"
-    echo "   Start with: cargo run -- console --port 9090"
+    echo "⚠️  Operator Console Deployment not found in rustfs-system"
+    echo "   Deploy with: ./deploy-rustfs.sh"
     echo ""
 fi
diff --git a/cleanup-rustfs.sh b/cleanup-rustfs.sh
index 0755a38..ae25f3f 100755
--- a/cleanup-rustfs.sh
+++ b/cleanup-rustfs.sh
@@ -47,9 +47,8 @@ confirm_cleanup() {
     echo ""
     log_warning "This operation will delete all RustFS resources:"
     echo "  - Tenant: example-tenant"
-    echo "  - Namespace: rustfs-system (including all Pods, PVCs, Services)"
+    echo "  - Namespace: rustfs-system (including Operator, Console, Pods, PVCs, Services)"
     echo "  - CRD: tenants.rustfs.com"
-    echo "  - Operator process"
     echo ""
     read -p "Confirm deletion? (yes/no): " confirm
@@ -87,76 +86,6 @@ delete_tenant() {
     fi
 }
 
-# Stop Operator
-stop_operator() {
-    log_info "Stopping Operator process..."
-
-    # Method 1: Read from PID file
-    if [ -f operator.pid ]; then
-        local pid=$(cat operator.pid)
-        if ps -p $pid > /dev/null 2>&1; then
-            log_info "Stopping Operator (PID: $pid)..."
-            kill $pid 2>/dev/null || true
-            sleep 2
-
-            # If process still exists, force kill
-            if ps -p $pid > /dev/null 2>&1; then
-                log_warning "Process did not exit normally, forcing termination..."
-                kill -9 $pid 2>/dev/null || true
-            fi
-        fi
-        rm -f operator.pid
-    fi
-
-    # Method 2: Find all operator processes
-    local operator_pids=$(pgrep -f "target/release/operator.*server" 2>/dev/null || true)
-    if [ -n "$operator_pids" ]; then
-        log_info "Found Operator processes: $operator_pids"
-        pkill -f "target/release/operator.*server" || true
-        sleep 2
-
-        # Force kill remaining processes
-        pkill -9 -f "target/release/operator.*server" 2>/dev/null || true
-    fi
-
-    log_success "Operator stopped"
-}
-
-# Stop Console
-stop_console() {
-    log_info "Stopping Console process..."
-
-    # Method 1: Read from PID file
-    if [ -f console.pid ]; then
-        local pid=$(cat console.pid)
-        if ps -p $pid > /dev/null 2>&1; then
-            log_info "Stopping Console (PID: $pid)..."
-            kill $pid 2>/dev/null || true
-            sleep 2
-
-            # If process still exists, force kill
-            if ps -p $pid > /dev/null 2>&1; then
-                log_warning "Process did not exit normally, forcing termination..."
-                kill -9 $pid 2>/dev/null || true
-            fi
-        fi
-        rm -f console.pid
-    fi
-
-    # Method 2: Find all console processes
-    local console_pids=$(pgrep -f "target/release/operator.*console" 2>/dev/null || true)
-    if [ -n "$console_pids" ]; then
-        log_info "Found Console processes: $console_pids"
-        pkill -f "target/release/operator.*console" || true
-        sleep 2
-
-        # Force kill remaining processes
-        pkill -9 -f "target/release/operator.*console" 2>/dev/null || true
-    fi
-
-    log_success "Console stopped"
-}
-
 # Delete Namespace
 delete_namespace() {
     log_info "Deleting Namespace: rustfs-system..."
@@ -223,10 +152,6 @@ cleanup_local_files() {
     log_info "Cleaning up local files..."
 
     local files_to_clean=(
-        "operator.log"
-        "operator.pid"
-        "console.log"
-        "console.pid"
         "deploy/rustfs-operator/crds/tenant-crd.yaml"
     )
@@ -271,21 +196,7 @@ verify_cleanup() {
         log_success "✓ CRD cleaned"
     fi
 
-    # Check Operator process
-    if pgrep -f "target/release/operator.*server" >/dev/null; then
-        log_error "Operator process still running"
-        issues=$((issues + 1))
-    else
-        log_success "✓ Operator stopped"
-    fi
-
-    # Check Console process
-    if pgrep -f "target/release/operator.*console" >/dev/null; then
-        log_error "Console process still running"
-        issues=$((issues + 1))
-    else
-        log_success "✓ Console stopped"
-    fi
+    # Operator and Console are deleted with the namespace (no local process check)
 
     echo ""
     if [ $issues -eq 0 ]; then
@@ -331,8 +242,6 @@ main() {
     echo ""
 
     delete_tenant
-    stop_console
-    stop_operator
     delete_namespace
     delete_crd
     cleanup_local_files
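With the process-management code gone, verifying a cleanup reduces to checking cluster state. A minimal post-cleanup check, using the resource names these scripts create:

```bash
# After ./cleanup-rustfs.sh, both lookups should fail with "NotFound"
kubectl get namespace rustfs-system
kubectl get crd tenants.rustfs.com
```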
diff --git a/deploy-rustfs.sh b/deploy-rustfs.sh
index 11df063..833b6a2 100755
--- a/deploy-rustfs.sh
+++ b/deploy-rustfs.sh
@@ -14,6 +14,7 @@
 # limitations under the License.
 
 # RustFS Operator deployment script - uses examples/simple-tenant.yaml
+# Deploys Operator and Console as Kubernetes Deployments (Pods in K8s)
 # For quick deployment and CRD modification verification
 
 set -e
@@ -50,6 +51,7 @@ check_prerequisites() {
     command -v kubectl >/dev/null 2>&1 || missing_tools+=("kubectl")
     command -v cargo >/dev/null 2>&1 || missing_tools+=("cargo")
     command -v kind >/dev/null 2>&1 || missing_tools+=("kind")
+    command -v docker >/dev/null 2>&1 || missing_tools+=("docker")
 
     if [ ${#missing_tools[@]} -ne 0 ]; then
         log_error "Missing required tools: ${missing_tools[*]}"
@@ -59,6 +61,39 @@ check_prerequisites() {
     log_success "All required tools are installed"
 }
 
+# Fix "too many open files" for kind (inotify limits)
+# See: https://kind.sigs.k8s.io/docs/user/known-issues/#pod-errors-due-to-too-many-open-files
+fix_inotify_limits() {
+    log_info "Applying inotify limits (fix for 'too many open files')..."
+
+    local sysctl_conf="/etc/sysctl.d/99-rustfs-kind.conf"
+    local applied=false
+
+    if sudo sysctl -w fs.inotify.max_user_watches=524288 >/dev/null 2>&1 \
+        && sudo sysctl -w fs.inotify.max_user_instances=512 >/dev/null 2>&1; then
+        log_success "Inotify limits applied (current session)"
+        applied=true
+    fi
+
+    if sudo test -w /etc/sysctl.d 2>/dev/null; then
+        if ! sudo grep -qs "fs.inotify.max_user_watches" "$sysctl_conf" 2>/dev/null; then
+            printf 'fs.inotify.max_user_watches = 524288\nfs.inotify.max_user_instances = 512\n' \
+                | sudo tee "$sysctl_conf" >/dev/null 2>&1 && \
+                log_success "Inotify limits persisted to $sysctl_conf"
+        fi
+    fi
+
+    if [ "$applied" = true ]; then
+        return 0
+    fi
+
+    log_warning "Could not set inotify limits (may need root). If you see kube-proxy 'too many open files' errors:"
+    echo "  sudo sysctl fs.inotify.max_user_watches=524288"
+    echo "  sudo sysctl fs.inotify.max_user_instances=512"
+    echo "  # Make persistent: add to /etc/sysctl.conf or $sysctl_conf"
+    return 1
+}
+
 # Check Kubernetes cluster connection
 check_cluster() {
     log_info "Checking Kubernetes cluster connection..."
@@ -67,6 +102,8 @@ check_cluster() {
         log_error "Unable to connect to Kubernetes cluster"
         log_info "Attempting to start kind cluster..."
 
+        fix_inotify_limits || true
+
         if kind get clusters | grep -q "rustfs-dev"; then
             log_info "Detected kind cluster 'rustfs-dev', attempting to restart..."
             kind delete cluster --name rustfs-dev
@@ -74,6 +111,8 @@ check_cluster() {
         log_info "Creating new kind cluster..."
         kind create cluster --name rustfs-dev
+    else
+        fix_inotify_limits || true
     fi
 
     log_success "Kubernetes cluster connection OK: $(kubectl config current-context)"
@@ -121,52 +160,47 @@ build_operator() {
     log_success "Operator build completed"
 }
 
-# Start operator (background)
-start_operator() {
-    log_info "Starting operator..."
+# Build Docker image and deploy Operator + Console as Kubernetes Deployments
+deploy_operator_and_console() {
+    local kind_cluster="rustfs-dev"
+    local image_name="rustfs/operator:dev"
+
+    log_info "Building Docker image..."
 
-    # Check if operator is already running
-    if pgrep -f "target/release/operator.*server" >/dev/null; then
-        log_warning "Detected existing operator process"
-        log_info "Stopping old operator process..."
-        pkill -f "target/release/operator.*server" || true
-        sleep 2
+    if ! docker build -t "$image_name" .; then
+        log_error "Docker build failed"
+        exit 1
     fi
 
-    # Start new operator process (background)
-    nohup cargo run --release -- server > operator.log 2>&1 &
-    OPERATOR_PID=$!
-    echo $OPERATOR_PID > operator.pid
+    log_info "Loading image into kind cluster '$kind_cluster'..."
 
-    log_success "Operator started (PID: $OPERATOR_PID)"
-    log_info "Log file: operator.log"
+    if ! kind load docker-image "$image_name" --name "$kind_cluster"; then
+        log_error "Failed to load image into kind cluster"
+        log_info "Verify: 1) Docker is running and accessible"
+        log_info "        2) the kind cluster 'rustfs-dev' exists: kind get clusters"
+        log_info "        3) the image was built: docker images rustfs/operator"
+        exit 1
+    fi
 
-    # Wait for operator to start
-    sleep 3
-}
+    log_info "Creating Console JWT secret..."
 
-# Start console (background)
-start_console() {
-    log_info "Starting console..."
+    local jwt_secret
+    jwt_secret=$(openssl rand -base64 32 2>/dev/null || head -c 32 /dev/urandom | base64)
 
-    # Check if console is already running
-    if pgrep -f "target/release/operator.*console" >/dev/null; then
-        log_warning "Detected existing console process"
-        log_info "Stopping old console process..."
-        pkill -f "target/release/operator.*console" || true
-        sleep 2
-    fi
+    kubectl create secret generic rustfs-operator-console-secret \
+        --namespace rustfs-system \
+        --from-literal=jwt-secret="$jwt_secret" \
+        --dry-run=client -o yaml | kubectl apply -f -
 
-    # Start new console process (background)
-    nohup cargo run --release -- console --port 9090 > console.log 2>&1 &
-    CONSOLE_PID=$!
-    echo $CONSOLE_PID > console.pid
+    log_info "Deploying Operator and Console (Deployment)..."
 
-    log_success "Console started (PID: $CONSOLE_PID)"
-    log_info "Log file: console.log"
+    kubectl apply -f deploy/k8s-dev/operator-rbac.yaml
+    kubectl apply -f deploy/k8s-dev/console-rbac.yaml
+    kubectl apply -f deploy/k8s-dev/operator-deployment.yaml
+    kubectl apply -f deploy/k8s-dev/console-deployment.yaml
+    kubectl apply -f deploy/k8s-dev/console-service.yaml
 
-    # Wait for console to start
-    sleep 2
+    log_success "Operator and Console deployed to Kubernetes"
 }
 
 # Deploy Tenant (EC 2+1 configuration)
@@ -178,24 +212,25 @@ deploy_tenant() {
     log_success "Tenant submitted"
 }
 
-# Wait for pods to be ready
+# Wait for pods to be ready (1 operator + 1 console + 2 tenant = 4)
 wait_for_pods() {
     log_info "Waiting for pods to start (max 5 minutes)..."
 
     local timeout=300
     local elapsed=0
     local interval=5
+    local expected_pods=4
 
     while [ $elapsed -lt $timeout ]; do
         local ready_count=$(kubectl get pods -n rustfs-system --no-headers 2>/dev/null | grep -c "Running" || echo "0")
        local total_count=$(kubectl get pods -n rustfs-system --no-headers 2>/dev/null | wc -l || echo "0")
 
-        if [ "$ready_count" -eq 2 ] && [ "$total_count" -eq 2 ]; then
-            log_success "All pods are ready (2/2 Running)"
+        if [ "$ready_count" -eq "$expected_pods" ] && [ "$total_count" -eq "$expected_pods" ]; then
+            log_success "All pods are ready ($expected_pods/$expected_pods Running)"
             return 0
         fi
 
-        echo -ne "${BLUE}[INFO]${NC} Pod status: $ready_count/2 Running, waited ${elapsed}s...\r"
+        echo -ne "${BLUE}[INFO]${NC} Pod status: $ready_count/$expected_pods Running, waited ${elapsed}s...\r"
         sleep $interval
         elapsed=$((elapsed + interval))
     done
@@ -212,23 +247,27 @@ show_status() {
     log_info "=========================================="
     echo ""
 
-    log_info "1. Tenant status:"
+    log_info "1. Deployment status:"
+    kubectl get deployment -n rustfs-system
+    echo ""
+
+    log_info "2. Tenant status:"
     kubectl get tenant -n rustfs-system
     echo ""
 
-    log_info "2. Pod status:"
+    log_info "3. Pod status:"
     kubectl get pods -n rustfs-system -o wide
     echo ""
 
-    log_info "3. Service status:"
+    log_info "4. Service status:"
     kubectl get svc -n rustfs-system
     echo ""
 
-    log_info "4. PVC status:"
+    log_info "5. PVC status:"
     kubectl get pvc -n rustfs-system
     echo ""
 
-    log_info "5. StatefulSet status:"
+    log_info "6. StatefulSet status:"
     kubectl get statefulset -n rustfs-system
     echo ""
 }
@@ -241,7 +280,9 @@ show_access_info() {
     echo ""
 
     echo "📋 View logs:"
-    echo "   kubectl logs -f example-tenant-primary-0 -n rustfs-system"
+    echo "   Operator: kubectl logs -f deployment/rustfs-operator -n rustfs-system"
+    echo "   Console:  kubectl logs -f deployment/rustfs-operator-console -n rustfs-system"
+    echo "   RustFS:   kubectl logs -f example-tenant-primary-0 -n rustfs-system"
     echo ""
 
     echo "🔌 Port forward S3 API (9000):"
@@ -252,9 +293,9 @@ show_access_info() {
     echo "   kubectl port-forward -n rustfs-system svc/example-tenant-console 9001:9001"
     echo ""
 
-    echo "🖥️  Operator Console (Management API):"
-    echo "   Listening on: http://localhost:9090"
-    echo "   Health check: curl http://localhost:9090/healthz"
+    echo "🖥️  Operator Console (Management API, port 9090):"
+    echo "   kubectl port-forward -n rustfs-system svc/rustfs-operator-console 9090:9090"
+    echo "   Then: curl http://localhost:9090/healthz"
     echo ""
 
     echo "🔐 RustFS Credentials:"
@@ -276,9 +317,10 @@ show_access_info() {
     echo "   ./cleanup-rustfs.sh"
     echo ""
 
-    echo "📝 Logs:"
-    echo "   Operator: tail -f operator.log"
-    echo "   Console:  tail -f console.log"
+    echo "⚠️  If pods show 'ImagePullBackOff' or 'image not present':"
+    echo "   docker build -t rustfs/operator:dev ."
+    echo "   kind load docker-image rustfs/operator:dev --name rustfs-dev"
+    echo "   kubectl rollout restart deployment -n rustfs-system"
    echo ""
 }
@@ -299,8 +341,7 @@ main() {
     deploy_crd
     create_namespace
     build_operator
-    start_operator
-    start_console
+    deploy_operator_and_console
     deploy_tenant
 
     echo ""
@@ -318,5 +359,26 @@
 # Catch Ctrl+C
 trap 'log_error "Deployment interrupted"; exit 1' INT
 
+# Parse arguments
+case "${1:-}" in
+    --fix-limits)
+        log_info "Fixing inotify limits for kind (kube-proxy 'too many open files')"
+        fix_inotify_limits
+        echo ""
+        log_info "If the cluster already has issues, delete and recreate it:"
+        echo "  kind delete cluster --name rustfs-dev"
+        echo "  ./deploy-rustfs.sh"
+        exit 0
+        ;;
+    -h|--help)
+        echo "Usage: $0 [options]"
+        echo ""
+        echo "Options:"
+        echo "  --fix-limits  Apply inotify limits (fix 'too many open files'), then exit"
+        echo "  -h, --help    Show this help"
+        exit 0
+        ;;
+esac
+
 # Run main flow
 main "$@"
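The five `deploy/k8s-dev/*.yaml` manifests applied above are not shown in this diff. For orientation, a Console Deployment consistent with the script — image `rustfs/operator:dev`, the `rustfs-operator-console-secret` JWT secret, and the `./operator console --port 9090` entrypoint documented elsewhere in this PR — might look roughly like the sketch below; treat every field not named in the script as an assumption:

```bash
# Hypothetical sketch of deploy/k8s-dev/console-deployment.yaml (not shown in this diff);
# image, port, and secret name come from deploy-rustfs.sh, the rest is assumed.
kubectl apply -f - <<'EOF'
apiVersion: apps/v1
kind: Deployment
metadata:
  name: rustfs-operator-console
  namespace: rustfs-system
spec:
  replicas: 1
  selector:
    matchLabels: { app: rustfs-operator-console }
  template:
    metadata:
      labels: { app: rustfs-operator-console }
    spec:
      containers:
        - name: console
          image: rustfs/operator:dev
          imagePullPolicy: Never   # image is side-loaded via `kind load docker-image`
          command: ["./operator", "console", "--port", "9090"]
          ports:
            - containerPort: 9090
          env:
            - name: JWT_SECRET
              valueFrom:
                secretKeyRef:
                  name: rustfs-operator-console-secret
                  key: jwt-secret
EOF
```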
diff --git a/deploy/console/KUBERNETES-INTEGRATION.md b/deploy/console/KUBERNETES-INTEGRATION.md
deleted file mode 100644
index 9782a25..0000000
--- a/deploy/console/KUBERNETES-INTEGRATION.md
+++ /dev/null
@@ -1,236 +0,0 @@
-# RustFS Operator Console - Kubernetes Integration Summary
-
-## ✅ Completed Integration
-
-### 1. Helm Chart Templates (7 files)
-
-Created in `deploy/rustfs-operator/templates/`:
-
-- **console-deployment.yaml** - Console Deployment configuration
-  - Runs `./operator console --port 9090`
-  - Health and readiness probes
-  - JWT secret injected via environment variable
-  - Supports multi-replica deployment
-
-- **console-service.yaml** - Service configuration
-  - Supports ClusterIP / NodePort / LoadBalancer
-  - Default port 9090
-
-- **console-serviceaccount.yaml** - ServiceAccount
-
-- **console-clusterrole.yaml** - RBAC ClusterRole
-  - Tenant resources: full CRUD permissions
-  - Namespaces: read and create permissions
-  - Nodes, Events, Services, Pods: read-only permissions
-
-- **console-clusterrolebinding.yaml** - RBAC binding
-
-- **console-secret.yaml** - JWT Secret
-  - Auto-generated or taken from configuration
-
-- **console-ingress.yaml** - Ingress configuration (optional)
-  - TLS support
-  - Configurable host and path
-
-### 2. Helm Values Configuration
-
-New `console` section in `deploy/rustfs-operator/values.yaml`:
-
-```yaml
-console:
-  enabled: true          # Enable/disable the Console
-  replicas: 1            # Number of replicas
-  port: 9090             # Port
-  logLevel: info         # Log level
-  jwtSecret: ""          # JWT secret (auto-generated when empty)
-
-  image: {}              # Image configuration (uses the operator image)
-  resources: {}          # Resource limits
-  service: {}            # Service configuration
-  ingress: {}            # Ingress configuration
-  rbac: {}               # RBAC configuration
-  serviceAccount: {}     # ServiceAccount configuration
-```
-
-### 3. Helm Helpers
-
-Added to `deploy/rustfs-operator/templates/_helpers.tpl`:
-
-- `rustfs-operator.consoleServiceAccountName` - generates the Console ServiceAccount name
-
-### 4. Deployment Documentation
-
-- **deploy/console/README.md** - complete deployment guide
-  - Architecture overview
-  - Deployment methods (Helm / kubectl)
-  - API endpoint documentation
-  - Authentication notes
-  - RBAC permission notes
-  - Security considerations
-  - Troubleshooting
-
-- **deploy/console/examples/loadbalancer-example.md** - LoadBalancer deployment example
-
-- **deploy/console/examples/ingress-tls-example.md** - Ingress + TLS deployment example
-
-## Deployment Methods
-
-### Option 1: Helm (recommended)
-
-```bash
-# Enable the Console deployment
-helm install rustfs-operator deploy/rustfs-operator \
-    --set console.enabled=true
-
-# Use a LoadBalancer
-helm install rustfs-operator deploy/rustfs-operator \
-    --set console.enabled=true \
-    --set console.service.type=LoadBalancer
-
-# Custom configuration
-helm install rustfs-operator deploy/rustfs-operator \
-    -f custom-values.yaml
-```
-
-### Option 2: Standalone Deployment
-
-YAML manifests can be rendered from the Helm templates and applied separately (requires the helm CLI):
-
-```bash
-helm template rustfs-operator deploy/rustfs-operator \
-    --set console.enabled=true \
-    > console-manifests.yaml
-
-kubectl apply -f console-manifests.yaml
-```
-
-## Access Methods
-
-### ClusterIP + Port Forward
-
-```bash
-kubectl port-forward svc/rustfs-operator-console 9090:9090
-# Access http://localhost:9090
-```
-
-### LoadBalancer
-
-```bash
-kubectl get svc rustfs-operator-console
-# Access http://<EXTERNAL-IP>:9090
-```
-
-### Ingress
-
-```bash
-# Access https://your-domain.com
-```
-
-## API Testing
-
-```bash
-# Health check
-curl http://localhost:9090/healthz   # => "OK"
-
-# Create a test user
-kubectl create serviceaccount test-user
-kubectl create clusterrolebinding test-admin \
-    --clusterrole=cluster-admin \
-    --serviceaccount=default:test-user
-
-# Login
-TOKEN=$(kubectl create token test-user --duration=1h)
-curl -X POST http://localhost:9090/api/v1/login \
-    -H "Content-Type: application/json" \
-    -d "{\"token\": \"$TOKEN\"}" \
-    -c cookies.txt
-
-# Call the API
-curl http://localhost:9090/api/v1/tenants -b cookies.txt
-```
-
-## Architecture
-
-```
-┌─────────────────────────────────────────────────────────┐
-│                   Kubernetes Cluster                    │
-│                                                         │
-│  ┌────────────────────┐      ┌─────────────────────┐    │
-│  │   Operator Pod     │      │   Console Pod(s)    │    │
-│  │                    │      │                     │    │
-│  │ ./operator server  │      │ ./operator console  │    │
-│  │                    │      │   --port 9090       │    │
-│  │ - Reconcile Loop   │      │                     │    │
-│  │ - Watch Tenants    │      │ - REST API          │    │
-│  │ - Manage K8s Res   │      │ - JWT Auth          │    │
-│  └────────────────────┘      │ - Query K8s API     │    │
-│            │                 └─────────────────────┘    │
-│            │                            │               │
-│            ▼                            ▼               │
-│  ┌──────────────────────────────────────────────────┐   │
-│  │              Kubernetes API Server               │   │
-│  │                                                  │   │
-│  │  - Tenant CRDs                                   │   │
-│  │  - Deployments, Services, ConfigMaps, etc.       │   │
-│  └──────────────────────────────────────────────────┘   │
-│                                                         │
-└─────────────────────────────────────────────────────────┘
-                          ▲
-                          │
-                 ┌────────┴────────┐
-                 │  Users/Clients  │
-                 │                 │
-                 │ HTTP API Calls  │
-                 └─────────────────┘
-```
-
-## Security Features
-
-1. **JWT authentication** - 12-hour session expiry
-2. **HttpOnly cookies** - protects against XSS attacks
-3. **RBAC integration** - authorizes with the user's own Kubernetes token
-4. **Least privilege** - the Console ServiceAccount has only the permissions it needs
-5. **TLS support** - HTTPS via Ingress configuration
-
-## Next Steps
-
-1. **Build the image**: the Docker image already includes the `console` command; no Dockerfile changes needed
-2. **Deployment testing**: deploy to a cluster with Helm or kubectl
-3. **Frontend integration** (optional): build a web UI on top of the REST API
-4. **Monitoring**: integrate Prometheus metrics (future enhancement)
-
-## Related Files
-
-```
-deploy/
-├── rustfs-operator/
-│   ├── templates/
-│   │   ├── console-deployment.yaml          ✅
-│   │   ├── console-service.yaml             ✅
-│   │   ├── console-serviceaccount.yaml      ✅
-│   │   ├── console-clusterrole.yaml         ✅
-│   │   ├── console-clusterrolebinding.yaml  ✅
-│   │   ├── console-secret.yaml              ✅
-│   │   ├── console-ingress.yaml             ✅
-│   │   └── _helpers.tpl                     ✅ (updated)
-│   └── values.yaml                          ✅ (updated)
-└── console/
-    ├── README.md                            ✅
-    └── examples/
-        ├── loadbalancer-example.md          ✅
-        └── ingress-tls-example.md           ✅
-```
-
-## Summary
-
-The Console backend is fully integrated into the Kubernetes deployment story:
-
-✅ Complete Helm chart templates
-✅ RBAC permission configuration
-✅ Service and Ingress support
-✅ Health and readiness probes
-✅ Security configuration (JWT secret)
-✅ Deployment documentation and examples
-✅ Multiple deployment options
-
-**Status: production ready, deployable to a Kubernetes cluster** 🚀
diff --git a/deploy/console/README.md b/deploy/console/README.md
deleted file mode 100644
index 43d466b..0000000
--- a/deploy/console/README.md
+++ /dev/null
@@ -1,315 +0,0 @@
-# RustFS Operator Console Deployment Guide
-
-## Overview
-
-The RustFS Operator Console provides a web-based management interface for RustFS Tenants deployed in Kubernetes. It offers a REST API for managing tenants, viewing events, and monitoring cluster resources.
-
-## Architecture
-
-The Console is deployed as a separate Deployment alongside the Operator:
-- **Operator**: Watches Tenant CRDs and reconciles Kubernetes resources
-- **Console**: Provides REST API for management operations
-
-Both components use the same Docker image but run different commands:
-- Operator: `./operator server`
-- Console: `./operator console --port 9090`
-
-## Deployment Methods
-
-### Option 1: Helm Chart (Recommended)
-
-The Console is integrated into the main Helm chart and can be enabled via `values.yaml`.
-
-#### Install with Console enabled:
-
-```bash
-helm install rustfs-operator deploy/rustfs-operator \
-    --set console.enabled=true \
-    --set console.service.type=LoadBalancer
-```
-
-#### Upgrade existing installation to enable Console:
-
-```bash
-helm upgrade rustfs-operator deploy/rustfs-operator \
-    --set console.enabled=true
-```
-
-#### Custom configuration:
-
-Create a `custom-values.yaml`:
-
-```yaml
-console:
-  enabled: true
-
-  # Number of replicas
-  replicas: 2
-
-  # JWT secret for session signing (recommended: generate with openssl rand -base64 32)
-  jwtSecret: "your-secure-random-secret-here"
-
-  # Service configuration
-  service:
-    type: LoadBalancer
-    port: 9090
-    annotations:
-      service.beta.kubernetes.io/aws-load-balancer-type: "nlb"
-
-  # Ingress configuration
-  ingress:
-    enabled: true
-    className: nginx
-    annotations:
-      cert-manager.io/cluster-issuer: letsencrypt-prod
-    hosts:
-      - host: rustfs-console.example.com
-        paths:
-          - path: /
-            pathType: Prefix
-    tls:
-      - secretName: rustfs-console-tls
-        hosts:
-          - rustfs-console.example.com
-
-  # Resource limits
-  resources:
-    requests:
-      cpu: 100m
-      memory: 128Mi
-    limits:
-      cpu: 500m
-      memory: 512Mi
-```
-
-Apply the configuration:
-
-```bash
-helm upgrade --install rustfs-operator deploy/rustfs-operator \
-    -f custom-values.yaml
-```
-
-### Option 2: kubectl apply (Standalone)
-
-For manual deployment or customization, you can use standalone YAML files.
-
-See `deploy/console/` directory for standalone deployment manifests.
-
-## Accessing the Console
-
-### Via Service (ClusterIP)
-
-```bash
-# Port forward to local machine
-kubectl port-forward svc/rustfs-operator-console 9090:9090
-
-# Access at http://localhost:9090
-```
-
-### Via LoadBalancer
-
-```bash
-# Get the external IP
-kubectl get svc rustfs-operator-console
-
-# Access at http://<EXTERNAL-IP>:9090
-```
-
-### Via Ingress
-
-Access via the configured hostname (e.g., `https://rustfs-console.example.com`)
-
-## API Endpoints
-
-### Health & Readiness
-
-- `GET /healthz` - Health check
-- `GET /readyz` - Readiness check
-
-### Authentication
-
-- `POST /api/v1/login` - Login with Kubernetes token
-  ```json
-  {
-    "token": "eyJhbGciOiJSUzI1NiIsImtpZCI6..."
-  }
-  ```
-
-- `POST /api/v1/logout` - Logout and clear session
-- `GET /api/v1/session` - Check session status
-
-### Tenant Management
-
-- `GET /api/v1/tenants` - List all tenants
-- `GET /api/v1/namespaces/{ns}/tenants` - List tenants in namespace
-- `GET /api/v1/namespaces/{ns}/tenants/{name}` - Get tenant details
-- `POST /api/v1/namespaces/{ns}/tenants` - Create tenant
-- `DELETE /api/v1/namespaces/{ns}/tenants/{name}` - Delete tenant
-
-### Events
-
-- `GET /api/v1/namespaces/{ns}/tenants/{name}/events` - List tenant events
-
-### Cluster Resources
-
-- `GET /api/v1/nodes` - List cluster nodes
-- `GET /api/v1/namespaces` - List namespaces
-- `POST /api/v1/namespaces` - Create namespace
-- `GET /api/v1/cluster/resources` - Get cluster resource summary
-
-## Authentication
-
-The Console uses JWT-based authentication with Kubernetes ServiceAccount tokens:
-
-1. **Login**: Users provide their Kubernetes ServiceAccount token
-2. **Validation**: Console validates the token by making a test API call to Kubernetes
-3. **Session**: Console generates a JWT session token (12-hour expiry)
-4. **Cookie**: Session token stored in HttpOnly cookie
-5. **Authorization**: All API requests use the user's Kubernetes token for authorization
-
-### Getting a Kubernetes Token
-
-```bash
-# Create a ServiceAccount
-kubectl create serviceaccount console-user
-
-# Create ClusterRoleBinding (for admin access)
-kubectl create clusterrolebinding console-user-admin \
-    --clusterrole=cluster-admin \
-    --serviceaccount=default:console-user
-
-# Get the token
-kubectl create token console-user --duration=24h
-```
-
-### Login Example
-
-```bash
-TOKEN=$(kubectl create token console-user --duration=24h)
-
-curl -X POST http://localhost:9090/api/v1/login \
-    -H "Content-Type: application/json" \
-    -d "{\"token\": \"$TOKEN\"}" \
-    -c cookies.txt
-
-# Subsequent requests use the cookie
-curl http://localhost:9090/api/v1/tenants \
-    -b cookies.txt
-```
-
-## RBAC Permissions
-
-The Console ServiceAccount has the following permissions:
-
-- **Tenants**: Full CRUD operations
-- **Namespaces**: List and create
-- **Services, Pods, ConfigMaps, Secrets**: Read-only
-- **Nodes**: Read-only
-- **Events**: Read-only
-- **StatefulSets**: Read-only
-- **PersistentVolumeClaims**: Read-only
-
-Users authenticate with their own Kubernetes tokens, so actual permissions depend on the user's RBAC roles.
-
-## Security Considerations
-
-1. **JWT Secret**: Always set a strong random JWT secret in production
-   ```bash
-   openssl rand -base64 32
-   ```
-
-2. **TLS/HTTPS**: Enable Ingress with TLS for production deployments
-
-3. **Network Policies**: Restrict Console access to specific namespaces/pods
-
-4. **RBAC**: Console requires cluster-wide read access and tenant management permissions
-
-5. **Session Expiry**: Default 12-hour session timeout (configurable in code)
-
-6. **CORS**: Configure allowed origins based on your frontend deployment
-
-## Monitoring
-
-### Prometheus Metrics
-
-(To be implemented - placeholder for future enhancement)
-
-### Logs
-
-```bash
-# View Console logs
-kubectl logs -l app.kubernetes.io/component=console -f
-
-# Set log level
-helm upgrade rustfs-operator deploy/rustfs-operator \
-    --set console.logLevel=debug
-```
-
-## Troubleshooting
-
-### Console Pod Not Starting
-
-```bash
-# Check pod status
-kubectl get pods -l app.kubernetes.io/component=console
-
-# View events
-kubectl describe pod -l app.kubernetes.io/component=console
-
-# Check logs
-kubectl logs -l app.kubernetes.io/component=console
-```
-
-### Authentication Failures
-
-- Verify Kubernetes token is valid: `kubectl auth can-i get tenants --as=system:serviceaccount:default:console-user`
-- Check Console ServiceAccount has proper RBAC permissions
-- Verify JWT_SECRET is consistent across Console replicas
-
-### CORS Errors
-
-- Update CORS configuration in `src/console/server.rs`
-- Rebuild and redeploy the image
-- Or use Ingress annotations to handle CORS
-
-## Configuration Reference
-
-See `deploy/rustfs-operator/values.yaml` for complete configuration options:
-
-```yaml
-console:
-  enabled: true|false    # Enable/disable Console
-  replicas: 1            # Number of replicas
-  port: 9090             # Console port
-  logLevel: info         # Log level
-  jwtSecret: ""          # JWT signing secret
-
-  image:
-    repository: rustfs/operator
-    tag: latest
-    pullPolicy: IfNotPresent
-
-  resources: {}          # Resource requests/limits
-  nodeSelector: {}       # Node selection
-  tolerations: []        # Pod tolerations
-  affinity: {}           # Pod affinity
-
-  service:
-    type: ClusterIP      # Service type
-    port: 9090           # Service port
-
-  ingress:
-    enabled: false       # Enable Ingress
-    className: ""        # Ingress class
-    hosts: []            # Ingress hosts
-    tls: []              # TLS configuration
-```
-
-## Examples
-
-See `deploy/console/examples/` for:
-- Basic deployment
-- LoadBalancer service
-- Ingress with TLS
-- Multi-replica setup
-- Custom RBAC roles
diff --git a/deploy/console/examples/ingress-tls-example.md b/deploy/console/examples/ingress-tls-example.md
deleted file mode 100644
index 0dc0e0c..0000000
--- a/deploy/console/examples/ingress-tls-example.md
+++ /dev/null
@@ -1,132 +0,0 @@
-# Example: Console with Ingress and TLS
-
-This example shows how to deploy the Console with Nginx Ingress and Let's Encrypt TLS certificates.
-
-## Prerequisites
-
-- Nginx Ingress Controller installed
-- cert-manager installed for automatic TLS certificates
-- DNS record pointing to your cluster
-
-## Configuration
-
-```yaml
-# values-console-ingress.yaml
-console:
-  enabled: true
-  replicas: 2  # For high availability
-
-  # JWT secret (keep this secure!)
-  jwtSecret: "REPLACE_WITH_YOUR_SECRET_HERE"
-
-  service:
-    type: ClusterIP  # No need for LoadBalancer with Ingress
-    port: 9090
-
-  ingress:
-    enabled: true
-    className: nginx
-    annotations:
-      cert-manager.io/cluster-issuer: letsencrypt-prod
-      nginx.ingress.kubernetes.io/ssl-redirect: "true"
-      nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
-      # Console uses cookies for auth
-      nginx.ingress.kubernetes.io/affinity: cookie
-      nginx.ingress.kubernetes.io/session-cookie-name: "console-session"
-    hosts:
-      - host: rustfs-console.example.com
-        paths:
-          - path: /
-            pathType: Prefix
-    tls:
-      - secretName: rustfs-console-tls
-        hosts:
-          - rustfs-console.example.com
-
-  resources:
-    requests:
-      cpu: 100m
-      memory: 128Mi
-    limits:
-      cpu: 500m
-      memory: 512Mi
-
-  # Pod anti-affinity for HA
-  affinity:
-    podAntiAffinity:
-      preferredDuringSchedulingIgnoredDuringExecution:
-        - weight: 100
-          podAffinityTerm:
-            labelSelector:
-              matchLabels:
-                app.kubernetes.io/component: console
-            topologyKey: kubernetes.io/hostname
-```
-
-## Deploy
-
-```bash
-# Create ClusterIssuer for Let's Encrypt (if not exists)
-cat <

[…]

     fn into_response(self) -> Response {
         let (status, error_type, message, details) = match &self {
-            Error::Unauthorized { message } => {
-                (StatusCode::UNAUTHORIZED, "Unauthorized", message.clone(), None)
-            }
+            Error::Unauthorized { message } => (
+                StatusCode::UNAUTHORIZED,
+                "Unauthorized",
+                message.clone(),
+                None,
+            ),
             Error::Forbidden { message } => {
                 (StatusCode::FORBIDDEN, "Forbidden", message.clone(), None)
             }
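The reshaped `Unauthorized` arm is also the easiest one to exercise from outside: any `/api/v1` request without a session cookie is rejected by the auth middleware. A quick probe, assuming the Console Service created by deploy-rustfs.sh:

```bash
kubectl port-forward -n rustfs-system svc/rustfs-operator-console 9090:9090 &
sleep 2
# No session cookie -> the auth middleware rejects the request with 401 Unauthorized
curl -i http://localhost:9090/api/v1/tenants
```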
diff --git a/src/console/handlers/auth.rs b/src/console/handlers/auth.rs
index e1c96a0..f451f19 100644
--- a/src/console/handlers/auth.rs
+++ b/src/console/handlers/auth.rs
@@ -12,13 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use axum::{
-    extract::State,
-    http::header,
-    response::IntoResponse,
-    Extension, Json,
-};
-use jsonwebtoken::{encode, EncodingKey, Header};
+use axum::{Extension, Json, extract::State, http::header, response::IntoResponse};
+use jsonwebtoken::{EncodingKey, Header, encode};
 use kube::Client;
 use snafu::ResultExt;
 
@@ -30,8 +25,11 @@ use crate::console::{
 use crate::types::v1alpha1::tenant::Tenant;
 
 /// Login handler
-///
-/// Validates the Kubernetes token and issues a Console session token
+// TOKEN=$(kubectl create token rustfs-operator -n rustfs-system --duration=24h)
+
+// curl -X POST http://localhost:9090/api/v1/login \
+//   -H "Content-Type: application/json" \
+//   -d "{\"token\": \"$TOKEN\"}"
 pub async fn login(
     State(state): State<AppState>,
     Json(req): Json<…>,
@@ -96,8 +94,8 @@ pub async fn logout() -> impl IntoResponse {
 
 /// Session check
 pub async fn session_check(Extension(claims): Extension<Claims>) -> Json<SessionResponse> {
-    let expires_at = chrono::DateTime::from_timestamp(claims.exp as i64, 0)
-        .map(|dt| dt.to_rfc3339());
+    let expires_at =
+        chrono::DateTime::from_timestamp(claims.exp as i64, 0).map(|dt| dt.to_rfc3339());
 
     Json(SessionResponse {
         valid: true,
@@ -108,9 +106,11 @@ pub async fn session_check(Extension(claims): Extension<Claims>) -> Json<SessionResponse>
 
 /// Create a Kubernetes client
 async fn create_client(token: &str) -> Result<Client> {
     // Load the default configuration
-    let mut config = kube::Config::infer().await.map_err(|e| Error::InternalServer {
-        message: format!("Failed to load kubeconfig: {}", e),
-    })?;
+    let mut config = kube::Config::infer()
+        .await
+        .map_err(|e| Error::InternalServer {
+            message: format!("Failed to load kubeconfig: {}", e),
+        })?;
 
     // Override the token
     config.auth_info.token = Some(token.to_string().into());
diff --git a/src/console/handlers/cluster.rs b/src/console/handlers/cluster.rs
index bf0d50f..f6b2a22 100644
--- a/src/console/handlers/cluster.rs
+++ b/src/console/handlers/cluster.rs
@@ -14,7 +14,7 @@
 
 use axum::{Extension, Json};
 use k8s_openapi::api::core::v1 as corev1;
-use kube::{api::ListParams, Api, Client, ResourceExt};
+use kube::{Api, Client, ResourceExt, api::ListParams};
 use snafu::ResultExt;
 
 use crate::console::{
@@ -138,10 +138,7 @@ pub async fn list_namespaces(
             .as_ref()
             .and_then(|s| s.phase.clone())
             .unwrap_or_else(|| "Unknown".to_string()),
-        created_at: ns
-            .metadata
-            .creation_timestamp
-            .map(|ts| ts.0.to_rfc3339()),
+        created_at: ns.metadata.creation_timestamp.map(|ts| ts.0.to_rfc3339()),
     })
     .collect();
 
@@ -198,24 +195,24 @@ pub async fn get_cluster_resources(
     let total_nodes = nodes.items.len();
 
     // Simplified statistics (production needs a more precise calculation)
-    let (total_cpu, total_memory, allocatable_cpu, allocatable_memory) = nodes
-        .items
-        .iter()
-        .fold(
-            (String::new(), String::new(), String::new(), String::new()),
-            |acc, node| {
-                // Simplified handling here; the quantities should really be accumulated
-                if let Some(status) = &node.status {
-                    if let Some(capacity) = &status.capacity {
-                        // Should accumulate; shown for demonstration only
-                        let cpu = capacity.get("cpu").map(|q| q.0.clone()).unwrap_or_default();
-                        let mem = capacity.get("memory").map(|q| q.0.clone()).unwrap_or_default();
-                        return (cpu, mem, acc.2, acc.3);
-                    }
-                }
-                acc
-            },
-        );
+    let (total_cpu, total_memory, allocatable_cpu, allocatable_memory) = nodes.items.iter().fold(
+        (String::new(), String::new(), String::new(), String::new()),
+        |acc, node| {
+            // Simplified handling here; the quantities should really be accumulated
+            if let Some(status) = &node.status {
+                if let Some(capacity) = &status.capacity {
+                    // Should accumulate; shown for demonstration only
+                    let cpu = capacity.get("cpu").map(|q| q.0.clone()).unwrap_or_default();
+                    let mem = capacity
+                        .get("memory")
+                        .map(|q| q.0.clone())
+                        .unwrap_or_default();
+                    return (cpu, mem, acc.2, acc.3);
+                }
+            }
+            acc
+        },
+    );
 
     Ok(Json(ClusterResourcesResponse {
         total_nodes,
@@ -228,9 +225,11 @@ pub async fn get_cluster_resources(
 
 /// Create a Kubernetes client
 async fn create_client(claims: &Claims) -> Result<Client> {
-    let mut config = kube::Config::infer().await.map_err(|e| Error::InternalServer {
-        message: format!("Failed to load kubeconfig: {}", e),
-    })?;
+    let mut config = kube::Config::infer()
+        .await
+        .map_err(|e| Error::InternalServer {
+            message: format!("Failed to load kubeconfig: {}", e),
+        })?;
 
     config.auth_info.token = Some(claims.k8s_token.clone().into());
diff --git a/src/console/handlers/events.rs b/src/console/handlers/events.rs
index f85125a..950a495 100644
--- a/src/console/handlers/events.rs
+++ b/src/console/handlers/events.rs
@@ -12,9 +12,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use axum::{extract::Path, Extension, Json};
+use axum::{Extension, Json, extract::Path};
 use k8s_openapi::api::core::v1 as corev1;
-use kube::{api::ListParams, Api, Client};
+use kube::{Api, Client, api::ListParams};
 use snafu::ResultExt;
 
 use crate::console::{
@@ -60,9 +60,11 @@ pub async fn list_tenant_events(
 
 /// Create a Kubernetes client
 async fn create_client(claims: &Claims) -> Result<Client> {
-    let mut config = kube::Config::infer().await.map_err(|e| Error::InternalServer {
-        message: format!("Failed to load kubeconfig: {}", e),
-    })?;
+    let mut config = kube::Config::infer()
+        .await
+        .map_err(|e| Error::InternalServer {
+            message: format!("Failed to load kubeconfig: {}", e),
+        })?;
 
     config.auth_info.token = Some(claims.k8s_token.clone().into());
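The curl transcript added to `login` pairs with `session_check` for a quick end-to-end test of the cookie flow (ServiceAccount name as used in the tenants handler comments below):

```bash
TOKEN=$(kubectl create token rustfs-operator-console -n rustfs-system --duration=1h)
curl -s -X POST http://localhost:9090/api/v1/login \
  -H "Content-Type: application/json" \
  -d "{\"token\": \"$TOKEN\"}" -c cookies.txt
# session_check reports valid=true plus an RFC 3339 expires_at (exp claim; 12 h by default)
curl -s -b cookies.txt http://localhost:9090/api/v1/session
```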
diff --git a/src/console/handlers/tenants.rs b/src/console/handlers/tenants.rs
index 19dde46..f042817 100644
--- a/src/console/handlers/tenants.rs
+++ b/src/console/handlers/tenants.rs
@@ -12,9 +12,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use axum::{extract::Path, Extension, Json};
+use axum::{Extension, Json, extract::Path};
 use k8s_openapi::api::core::v1 as corev1;
-use kube::{api::ListParams, Api, Client, ResourceExt};
+use kube::{Api, Client, ResourceExt, api::ListParams};
 use snafu::ResultExt;
 
 use crate::console::{
@@ -24,8 +24,15 @@
 };
 use crate::types::v1alpha1::{persistence::PersistenceConfig, pool::Pool, tenant::Tenant};
 
-/// List all tenants
-pub async fn list_all_tenants(Extension(claims): Extension<Claims>) -> Result<Json<…>> {
+// curl -s -X POST http://localhost:9090/api/v1/login \
+//   -H "Content-Type: application/json" \
+//   -d "{\"token\": \"$(kubectl create token rustfs-operator-console -n rustfs-system --duration=24h)\"}" \
+//   -c cookies.txt

+// curl -b cookies.txt http://localhost:9090/api/v1/tenants
+pub async fn list_all_tenants(
+    Extension(claims): Extension<Claims>,
+) -> Result<Json<…>> {
     let client = create_client(&claims).await?;
     let api: Api<Tenant> = Api::all(client);
@@ -55,10 +62,7 @@ pub async fn list_all_tenants(

[…]

 /// Create a Kubernetes client
 async fn create_client(claims: &Claims) -> Result<Client> {
-    let mut config = kube::Config::infer().await.map_err(|e| Error::InternalServer {
-        message: format!("Failed to load kubeconfig: {}", e),
-    })?;
+    let mut config = kube::Config::infer()
+        .await
+        .map_err(|e| Error::InternalServer {
+            message: format!("Failed to load kubeconfig: {}", e),
+        })?;
 
     config.auth_info.token = Some(claims.k8s_token.clone().into());
diff --git a/src/console/middleware/auth.rs b/src/console/middleware/auth.rs
index 75a1c2c..ca657fe 100644
--- a/src/console/middleware/auth.rs
+++ b/src/console/middleware/auth.rs
@@ -14,11 +14,11 @@
 
 use axum::{
     extract::{Request, State},
-    http::{header, StatusCode},
+    http::{StatusCode, header},
     middleware::Next,
     response::Response,
 };
-use jsonwebtoken::{decode, DecodingKey, Validation};
+use jsonwebtoken::{DecodingKey, Validation, decode};
 
 use crate::console::state::{AppState, Claims};
 
@@ -72,16 +72,14 @@ pub async fn auth_middleware(
 
 /// Parse the session token from the Cookie string
 fn parse_session_cookie(cookies: &str) -> Option<String> {
-    cookies
-        .split(';')
-        .find_map(|cookie| {
-            let parts: Vec<&str> = cookie.trim().splitn(2, '=').collect();
-            if parts.len() == 2 && parts[0] == "session" {
-                Some(parts[1].to_string())
-            } else {
-                None
-            }
-        })
+    cookies.split(';').find_map(|cookie| {
+        let parts: Vec<&str> = cookie.trim().splitn(2, '=').collect();
+        if parts.len() == 2 && parts[0] == "session" {
+            Some(parts[1].to_string())
+        } else {
+            None
+        }
+    })
 }
 
 #[cfg(test)]
@@ -91,7 +89,10 @@ mod tests {
     #[test]
     fn test_parse_session_cookie() {
         let cookies = "session=test_token; other=value";
-        assert_eq!(parse_session_cookie(cookies), Some("test_token".to_string()));
+        assert_eq!(
+            parse_session_cookie(cookies),
+            Some("test_token".to_string())
+        );
 
         let cookies = "other=value";
         assert_eq!(parse_session_cookie(cookies), None);
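Besides the cluster-wide `/api/v1/tenants` call shown in the comments above, the README removed earlier in this diff documents namespace-scoped variants that reuse the same session cookie; whether these paths still match the current routes is worth verifying:

```bash
# Namespace-scoped endpoints per the removed deploy/console/README.md;
# both reuse the session cookie obtained at login
curl -b cookies.txt http://localhost:9090/api/v1/namespaces/rustfs-system/tenants
curl -b cookies.txt http://localhost:9090/api/v1/namespaces/rustfs-system/tenants/example-tenant
```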
diff --git a/src/console/routes/mod.rs b/src/console/routes/mod.rs
index 1a17877..cdfab9a 100644
--- a/src/console/routes/mod.rs
+++ b/src/console/routes/mod.rs
@@ -12,7 +12,10 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use axum::{routing::{delete, get, post, put}, Router};
+use axum::{
+    Router,
+    routing::{delete, get, post},
+};
 
 use crate::console::{handlers, state::AppState};
 
@@ -101,7 +104,10 @@ pub fn event_routes() -> Router<AppState> {
 pub fn cluster_routes() -> Router<AppState> {
     Router::new()
         .route("/cluster/nodes", get(handlers::cluster::list_nodes))
-        .route("/cluster/resources", get(handlers::cluster::get_cluster_resources))
+        .route(
+            "/cluster/resources",
+            get(handlers::cluster::get_cluster_resources),
+        )
         .route("/namespaces", get(handlers::cluster::list_namespaces))
         .route("/namespaces", post(handlers::cluster::create_namespace))
 }
diff --git a/src/console/server.rs b/src/console/server.rs
index bdcf7ed..6840b18 100644
--- a/src/console/server.rs
+++ b/src/console/server.rs
@@ -12,21 +12,10 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use axum::{
-    middleware,
-    routing::get,
-    Router,
-    http::StatusCode,
-    response::IntoResponse,
-};
-use tower_http::{
-    compression::CompressionLayer,
-    cors::CorsLayer,
-    trace::TraceLayer,
-};
+use crate::console::{routes, state::AppState};
 use axum::http::{HeaderValue, Method, header};
-
-use crate::console::{state::AppState, routes};
+use axum::{Router, http::StatusCode, middleware, response::IntoResponse, routing::get};
+use tower_http::{compression::CompressionLayer, cors::CorsLayer, trace::TraceLayer};
 
 /// Start the Console HTTP server
 pub async fn run(port: u16) -> Result<(), Box<dyn std::error::Error>> {
@@ -53,7 +42,13 @@ pub async fn run(port: u16) -> Result<(), Box<dyn std::error::Error>> {
         .layer(
             CorsLayer::new()
                 .allow_origin("http://localhost:3000".parse::<HeaderValue>().unwrap())
-                .allow_methods([Method::GET, Method::POST, Method::PUT, Method::DELETE, Method::OPTIONS])
+                .allow_methods([
+                    Method::GET,
+                    Method::POST,
+                    Method::PUT,
+                    Method::DELETE,
+                    Method::OPTIONS,
+                ])
                 .allow_headers([header::CONTENT_TYPE, header::AUTHORIZATION, header::COOKIE])
                 .allow_credentials(true),
         )
@@ -90,7 +85,10 @@ fn api_routes() -> Router<AppState> {
 
 /// Health check
 async fn health_check() -> impl IntoResponse {
-    (StatusCode::OK, "OK")
+    let since_epoch = std::time::SystemTime::now()
+        .duration_since(std::time::UNIX_EPOCH)
+        .unwrap();
+    (StatusCode::OK, format!("OK: {}", since_epoch.as_secs()))
 }
 
 /// Readiness check
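With the updated `health_check`, `/healthz` now reports the server's clock alongside the OK, so a wedged or restarted pod is visible from the probe output alone. For example:

```bash
kubectl port-forward -n rustfs-system svc/rustfs-operator-console 9090:9090 &
sleep 2
curl -s http://localhost:9090/healthz
# e.g. "OK: 1735689600" -- Unix seconds at the moment the probe was answered
```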