diff --git a/internal/controller/overcommit/overcommit_controller.go b/internal/controller/overcommit/overcommit_controller.go index af57b5b..59a15b7 100644 --- a/internal/controller/overcommit/overcommit_controller.go +++ b/internal/controller/overcommit/overcommit_controller.go @@ -65,47 +65,36 @@ func (r *OvercommitReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, nil } - // Check if the CR is being deleted - if !overcommit.ObjectMeta.DeletionTimestamp.IsZero() { - logger.Info("Overcommit CR is being deleted, cleaning up resources") - - // Clean up resources - err := r.cleanupResources(ctx, overcommit) - if err != nil { - logger.Error(err, "Failed to clean up resources") - return ctrl.Result{}, err + // Migration: Remove legacy finalizers from previous versions + // Previous versions of this operator added finalizers to CRs for manual cleanup. + // To ensure smooth upgrades, we automatically remove any legacy finalizers + // so that CRs created before this change can be deleted normally. + legacyFinalizers := []string{"overcommit.finalizer", "webhook.finalizer"} + finalizerRemoved := false + for _, legacyFinalizer := range legacyFinalizers { + if controllerutil.ContainsFinalizer(overcommit, legacyFinalizer) { + logger.Info("Removing legacy finalizer during migration", "finalizer", legacyFinalizer) + controllerutil.RemoveFinalizer(overcommit, legacyFinalizer) + finalizerRemoved = true } - - // Remove finalizer if cleanup is successful - controllerutil.RemoveFinalizer(overcommit, "overcommit.finalizer") - err = r.Update(ctx, overcommit) - if err != nil { - logger.Error(err, "Failed to remove finalizer") - return ctrl.Result{}, err - } - - return ctrl.Result{}, nil } - // Add finalizer if not present - if !controllerutil.ContainsFinalizer(overcommit, "overcommit.finalizer") { - logger.Info("Adding finalizer to Overcommit CR") - controllerutil.AddFinalizer(overcommit, "overcommit.finalizer") + // If we removed any legacy finalizers, update the object and requeue + if finalizerRemoved { err = r.Update(ctx, overcommit) if err != nil { - logger.Error(err, "Failed to add finalizer") + logger.Error(err, "Failed to remove legacy finalizers") return ctrl.Result{}, err } - // Return early to trigger a new reconciliation with the updated object - logger.Info("Finalizer added, requeuing reconciliation") - return ctrl.Result{}, nil + logger.Info("Legacy finalizers removed, requeuing reconciliation") + return ctrl.Result{RequeueAfter: time.Second * 1}, nil } // Reconcile Issuer issuer := resources.GenerateIssuer() if issuer == nil { logger.Error(nil, "Generated issuer is nil") - return ctrl.Result{}, fmt.Errorf("Generated issuer is nil") + return ctrl.Result{}, fmt.Errorf("generated issuer is nil") } _, err = controllerutil.CreateOrUpdate(ctx, r.Client, issuer, func() error { @@ -395,6 +384,12 @@ func (r *OvercommitReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, err } + // Update the status of all resources + if err := r.updateOvercommitStatusSafely(ctx); err != nil { + logger.Error(err, "Failed to update Overcommit status") + // Don't fail the reconciliation for status update errors + } + // Only requeue periodically for status checks, not immediately logger.Info("Reconciliation completed successfully", "nextReconcile", "10 seconds", "time", time.Now().Format("15:04:05")) return ctrl.Result{ diff --git a/internal/controller/overcommit/utils.go b/internal/controller/overcommit/utils.go index 16fd5e4..8e28f97 100644 --- a/internal/controller/overcommit/utils.go +++ b/internal/controller/overcommit/utils.go @@ -15,6 +15,7 @@ import ( "github.com/InditexTech/k8s-overcommit-operator/internal/utils" logf "sigs.k8s.io/controller-runtime/pkg/log" + "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" @@ -24,121 +25,126 @@ import ( func (r *OvercommitReconciler) updateOvercommitStatus(ctx context.Context, overcommitObject *overcommit.Overcommit) error { logger := logf.FromContext(ctx) + logger.V(1).Info("Updating Overcommit status") - // Initialize resource status map + // Initialize resource status map with better structure resourceStatuses := make(map[string]overcommit.ResourceStatus) + // Helper function to check resource status + checkResourceStatus := func(name, resourceType string, checkFunc func() error) { + err := checkFunc() + ready := err == nil + resourceStatuses[resourceType] = overcommit.ResourceStatus{ + Name: name, + Ready: ready, + } + if !ready { + logger.V(1).Info("Resource not ready", "type", resourceType, "name", name, "error", err) + } + } + // Check Issuer status issuer := resources.GenerateIssuer() - err := r.Get(ctx, client.ObjectKey{Name: issuer.Name, Namespace: issuer.Namespace}, issuer) - if err == nil { - resourceStatuses["issuer"] = overcommit.ResourceStatus{Name: issuer.Name, Ready: true} - } else { - resourceStatuses["issuer"] = overcommit.ResourceStatus{Name: issuer.Name, Ready: false} - } + checkResourceStatus(issuer.Name, "issuer", func() error { + return r.Get(ctx, client.ObjectKey{Name: issuer.Name, Namespace: issuer.Namespace}, issuer) + }) - // Check Deployment Validating Class status - deployment := resources.GenerateOvercommitClassValidatingDeployment(*overcommitObject) - err = r.Get(ctx, client.ObjectKey{Name: deployment.Name, Namespace: deployment.Namespace}, deployment) - if err == nil { - resourceStatuses["deployment"] = overcommit.ResourceStatus{Name: deployment.Name, Ready: true} - } else { - resourceStatuses["deployment"] = overcommit.ResourceStatus{Name: deployment.Name, Ready: false} - } + // Check OvercommitClass Validator components + overcommitClassDeployment := resources.GenerateOvercommitClassValidatingDeployment(*overcommitObject) + checkResourceStatus(overcommitClassDeployment.Name, "overcommitclass-deployment", func() error { + return r.Get(ctx, client.ObjectKey{Name: overcommitClassDeployment.Name, Namespace: overcommitClassDeployment.Namespace}, overcommitClassDeployment) + }) - // Check Service Validating Class status - service := resources.GenerateOvercommitClassValidatingService(*deployment) - err = r.Get(ctx, client.ObjectKey{Name: service.Name, Namespace: service.Namespace}, service) - if err == nil { - resourceStatuses["service"] = overcommit.ResourceStatus{Name: service.Name, Ready: true} - } else { - resourceStatuses["service"] = overcommit.ResourceStatus{Name: service.Name, Ready: false} - } + overcommitClassService := resources.GenerateOvercommitClassValidatingService(*overcommitClassDeployment) + checkResourceStatus(overcommitClassService.Name, "overcommitclass-service", func() error { + return r.Get(ctx, client.ObjectKey{Name: overcommitClassService.Name, Namespace: overcommitClassService.Namespace}, overcommitClassService) + }) - // Check Certificate Validating Class status - certificate := resources.GenerateCertificateValidatingOvercommitClass(*resources.GenerateIssuer(), *service) - err = r.Get(ctx, client.ObjectKey{Name: certificate.Name, Namespace: certificate.Namespace}, certificate) - if err == nil { - resourceStatuses["certificate"] = overcommit.ResourceStatus{Name: certificate.Name, Ready: true} - } else { - resourceStatuses["certificate"] = overcommit.ResourceStatus{Name: certificate.Name, Ready: false} - } + overcommitClassCertificate := resources.GenerateCertificateValidatingOvercommitClass(*issuer, *overcommitClassService) + checkResourceStatus(overcommitClassCertificate.Name, "overcommitclass-certificate", func() error { + return r.Get(ctx, client.ObjectKey{Name: overcommitClassCertificate.Name, Namespace: overcommitClassCertificate.Namespace}, overcommitClassCertificate) + }) - // Check Webhook Validating Class status - webhook := resources.GenerateOvercommitClassValidatingWebhookConfiguration(*deployment, *service, *certificate) - err = r.Get(ctx, client.ObjectKey{Name: webhook.Name}, webhook) - if err == nil { - resourceStatuses["webhook"] = overcommit.ResourceStatus{Name: webhook.Name, Ready: true} - } else { - resourceStatuses["webhook"] = overcommit.ResourceStatus{Name: webhook.Name, Ready: false} - } + overcommitClassWebhook := resources.GenerateOvercommitClassValidatingWebhookConfiguration(*overcommitClassDeployment, *overcommitClassService, *overcommitClassCertificate) + checkResourceStatus(overcommitClassWebhook.Name, "overcommitclass-webhook", func() error { + return r.Get(ctx, client.ObjectKey{Name: overcommitClassWebhook.Name}, overcommitClassWebhook) + }) - // Check Deployment Validating Pods status + // Check Pod Validator components podDeployment := resources.GeneratePodValidatingDeployment(*overcommitObject) - err = r.Get(ctx, client.ObjectKey{Name: podDeployment.Name, Namespace: podDeployment.Namespace}, podDeployment) - if err == nil { - resourceStatuses["podDeployment"] = overcommit.ResourceStatus{Name: podDeployment.Name, Ready: true} - } else { - resourceStatuses["podDeployment"] = overcommit.ResourceStatus{Name: podDeployment.Name, Ready: false} - } + checkResourceStatus(podDeployment.Name, "pod-deployment", func() error { + return r.Get(ctx, client.ObjectKey{Name: podDeployment.Name, Namespace: podDeployment.Namespace}, podDeployment) + }) - // Check Service Validating Pods status podService := resources.GeneratePodValidatingService(*podDeployment) - err = r.Get(ctx, client.ObjectKey{Name: podService.Name, Namespace: podService.Namespace}, podService) - if err == nil { - resourceStatuses["podService"] = overcommit.ResourceStatus{Name: podService.Name, Ready: true} - } else { - resourceStatuses["podService"] = overcommit.ResourceStatus{Name: podService.Name, Ready: false} - } + checkResourceStatus(podService.Name, "pod-service", func() error { + return r.Get(ctx, client.ObjectKey{Name: podService.Name, Namespace: podService.Namespace}, podService) + }) - // Check Certificate Validating Pods status - podCertificate := resources.GenerateCertificateValidatingPods(*resources.GenerateIssuer(), *podService) - err = r.Get(ctx, client.ObjectKey{Name: podCertificate.Name, Namespace: podCertificate.Namespace}, podCertificate) - if err == nil { - resourceStatuses["podCertificate"] = overcommit.ResourceStatus{Name: podCertificate.Name, Ready: true} - } else { - resourceStatuses["podCertificate"] = overcommit.ResourceStatus{Name: podCertificate.Name, Ready: false} - } + podCertificate := resources.GenerateCertificateValidatingPods(*issuer, *podService) + checkResourceStatus(podCertificate.Name, "pod-certificate", func() error { + return r.Get(ctx, client.ObjectKey{Name: podCertificate.Name, Namespace: podCertificate.Namespace}, podCertificate) + }) - // Check Webhook Validating Pods status + // Check Pod Webhook (handle label errors gracefully) label, err := utils.GetOvercommitLabel(ctx, r.Client) if err != nil { - logger.Error(err, "Failed to get Overcommit label") - return err + logger.Info("Failed to get Overcommit label, using default", "error", err) + label = "overcommit.inditex.dev/class" } + podWebhook := resources.GeneratePodValidatingWebhookConfiguration(*podDeployment, *podService, *podCertificate, label) - err = r.Get(ctx, client.ObjectKey{Name: podWebhook.Name}, podWebhook) - if err == nil { - resourceStatuses["podWebhook"] = overcommit.ResourceStatus{Name: podWebhook.Name, Ready: true} - } else { - resourceStatuses["podWebhook"] = overcommit.ResourceStatus{Name: podWebhook.Name, Ready: false} + checkResourceStatus(podWebhook.Name, "pod-webhook", func() error { + return r.Get(ctx, client.ObjectKey{Name: podWebhook.Name}, podWebhook) + }) + + // Check OvercommitClass Controller + ocController := resources.GenerateOvercommitClassControllerDeployment(*overcommitObject) + checkResourceStatus(ocController.Name, "overcommitclass-controller", func() error { + return r.Get(ctx, client.ObjectKey{Name: ocController.Name, Namespace: ocController.Namespace}, ocController) + }) + + // Convert map to slice for CRD status (maintain consistent order) + resourceTypes := []string{ + "issuer", + "overcommitclass-deployment", "overcommitclass-service", "overcommitclass-certificate", "overcommitclass-webhook", + "pod-deployment", "pod-service", "pod-certificate", "pod-webhook", + "overcommitclass-controller", } - // Convert map to slice for CRD status - resourceStatusSlice := make([]overcommit.ResourceStatus, 0, len(resourceStatuses)) // Pre-allocate slice + resourceStatusSlice := make([]overcommit.ResourceStatus, 0, len(resourceStatuses)) allReady := true - for _, status := range resourceStatuses { - resourceStatusSlice = append(resourceStatusSlice, status) - if !status.Ready { - allReady = false + readyCount := 0 + + for _, resourceType := range resourceTypes { + if status, exists := resourceStatuses[resourceType]; exists { + resourceStatusSlice = append(resourceStatusSlice, status) + if status.Ready { + readyCount++ + } else { + allReady = false + } } } // Update the status of the CRD overcommitObject.Status.Resources = resourceStatusSlice - // Update the condition + // Update the condition with more detailed information condition := metav1.Condition{ - Type: "ResourcesReady", - Status: metav1.ConditionTrue, - Reason: "AllResourcesReady", - Message: "All managed resources are ready", + Type: "ResourcesReady", + Status: metav1.ConditionTrue, + Reason: "AllResourcesReady", + Message: fmt.Sprintf("All %d managed resources are ready", len(resourceStatusSlice)), + LastTransitionTime: metav1.Now(), } + if !allReady { condition.Status = metav1.ConditionFalse condition.Reason = "ResourcesNotReady" - condition.Message = "Some resources are not ready" + condition.Message = fmt.Sprintf("%d of %d resources are ready", readyCount, len(resourceStatusSlice)) } + setCondition(&overcommitObject.Status, condition) // Update the status in the API @@ -147,20 +153,21 @@ func (r *OvercommitReconciler) updateOvercommitStatus(ctx context.Context, overc return err } + logger.V(1).Info("Successfully updated Overcommit status", "ready", readyCount, "total", len(resourceStatusSlice)) return nil } // updateOvercommitStatusSafely safely updates the status by first refreshing the object from the cluster // with retry logic to handle concurrent modifications -// Since Overcommit is cluster-wide and always named "cluster", we use a fixed key func (r *OvercommitReconciler) updateOvercommitStatusSafely(ctx context.Context) error { logger := logf.FromContext(ctx) // Since Overcommit is cluster-wide and always named "cluster", use the correct key clusterKey := types.NamespacedName{Name: "cluster", Namespace: ""} - // Retry up to 3 times with exponential backoff - for attempt := 0; attempt < 3; attempt++ { + // Retry up to 5 times with exponential backoff + maxRetries := 5 + for attempt := 0; attempt < maxRetries; attempt++ { // Fetch the latest version of the object from the cluster freshOvercommit := &overcommit.Overcommit{} if err := r.Get(ctx, clusterKey, freshOvercommit); err != nil { @@ -169,97 +176,74 @@ func (r *OvercommitReconciler) updateOvercommitStatusSafely(ctx context.Context) return err } // Object not found, nothing to update + logger.V(1).Info("Overcommit object not found, skipping status update") return nil } // Try to update status using the fresh object if err := r.updateOvercommitStatus(ctx, freshOvercommit); err != nil { - if attempt == 2 { // Last attempt - logger.Error(err, "Failed to update Overcommit status after all retries") + isConflict := errors.IsConflict(err) + isLastAttempt := attempt == maxRetries-1 + + if isLastAttempt { + logger.Error(err, "Failed to update Overcommit status after all retries", "maxRetries", maxRetries) + return err + } + + if isConflict { + // Wait with exponential backoff for conflicts + backoffDuration := time.Duration(1<