From 98d96f8d56ea21d2037952de7ff28839fcb9ffda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Enrique=20Andr=C3=A9s=20Villar?= Date: Thu, 16 Oct 2025 13:07:27 +0200 Subject: [PATCH 1/2] Refactor: streamline Overcommit and OvercommitClass reconciliation logic and remove unused finalizer code --- .../overcommit/overcommit_controller.go | 45 +-- internal/controller/overcommit/utils.go | 284 +++++++++--------- .../overcommitclass_controller.go | 36 --- internal/controller/overcommitclass/utils.go | 62 ---- .../generate_resources_validating_webhooks.go | 2 +- 5 files changed, 142 insertions(+), 287 deletions(-) diff --git a/internal/controller/overcommit/overcommit_controller.go b/internal/controller/overcommit/overcommit_controller.go index af57b5b..987da5a 100644 --- a/internal/controller/overcommit/overcommit_controller.go +++ b/internal/controller/overcommit/overcommit_controller.go @@ -33,7 +33,6 @@ type OvercommitReconciler struct { // +kubebuilder:rbac:groups=overcommit.inditex.dev,resources=overcommits,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=overcommit.inditex.dev,resources=overcommits/status,verbs=get;update;patch -// +kubebuilder:rbac:groups=overcommit.inditex.dev,resources=overcommits/finalizers,verbs=update // Reconcile is part of the main kubernetes reconciliation loop which aims to // move the current state of the cluster closer to the desired state. @@ -65,47 +64,11 @@ func (r *OvercommitReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, nil } - // Check if the CR is being deleted - if !overcommit.ObjectMeta.DeletionTimestamp.IsZero() { - logger.Info("Overcommit CR is being deleted, cleaning up resources") - - // Clean up resources - err := r.cleanupResources(ctx, overcommit) - if err != nil { - logger.Error(err, "Failed to clean up resources") - return ctrl.Result{}, err - } - - // Remove finalizer if cleanup is successful - controllerutil.RemoveFinalizer(overcommit, "overcommit.finalizer") - err = r.Update(ctx, overcommit) - if err != nil { - logger.Error(err, "Failed to remove finalizer") - return ctrl.Result{}, err - } - - return ctrl.Result{}, nil - } - - // Add finalizer if not present - if !controllerutil.ContainsFinalizer(overcommit, "overcommit.finalizer") { - logger.Info("Adding finalizer to Overcommit CR") - controllerutil.AddFinalizer(overcommit, "overcommit.finalizer") - err = r.Update(ctx, overcommit) - if err != nil { - logger.Error(err, "Failed to add finalizer") - return ctrl.Result{}, err - } - // Return early to trigger a new reconciliation with the updated object - logger.Info("Finalizer added, requeuing reconciliation") - return ctrl.Result{}, nil - } - // Reconcile Issuer issuer := resources.GenerateIssuer() if issuer == nil { logger.Error(nil, "Generated issuer is nil") - return ctrl.Result{}, fmt.Errorf("Generated issuer is nil") + return ctrl.Result{}, fmt.Errorf("generated issuer is nil") } _, err = controllerutil.CreateOrUpdate(ctx, r.Client, issuer, func() error { @@ -395,6 +358,12 @@ func (r *OvercommitReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, err } + // Update the status of all resources + if err := r.updateOvercommitStatusSafely(ctx); err != nil { + logger.Error(err, "Failed to update Overcommit status") + // Don't fail the reconciliation for status update errors + } + // Only requeue periodically for status checks, not immediately logger.Info("Reconciliation completed successfully", "nextReconcile", "10 seconds", "time", time.Now().Format("15:04:05")) return ctrl.Result{ diff --git a/internal/controller/overcommit/utils.go b/internal/controller/overcommit/utils.go index 16fd5e4..8e28f97 100644 --- a/internal/controller/overcommit/utils.go +++ b/internal/controller/overcommit/utils.go @@ -15,6 +15,7 @@ import ( "github.com/InditexTech/k8s-overcommit-operator/internal/utils" logf "sigs.k8s.io/controller-runtime/pkg/log" + "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" @@ -24,121 +25,126 @@ import ( func (r *OvercommitReconciler) updateOvercommitStatus(ctx context.Context, overcommitObject *overcommit.Overcommit) error { logger := logf.FromContext(ctx) + logger.V(1).Info("Updating Overcommit status") - // Initialize resource status map + // Initialize resource status map with better structure resourceStatuses := make(map[string]overcommit.ResourceStatus) + // Helper function to check resource status + checkResourceStatus := func(name, resourceType string, checkFunc func() error) { + err := checkFunc() + ready := err == nil + resourceStatuses[resourceType] = overcommit.ResourceStatus{ + Name: name, + Ready: ready, + } + if !ready { + logger.V(1).Info("Resource not ready", "type", resourceType, "name", name, "error", err) + } + } + // Check Issuer status issuer := resources.GenerateIssuer() - err := r.Get(ctx, client.ObjectKey{Name: issuer.Name, Namespace: issuer.Namespace}, issuer) - if err == nil { - resourceStatuses["issuer"] = overcommit.ResourceStatus{Name: issuer.Name, Ready: true} - } else { - resourceStatuses["issuer"] = overcommit.ResourceStatus{Name: issuer.Name, Ready: false} - } + checkResourceStatus(issuer.Name, "issuer", func() error { + return r.Get(ctx, client.ObjectKey{Name: issuer.Name, Namespace: issuer.Namespace}, issuer) + }) - // Check Deployment Validating Class status - deployment := resources.GenerateOvercommitClassValidatingDeployment(*overcommitObject) - err = r.Get(ctx, client.ObjectKey{Name: deployment.Name, Namespace: deployment.Namespace}, deployment) - if err == nil { - resourceStatuses["deployment"] = overcommit.ResourceStatus{Name: deployment.Name, Ready: true} - } else { - resourceStatuses["deployment"] = overcommit.ResourceStatus{Name: deployment.Name, Ready: false} - } + // Check OvercommitClass Validator components + overcommitClassDeployment := resources.GenerateOvercommitClassValidatingDeployment(*overcommitObject) + checkResourceStatus(overcommitClassDeployment.Name, "overcommitclass-deployment", func() error { + return r.Get(ctx, client.ObjectKey{Name: overcommitClassDeployment.Name, Namespace: overcommitClassDeployment.Namespace}, overcommitClassDeployment) + }) - // Check Service Validating Class status - service := resources.GenerateOvercommitClassValidatingService(*deployment) - err = r.Get(ctx, client.ObjectKey{Name: service.Name, Namespace: service.Namespace}, service) - if err == nil { - resourceStatuses["service"] = overcommit.ResourceStatus{Name: service.Name, Ready: true} - } else { - resourceStatuses["service"] = overcommit.ResourceStatus{Name: service.Name, Ready: false} - } + overcommitClassService := resources.GenerateOvercommitClassValidatingService(*overcommitClassDeployment) + checkResourceStatus(overcommitClassService.Name, "overcommitclass-service", func() error { + return r.Get(ctx, client.ObjectKey{Name: overcommitClassService.Name, Namespace: overcommitClassService.Namespace}, overcommitClassService) + }) - // Check Certificate Validating Class status - certificate := resources.GenerateCertificateValidatingOvercommitClass(*resources.GenerateIssuer(), *service) - err = r.Get(ctx, client.ObjectKey{Name: certificate.Name, Namespace: certificate.Namespace}, certificate) - if err == nil { - resourceStatuses["certificate"] = overcommit.ResourceStatus{Name: certificate.Name, Ready: true} - } else { - resourceStatuses["certificate"] = overcommit.ResourceStatus{Name: certificate.Name, Ready: false} - } + overcommitClassCertificate := resources.GenerateCertificateValidatingOvercommitClass(*issuer, *overcommitClassService) + checkResourceStatus(overcommitClassCertificate.Name, "overcommitclass-certificate", func() error { + return r.Get(ctx, client.ObjectKey{Name: overcommitClassCertificate.Name, Namespace: overcommitClassCertificate.Namespace}, overcommitClassCertificate) + }) - // Check Webhook Validating Class status - webhook := resources.GenerateOvercommitClassValidatingWebhookConfiguration(*deployment, *service, *certificate) - err = r.Get(ctx, client.ObjectKey{Name: webhook.Name}, webhook) - if err == nil { - resourceStatuses["webhook"] = overcommit.ResourceStatus{Name: webhook.Name, Ready: true} - } else { - resourceStatuses["webhook"] = overcommit.ResourceStatus{Name: webhook.Name, Ready: false} - } + overcommitClassWebhook := resources.GenerateOvercommitClassValidatingWebhookConfiguration(*overcommitClassDeployment, *overcommitClassService, *overcommitClassCertificate) + checkResourceStatus(overcommitClassWebhook.Name, "overcommitclass-webhook", func() error { + return r.Get(ctx, client.ObjectKey{Name: overcommitClassWebhook.Name}, overcommitClassWebhook) + }) - // Check Deployment Validating Pods status + // Check Pod Validator components podDeployment := resources.GeneratePodValidatingDeployment(*overcommitObject) - err = r.Get(ctx, client.ObjectKey{Name: podDeployment.Name, Namespace: podDeployment.Namespace}, podDeployment) - if err == nil { - resourceStatuses["podDeployment"] = overcommit.ResourceStatus{Name: podDeployment.Name, Ready: true} - } else { - resourceStatuses["podDeployment"] = overcommit.ResourceStatus{Name: podDeployment.Name, Ready: false} - } + checkResourceStatus(podDeployment.Name, "pod-deployment", func() error { + return r.Get(ctx, client.ObjectKey{Name: podDeployment.Name, Namespace: podDeployment.Namespace}, podDeployment) + }) - // Check Service Validating Pods status podService := resources.GeneratePodValidatingService(*podDeployment) - err = r.Get(ctx, client.ObjectKey{Name: podService.Name, Namespace: podService.Namespace}, podService) - if err == nil { - resourceStatuses["podService"] = overcommit.ResourceStatus{Name: podService.Name, Ready: true} - } else { - resourceStatuses["podService"] = overcommit.ResourceStatus{Name: podService.Name, Ready: false} - } + checkResourceStatus(podService.Name, "pod-service", func() error { + return r.Get(ctx, client.ObjectKey{Name: podService.Name, Namespace: podService.Namespace}, podService) + }) - // Check Certificate Validating Pods status - podCertificate := resources.GenerateCertificateValidatingPods(*resources.GenerateIssuer(), *podService) - err = r.Get(ctx, client.ObjectKey{Name: podCertificate.Name, Namespace: podCertificate.Namespace}, podCertificate) - if err == nil { - resourceStatuses["podCertificate"] = overcommit.ResourceStatus{Name: podCertificate.Name, Ready: true} - } else { - resourceStatuses["podCertificate"] = overcommit.ResourceStatus{Name: podCertificate.Name, Ready: false} - } + podCertificate := resources.GenerateCertificateValidatingPods(*issuer, *podService) + checkResourceStatus(podCertificate.Name, "pod-certificate", func() error { + return r.Get(ctx, client.ObjectKey{Name: podCertificate.Name, Namespace: podCertificate.Namespace}, podCertificate) + }) - // Check Webhook Validating Pods status + // Check Pod Webhook (handle label errors gracefully) label, err := utils.GetOvercommitLabel(ctx, r.Client) if err != nil { - logger.Error(err, "Failed to get Overcommit label") - return err + logger.Info("Failed to get Overcommit label, using default", "error", err) + label = "overcommit.inditex.dev/class" } + podWebhook := resources.GeneratePodValidatingWebhookConfiguration(*podDeployment, *podService, *podCertificate, label) - err = r.Get(ctx, client.ObjectKey{Name: podWebhook.Name}, podWebhook) - if err == nil { - resourceStatuses["podWebhook"] = overcommit.ResourceStatus{Name: podWebhook.Name, Ready: true} - } else { - resourceStatuses["podWebhook"] = overcommit.ResourceStatus{Name: podWebhook.Name, Ready: false} + checkResourceStatus(podWebhook.Name, "pod-webhook", func() error { + return r.Get(ctx, client.ObjectKey{Name: podWebhook.Name}, podWebhook) + }) + + // Check OvercommitClass Controller + ocController := resources.GenerateOvercommitClassControllerDeployment(*overcommitObject) + checkResourceStatus(ocController.Name, "overcommitclass-controller", func() error { + return r.Get(ctx, client.ObjectKey{Name: ocController.Name, Namespace: ocController.Namespace}, ocController) + }) + + // Convert map to slice for CRD status (maintain consistent order) + resourceTypes := []string{ + "issuer", + "overcommitclass-deployment", "overcommitclass-service", "overcommitclass-certificate", "overcommitclass-webhook", + "pod-deployment", "pod-service", "pod-certificate", "pod-webhook", + "overcommitclass-controller", } - // Convert map to slice for CRD status - resourceStatusSlice := make([]overcommit.ResourceStatus, 0, len(resourceStatuses)) // Pre-allocate slice + resourceStatusSlice := make([]overcommit.ResourceStatus, 0, len(resourceStatuses)) allReady := true - for _, status := range resourceStatuses { - resourceStatusSlice = append(resourceStatusSlice, status) - if !status.Ready { - allReady = false + readyCount := 0 + + for _, resourceType := range resourceTypes { + if status, exists := resourceStatuses[resourceType]; exists { + resourceStatusSlice = append(resourceStatusSlice, status) + if status.Ready { + readyCount++ + } else { + allReady = false + } } } // Update the status of the CRD overcommitObject.Status.Resources = resourceStatusSlice - // Update the condition + // Update the condition with more detailed information condition := metav1.Condition{ - Type: "ResourcesReady", - Status: metav1.ConditionTrue, - Reason: "AllResourcesReady", - Message: "All managed resources are ready", + Type: "ResourcesReady", + Status: metav1.ConditionTrue, + Reason: "AllResourcesReady", + Message: fmt.Sprintf("All %d managed resources are ready", len(resourceStatusSlice)), + LastTransitionTime: metav1.Now(), } + if !allReady { condition.Status = metav1.ConditionFalse condition.Reason = "ResourcesNotReady" - condition.Message = "Some resources are not ready" + condition.Message = fmt.Sprintf("%d of %d resources are ready", readyCount, len(resourceStatusSlice)) } + setCondition(&overcommitObject.Status, condition) // Update the status in the API @@ -147,20 +153,21 @@ func (r *OvercommitReconciler) updateOvercommitStatus(ctx context.Context, overc return err } + logger.V(1).Info("Successfully updated Overcommit status", "ready", readyCount, "total", len(resourceStatusSlice)) return nil } // updateOvercommitStatusSafely safely updates the status by first refreshing the object from the cluster // with retry logic to handle concurrent modifications -// Since Overcommit is cluster-wide and always named "cluster", we use a fixed key func (r *OvercommitReconciler) updateOvercommitStatusSafely(ctx context.Context) error { logger := logf.FromContext(ctx) // Since Overcommit is cluster-wide and always named "cluster", use the correct key clusterKey := types.NamespacedName{Name: "cluster", Namespace: ""} - // Retry up to 3 times with exponential backoff - for attempt := 0; attempt < 3; attempt++ { + // Retry up to 5 times with exponential backoff + maxRetries := 5 + for attempt := 0; attempt < maxRetries; attempt++ { // Fetch the latest version of the object from the cluster freshOvercommit := &overcommit.Overcommit{} if err := r.Get(ctx, clusterKey, freshOvercommit); err != nil { @@ -169,97 +176,74 @@ func (r *OvercommitReconciler) updateOvercommitStatusSafely(ctx context.Context) return err } // Object not found, nothing to update + logger.V(1).Info("Overcommit object not found, skipping status update") return nil } // Try to update status using the fresh object if err := r.updateOvercommitStatus(ctx, freshOvercommit); err != nil { - if attempt == 2 { // Last attempt - logger.Error(err, "Failed to update Overcommit status after all retries") + isConflict := errors.IsConflict(err) + isLastAttempt := attempt == maxRetries-1 + + if isLastAttempt { + logger.Error(err, "Failed to update Overcommit status after all retries", "maxRetries", maxRetries) + return err + } + + if isConflict { + // Wait with exponential backoff for conflicts + backoffDuration := time.Duration(1< Date: Thu, 16 Oct 2025 13:30:21 +0200 Subject: [PATCH 2/2] fix: add migration to remove legacy finalizers from existing CRs --- .../overcommit/overcommit_controller.go | 26 +++++++++++++++++++ .../overcommitclass_controller.go | 17 ++++++++++++ 2 files changed, 43 insertions(+) diff --git a/internal/controller/overcommit/overcommit_controller.go b/internal/controller/overcommit/overcommit_controller.go index 987da5a..59a15b7 100644 --- a/internal/controller/overcommit/overcommit_controller.go +++ b/internal/controller/overcommit/overcommit_controller.go @@ -33,6 +33,7 @@ type OvercommitReconciler struct { // +kubebuilder:rbac:groups=overcommit.inditex.dev,resources=overcommits,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=overcommit.inditex.dev,resources=overcommits/status,verbs=get;update;patch +// +kubebuilder:rbac:groups=overcommit.inditex.dev,resources=overcommits/finalizers,verbs=update // Reconcile is part of the main kubernetes reconciliation loop which aims to // move the current state of the cluster closer to the desired state. @@ -64,6 +65,31 @@ func (r *OvercommitReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, nil } + // Migration: Remove legacy finalizers from previous versions + // Previous versions of this operator added finalizers to CRs for manual cleanup. + // To ensure smooth upgrades, we automatically remove any legacy finalizers + // so that CRs created before this change can be deleted normally. + legacyFinalizers := []string{"overcommit.finalizer", "webhook.finalizer"} + finalizerRemoved := false + for _, legacyFinalizer := range legacyFinalizers { + if controllerutil.ContainsFinalizer(overcommit, legacyFinalizer) { + logger.Info("Removing legacy finalizer during migration", "finalizer", legacyFinalizer) + controllerutil.RemoveFinalizer(overcommit, legacyFinalizer) + finalizerRemoved = true + } + } + + // If we removed any legacy finalizers, update the object and requeue + if finalizerRemoved { + err = r.Update(ctx, overcommit) + if err != nil { + logger.Error(err, "Failed to remove legacy finalizers") + return ctrl.Result{}, err + } + logger.Info("Legacy finalizers removed, requeuing reconciliation") + return ctrl.Result{RequeueAfter: time.Second * 1}, nil + } + // Reconcile Issuer issuer := resources.GenerateIssuer() if issuer == nil { diff --git a/internal/controller/overcommitclass/overcommitclass_controller.go b/internal/controller/overcommitclass/overcommitclass_controller.go index cd4bca4..9d1532e 100644 --- a/internal/controller/overcommitclass/overcommitclass_controller.go +++ b/internal/controller/overcommitclass/overcommitclass_controller.go @@ -28,6 +28,7 @@ type OvercommitClassReconciler struct { // +kubebuilder:rbac:groups=overcommit.inditex.dev,resources=overcommitclasses,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=overcommit.inditex.dev,resources=overcommitclasses/status,verbs=get;update;patch +// +kubebuilder:rbac:groups=overcommit.inditex.dev,resources=overcommitclasses/finalizers,verbs=update // +kubebuilder:rbac:groups=cert-manager.io,resources=certificates,verbs=get;list;watch;create;update;delete // +kubebuilder:rbac:groups=cert-manager.io,resources=issuers,verbs=get;list;watch;create;update;delete // +kubebuilder:rbac:groups="",resources=events,verbs=create;patch;update @@ -72,6 +73,22 @@ func (r *OvercommitClassReconciler) Reconcile(ctx context.Context, req ctrl.Requ return ctrl.Result{}, nil } + // Migration: Remove legacy finalizer from previous versions + // Previous versions of this operator added finalizers to OvercommitClass CRs for manual cleanup. + // To ensure smooth upgrades, we automatically remove any legacy finalizer + // so that CRs created before this change can be deleted normally. + if controllerutil.ContainsFinalizer(overcommitClass, "overcommitclass.finalizer") { + logger.Info("Removing legacy finalizer during migration", "finalizer", "overcommitclass.finalizer") + controllerutil.RemoveFinalizer(overcommitClass, "overcommitclass.finalizer") + err = r.Update(ctx, overcommitClass) + if err != nil { + logger.Error(err, "Failed to remove legacy finalizer") + return ctrl.Result{}, err + } + logger.Info("Legacy finalizer removed, requeuing reconciliation") + return ctrl.Result{RequeueAfter: time.Second * 1}, nil + } + // Check if the OvercommitClass has the correct owner reference overcommitResource, err := utils.GetOvercommit(ctx, r.Client) if err != nil {