diff --git a/build/crd/crunchy/generated/postgres-operator.crunchydata.com_postgresclusters.yaml b/build/crd/crunchy/generated/postgres-operator.crunchydata.com_postgresclusters.yaml
index 8d1bf8dbec..9fb80ba5d2 100644
--- a/build/crd/crunchy/generated/postgres-operator.crunchydata.com_postgresclusters.yaml
+++ b/build/crd/crunchy/generated/postgres-operator.crunchydata.com_postgresclusters.yaml
@@ -22972,6 +22972,12 @@ spec:
             properties:
               pgBouncer:
                 properties:
+                  lastFailoverPrimaryUID:
+                    description: |-
+                      Identifies the UID of the primary pod when the failover signal (SIGTERM) was last triggered.
+                      Used to detect failovers and trigger a PgBouncer container restart for a fresh
+                      connection pool and DNS lookup to the current primary.
+                    type: string
                   postgresRevision:
                     description: |-
                       Identifies the revision of PgBouncer assets that have been installed into
diff --git a/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml b/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml
index 17a1908ccf..6bffe1b60a 100644
--- a/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml
+++ b/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml
@@ -22870,6 +22870,12 @@ spec:
             properties:
               pgBouncer:
                 properties:
+                  lastFailoverPrimaryUID:
+                    description: |-
+                      Identifies the UID of the primary pod when the failover signal (SIGTERM) was last triggered.
+                      Used to detect failovers and trigger a PgBouncer container restart for a fresh
+                      connection pool and DNS lookup to the current primary.
+                    type: string
                   postgresRevision:
                     description: |-
                       Identifies the revision of PgBouncer assets that have been installed into
diff --git a/deploy/crd.yaml b/deploy/crd.yaml
index c06debdaec..9bc5532465 100644
--- a/deploy/crd.yaml
+++ b/deploy/crd.yaml
@@ -51472,6 +51472,12 @@ spec:
             properties:
               pgBouncer:
                 properties:
+                  lastFailoverPrimaryUID:
+                    description: |-
+                      Identifies the UID of the primary pod when the failover signal (SIGTERM) was last triggered.
+                      Used to detect failovers and trigger a PgBouncer container restart for a fresh
+                      connection pool and DNS lookup to the current primary.
+                    type: string
                   postgresRevision:
                     description: |-
                       Identifies the revision of PgBouncer assets that have been installed into
diff --git a/e2e-tests/functions b/e2e-tests/functions
index f27292760d..2a100bea25 100644
--- a/e2e-tests/functions
+++ b/e2e-tests/functions
@@ -54,6 +54,7 @@ deploy_operator() {
         disable_telemetry=false
     fi
     yq eval '.spec.template.spec.containers[0].image = "'${IMAGE}'"' "${DEPLOY_DIR}/${cw_prefix}operator.yaml" \
+        | yq eval '.spec.template.spec.containers[0].imagePullPolicy = "IfNotPresent"' - \
         | yq eval '(.spec.template.spec.containers[] | select(.name=="operator") | .env[] | select(.name=="DISABLE_TELEMETRY") | .value) = "'${disable_telemetry}'"' - \
         | kubectl -n "${OPERATOR_NS:-$NAMESPACE}" apply -f -
 }
diff --git a/e2e-tests/tests/pgbouncer-failover/00-assert.yaml b/e2e-tests/tests/pgbouncer-failover/00-assert.yaml
new file mode 100644
index 0000000000..14beb86c55
--- /dev/null
+++ b/e2e-tests/tests/pgbouncer-failover/00-assert.yaml
@@ -0,0 +1,12 @@
+apiVersion: kuttl.dev/v1beta1
+kind: TestAssert
+timeout: 120
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: percona-postgresql-operator
+status:
+  readyReplicas: 1
+  replicas: 1
+  updatedReplicas: 1
diff --git a/e2e-tests/tests/pgbouncer-failover/00-deploy-operator.yaml b/e2e-tests/tests/pgbouncer-failover/00-deploy-operator.yaml
new file mode 100644
index 0000000000..7faf4da852
--- /dev/null
+++ b/e2e-tests/tests/pgbouncer-failover/00-deploy-operator.yaml
@@ -0,0 +1,14 @@
+apiVersion: kuttl.dev/v1beta1
+kind: TestStep
+timeout: 10
+commands:
+  - script: |-
+      set -o errexit
+      set -o xtrace
+
+      source ../../functions
+      init_temp_dir # do this only in the first TestStep
+
+      deploy_operator
+      deploy_client
+      deploy_s3_secrets
diff --git a/e2e-tests/tests/pgbouncer-failover/01-assert.yaml b/e2e-tests/tests/pgbouncer-failover/01-assert.yaml
new file mode 100644
index 0000000000..d92e1f49c5
--- /dev/null
+++ b/e2e-tests/tests/pgbouncer-failover/01-assert.yaml
@@ -0,0 +1,16 @@
+apiVersion: kuttl.dev/v1beta1
+kind: TestAssert
+timeout: 120
+---
+apiVersion: pgv2.percona.com/v2
+kind: PerconaPGCluster
+metadata:
+  name: pgbouncer-failover
+status:
+  postgres:
+    ready: 2
+    size: 2
+  pgbouncer:
+    ready: 2
+    size: 2
+  state: ready
diff --git a/e2e-tests/tests/pgbouncer-failover/01-create-cluster.yaml b/e2e-tests/tests/pgbouncer-failover/01-create-cluster.yaml
new file mode 100644
index 0000000000..585b620930
--- /dev/null
+++ b/e2e-tests/tests/pgbouncer-failover/01-create-cluster.yaml
@@ -0,0 +1,15 @@
+apiVersion: kuttl.dev/v1beta1
+kind: TestStep
+timeout: 10
+commands:
+  - script: |-
+      set -o errexit
+      set -o xtrace
+
+      source ../../functions
+
+      get_cr \
+        | yq eval '
+            .spec.proxy.pgBouncer.replicas=2 |
+            .spec.instances[].replicas=2' - \
+        | kubectl -n "${NAMESPACE}" apply -f -
diff --git a/e2e-tests/tests/pgbouncer-failover/02-assert.yaml b/e2e-tests/tests/pgbouncer-failover/02-assert.yaml
new file mode 100644
index 0000000000..cd0f5a0773
--- /dev/null
+++ b/e2e-tests/tests/pgbouncer-failover/02-assert.yaml
@@ -0,0 +1,17 @@
+apiVersion: kuttl.dev/v1beta1
+kind: TestAssert
+timeout: 120
+---
+apiVersion: pgv2.percona.com/v2
+kind: PerconaPGCluster
+metadata:
+  name: pgbouncer-failover
+status:
+  postgres:
+    ready: 2
+    size: 2
+  pgbouncer:
+    # PgBouncer should be back to ready state with 2 replicas
+    ready: 2
+    size: 2
+  state: ready
diff --git a/e2e-tests/tests/pgbouncer-failover/02-trigger-switchover.yaml b/e2e-tests/tests/pgbouncer-failover/02-trigger-switchover.yaml
new file mode 100644
index 0000000000..a0d47edbc6
--- /dev/null
+++ b/e2e-tests/tests/pgbouncer-failover/02-trigger-switchover.yaml
@@ -0,0 +1,55 @@
+apiVersion: kuttl.dev/v1beta1
+kind: TestStep
+timeout: 60
+commands:
+  - script: |-
+      set -o errexit
+      set -o xtrace
+
+      source ../../functions
+
+      LEADER=$(kubectl get pods -n "${NAMESPACE}" \
+        -l postgres-operator.crunchydata.com/role=primary \
+        -o jsonpath='{.items[0].metadata.name}')
+
+      echo "Current leader: $LEADER"
+
+      REPLICA=$(kubectl get pods -n "${NAMESPACE}" \
+        -l postgres-operator.crunchydata.com/role=replica \
+        -o jsonpath='{.items[0].metadata.name}')
+
+      echo "Replica to promote: $REPLICA"
+
+      kubectl get pods -n "${NAMESPACE}" \
+        -l postgres-operator.crunchydata.com/role=pgbouncer \
+        -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' \
+        > /tmp/pgbouncer-pods-before.txt
+
+      echo "PgBouncer pods before switchover:"
+      cat /tmp/pgbouncer-pods-before.txt
+
+      kubectl exec "$LEADER" -c database -n "${NAMESPACE}" -- \
+        patronictl switchover \
+        --leader "$LEADER" \
+        --candidate "$REPLICA" \
+        --force
+
+      echo "Switchover triggered successfully"
+
+      # Wait for PgBouncer pods to be recreated (they should be deleted and recreated)
+      sleep 15
+
+      kubectl get pods -n "${NAMESPACE}" \
+        -l postgres-operator.crunchydata.com/role=pgbouncer \
+        -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' \
+        > /tmp/pgbouncer-pods-after.txt
+
+      echo "PgBouncer pods after switchover:"
+      cat /tmp/pgbouncer-pods-after.txt
+
+      if diff /tmp/pgbouncer-pods-before.txt /tmp/pgbouncer-pods-after.txt > /dev/null; then
+        echo "ERROR: PgBouncer pods were NOT recreated after failover!"
+        exit 1
+      else
+        echo "SUCCESS: PgBouncer pods were recreated after failover"
+      fi
diff --git a/e2e-tests/tests/pgbouncer-failover/03-cleanup.yaml b/e2e-tests/tests/pgbouncer-failover/03-cleanup.yaml
new file mode 100644
index 0000000000..c6196ff4cf
--- /dev/null
+++ b/e2e-tests/tests/pgbouncer-failover/03-cleanup.yaml
@@ -0,0 +1,21 @@
+apiVersion: kuttl.dev/v1beta1
+kind: TestStep
+delete:
+  - apiVersion: pgv2.percona.com/v2
+    kind: PerconaPGCluster
+    metadata:
+      name: pgbouncer-failover
+  - apiVersion: postgres-operator.crunchydata.com/v1beta1
+    kind: PostgresCluster
+    metadata:
+      name: pgbouncer-failover
+commands:
+  - script: |-
+      set -o errexit
+      set -o xtrace
+
+      source ../../functions
+
+      remove_all_finalizers
+      destroy_operator
+timeout: 60
diff --git a/internal/controller/postgrescluster/instance.go b/internal/controller/postgrescluster/instance.go
index ee616a0395..fdb61e7a14 100644
--- a/internal/controller/postgrescluster/instance.go
+++ b/internal/controller/postgrescluster/instance.go
@@ -262,11 +262,11 @@ func newObservedInstances(
 	return &observed
 }
 
-// writablePod looks at observedInstances and finds an instance that matches
+// WritablePod looks at observedInstances and finds an instance that matches
 // a few conditions. The instance should be non-terminating, running, and
 // writable i.e. the instance with the primary. If such an instance exists, it
 // is returned along with the instance pod.
-func (observed *observedInstances) writablePod(container string) (*corev1.Pod, *Instance) {
+func (observed *observedInstances) WritablePod(container string) (*corev1.Pod, *Instance) {
 	if observed == nil {
 		return nil, nil
 	}
diff --git a/internal/controller/postgrescluster/instance_test.go b/internal/controller/postgrescluster/instance_test.go
index c4067ad303..02631e21c1 100644
--- a/internal/controller/postgrescluster/instance_test.go
+++ b/internal/controller/postgrescluster/instance_test.go
@@ -381,7 +381,7 @@ func TestWritablePod(t *testing.T) {
 
 	t.Run("empty observed", func(t *testing.T) {
 		observed := &observedInstances{}
-		pod, instance := observed.writablePod("container")
+		pod, instance := observed.WritablePod("container")
 		assert.Assert(t, pod == nil)
 		assert.Assert(t, instance == nil)
 	})
@@ -415,7 +415,7 @@
 		terminating, known := observed.forCluster[0].IsTerminating()
 		assert.Assert(t, terminating && known)
 
-		pod, instance := observed.writablePod("container")
+		pod, instance := observed.WritablePod("container")
 		assert.Assert(t, pod == nil)
 		assert.Assert(t, instance == nil)
 	})
@@ -447,7 +447,7 @@
 		running, known := observed.forCluster[0].IsRunning(container)
 		assert.Check(t, !running && known)
 
-		pod, instance := observed.writablePod("container")
+		pod, instance := observed.WritablePod("container")
 		assert.Assert(t, pod == nil)
 		assert.Assert(t, instance == nil)
 	})
@@ -480,7 +480,7 @@
 		writable, known := observed.forCluster[0].IsWritable()
 		assert.Check(t, !writable && known)
 
-		pod, instance := observed.writablePod("container")
+		pod, instance := observed.WritablePod("container")
 		assert.Assert(t, pod == nil)
 		assert.Assert(t, instance == nil)
 	})
@@ -517,7 +517,7 @@
 		running, known := observed.forCluster[0].IsRunning(container)
 		assert.Check(t, running && known)
 
-		pod, instance := observed.writablePod("container")
+		pod, instance := observed.WritablePod("container")
 		assert.Assert(t, pod != nil)
 		assert.Assert(t, instance != nil)
 	})
diff --git a/internal/controller/postgrescluster/pgbouncer.go b/internal/controller/postgrescluster/pgbouncer.go
index 2fcd2802ff..e1aad93f1a 100644
--- a/internal/controller/postgrescluster/pgbouncer.go
+++ b/internal/controller/postgrescluster/pgbouncer.go
@@ -6,10 +6,11 @@ package postgrescluster
 
 import (
 	"context"
+	"errors"
 	"fmt"
 	"io"
 
-	"github.com/pkg/errors"
+	pkgerrors "github.com/pkg/errors"
 	appsv1 "k8s.io/api/apps/v1"
 	corev1 "k8s.io/api/core/v1"
 	policyv1 "k8s.io/api/policy/v1"
@@ -54,6 +55,12 @@ func (r *Reconciler) reconcilePGBouncer(
 	if err == nil {
 		err = r.reconcilePGBouncerInPostgreSQL(ctx, cluster, instances, secret)
 	}
+	if err == nil {
+		// Stop PgBouncer pods if the primary has changed, triggering a graceful
+		// shutdown and container restart. The new process will do a fresh DNS lookup.
+		// This prevents stale connections from routing traffic to a demoted replica.
+		err = r.reconcilePGBouncerReconnect(ctx, cluster, instances)
+	}
 
 	return err
 }
@@ -71,14 +78,14 @@ func (r *Reconciler) reconcilePGBouncerConfigMap(
 		// PgBouncer is disabled; delete the ConfigMap if it exists. Check the
 		// client cache first using Get.
 		key := client.ObjectKeyFromObject(configmap)
-		err := errors.WithStack(r.Client.Get(ctx, key, configmap))
+		err := pkgerrors.WithStack(r.Client.Get(ctx, key, configmap))
 		if err == nil {
-			err = errors.WithStack(r.deleteControlled(ctx, cluster, configmap))
+			err = pkgerrors.WithStack(r.deleteControlled(ctx, cluster, configmap))
 		}
 		return nil, client.IgnoreNotFound(err)
 	}
 
-	err := errors.WithStack(r.setControllerReference(cluster, configmap))
+	err := pkgerrors.WithStack(r.setControllerReference(cluster, configmap))
 
 	configmap.Annotations = naming.Merge(
 		cluster.Spec.Metadata.GetAnnotationsOrNil(),
@@ -95,7 +102,7 @@ func (r *Reconciler) reconcilePGBouncerConfigMap(
 		pgbouncer.ConfigMap(cluster, configmap)
 	}
 	if err == nil {
-		err = errors.WithStack(r.apply(ctx, configmap))
+		err = pkgerrors.WithStack(r.apply(ctx, configmap))
 	}
 
 	return configmap, err
@@ -111,18 +118,9 @@ func (r *Reconciler) reconcilePGBouncerInPostgreSQL(
 ) error {
 	log := logging.FromContext(ctx)
 
-	var pod *corev1.Pod
-
 	// Find the PostgreSQL instance that can execute SQL that writes to every
 	// database. When there is none, return early.
-
-	for _, instance := range instances.forCluster {
-		writable, known := instance.IsWritable()
-		if writable && known && len(instance.Pods) > 0 {
-			pod = instance.Pods[0]
-			break
-		}
-	}
+	pod, _ := instances.WritablePod(naming.ContainerDatabase)
 	if pod == nil {
 		return nil
 	}
@@ -140,12 +138,12 @@ func (r *Reconciler) reconcilePGBouncerInPostgreSQL(
 	}
 
 	action := func(ctx context.Context, exec postgres.Executor) error {
-		return errors.WithStack(pgbouncer.EnableInPostgreSQL(ctx, exec, clusterSecret, exposeSuperusers))
+		return pkgerrors.WithStack(pgbouncer.EnableInPostgreSQL(ctx, exec, clusterSecret, exposeSuperusers))
 	}
 	if cluster.Spec.Proxy == nil || cluster.Spec.Proxy.PGBouncer == nil {
 		// PgBouncer is disabled.
 		action = func(ctx context.Context, exec postgres.Executor) error {
-			return errors.WithStack(pgbouncer.DisableInPostgreSQL(ctx, exec))
+			return pkgerrors.WithStack(pgbouncer.DisableInPostgreSQL(ctx, exec))
 		}
 	}
 
@@ -201,7 +199,7 @@ func (r *Reconciler) reconcilePGBouncerSecret(
 	root *pki.RootCertificateAuthority, service *corev1.Service,
 ) (*corev1.Secret, error) {
 	existing := &corev1.Secret{ObjectMeta: naming.ClusterPGBouncer(cluster)}
-	err := errors.WithStack(
+	err := pkgerrors.WithStack(
 		r.Client.Get(ctx, client.ObjectKeyFromObject(existing), existing))
 	if client.IgnoreNotFound(err) != nil {
 		return nil, err
@@ -210,7 +208,7 @@
 	if cluster.Spec.Proxy == nil || cluster.Spec.Proxy.PGBouncer == nil {
 		// PgBouncer is disabled; delete the Secret if it exists.
 		if err == nil {
-			err = errors.WithStack(r.deleteControlled(ctx, cluster, existing))
+			err = pkgerrors.WithStack(r.deleteControlled(ctx, cluster, existing))
 		}
 		return nil, client.IgnoreNotFound(err)
 	}
@@ -224,7 +222,7 @@
 	// K8SPG-330: Keep this commented in case of conflicts.
 	// We don't want to delete TLS secrets on cluster deletion.
 	// if err == nil {
-	// 	err = errors.WithStack(r.setControllerReference(cluster, intent))
+	// 	err = pkgerrors.WithStack(r.setControllerReference(cluster, intent))
 	// }
 
 	intent.Annotations = naming.Merge(
@@ -242,7 +240,7 @@
 		err = pgbouncer.Secret(ctx, cluster, root, existing, service, intent)
 	}
 	if err == nil {
-		err = errors.WithStack(r.apply(ctx, intent))
+		err = pkgerrors.WithStack(r.apply(ctx, intent))
 	}
 
 	return intent, err
@@ -323,7 +321,7 @@
 	}
 	service.Spec.Ports = []corev1.ServicePort{servicePort}
 
-	err := errors.WithStack(r.setControllerReference(cluster, service))
+	err := pkgerrors.WithStack(r.setControllerReference(cluster, service))
 	return service, true, err
 }
 
@@ -341,15 +339,15 @@ func (r *Reconciler) reconcilePGBouncerService(
 		// PgBouncer is disabled; delete the Service if it exists. Check the client
 		// cache first using Get.
 		key := client.ObjectKeyFromObject(service)
-		err := errors.WithStack(r.Client.Get(ctx, key, service))
+		err := pkgerrors.WithStack(r.Client.Get(ctx, key, service))
 		if err == nil {
-			err = errors.WithStack(r.deleteControlled(ctx, cluster, service))
+			err = pkgerrors.WithStack(r.deleteControlled(ctx, cluster, service))
 		}
 		return nil, client.IgnoreNotFound(err)
 	}
 
 	if err == nil {
-		err = errors.WithStack(r.apply(ctx, service))
+		err = pkgerrors.WithStack(r.apply(ctx, service))
 	}
 	return service, err
 }
@@ -457,7 +455,7 @@ func (r *Reconciler) generatePGBouncerDeployment(
 	// set the image pull secrets, if any exist
 	deploy.Spec.Template.Spec.ImagePullSecrets = cluster.Spec.ImagePullSecrets
 
-	err := errors.WithStack(r.setControllerReference(cluster, deploy))
+	err := pkgerrors.WithStack(r.setControllerReference(cluster, deploy))
 
 	if err == nil {
 		pgbouncer.Pod(ctx, cluster, configmap, primaryCertificate, secret, &deploy.Spec.Template.Spec)
@@ -512,15 +510,15 @@ func (r *Reconciler) reconcilePGBouncerDeployment(
 		// PgBouncer is disabled; delete the Deployment if it exists. Check the
 		// client cache first using Get.
 		key := client.ObjectKeyFromObject(deploy)
-		err := errors.WithStack(r.Client.Get(ctx, key, deploy))
+		err := pkgerrors.WithStack(r.Client.Get(ctx, key, deploy))
 		if err == nil {
-			err = errors.WithStack(r.deleteControlled(ctx, cluster, deploy))
+			err = pkgerrors.WithStack(r.deleteControlled(ctx, cluster, deploy))
 		}
 		return client.IgnoreNotFound(err)
 	}
 
 	if err == nil {
-		err = errors.WithStack(r.apply(ctx, deploy))
+		err = pkgerrors.WithStack(r.apply(ctx, deploy))
 	}
 	return err
 }
@@ -537,9 +535,9 @@ func (r *Reconciler) reconcilePGBouncerPodDisruptionBudget(
 ) error {
 	deleteExistingPDB := func(cluster *v1beta1.PostgresCluster) error {
 		existing := &policyv1.PodDisruptionBudget{ObjectMeta: naming.ClusterPGBouncer(cluster)}
-		err := errors.WithStack(r.Client.Get(ctx, client.ObjectKeyFromObject(existing), existing))
+		err := pkgerrors.WithStack(r.Client.Get(ctx, client.ObjectKeyFromObject(existing), existing))
 		if err == nil {
-			err = errors.WithStack(r.deleteControlled(ctx, cluster, existing))
+			err = pkgerrors.WithStack(r.deleteControlled(ctx, cluster, existing))
 		}
 		return client.IgnoreNotFound(err)
 	}
@@ -580,7 +578,132 @@
 	}
 	if err == nil {
-		err = errors.WithStack(r.apply(ctx, pdb))
+		err = pkgerrors.WithStack(r.apply(ctx, pdb))
 	}
 
 	return err
 }
+
+// pgbouncerPods returns a list of PgBouncer pods for the given cluster.
+func (r *Reconciler) pgbouncerPods(ctx context.Context, cluster *v1beta1.PostgresCluster) (*corev1.PodList, error) {
+	pgbouncerPods := &corev1.PodList{}
+	selector, err := naming.AsSelector(naming.ClusterPGBouncerSelector(cluster))
+	if err != nil {
+		return nil, pkgerrors.WithStack(err)
+	}
+
+	if err := r.Client.List(ctx, pgbouncerPods,
+		client.InNamespace(cluster.Namespace),
+		client.MatchingLabelsSelector{Selector: selector}); err != nil {
+		return nil, pkgerrors.WithStack(err)
+	}
+	return pgbouncerPods, nil
+}
+
+// reconcilePGBouncerReconnect is a sub-reconciler that deletes PgBouncer pods
+// when the primary has changed. This forces PgBouncer to establish new
+// server connections to the correct primary, preventing stale connections
+// from routing traffic to a demoted replica after failover.
+//
+// Deleting a pod sends SIGTERM to PgBouncer, triggering SHUTDOWN WAIT_FOR_CLIENTS [1]
+// mode, which waits for clients to gracefully disconnect [2]. This approach was
+// suggested by a PgBouncer maintainer [3] to deal with failovers in Kubernetes.
+//
+// What happens:
+//  1. Kubernetes sends SIGTERM [2] to PgBouncer.
+//  2. PgBouncer enters SHUTDOWN WAIT_FOR_CLIENTS mode [1].
+//  3. After the Kubernetes grace period (default 30s), SIGKILL is sent if the process still hasn't exited.
+//  4. The container is terminated and restarted by the Kubernetes Deployment controller.
+//  5. The new PgBouncer process does a fresh DNS lookup → connects to the current primary.
+//
+// This approach is more effective than the RECONNECT command for session mode with persistent
+// clients (MPG clusters), because RECONNECT waits for clients to disconnect, which never happens
+// for persistent clients. SIGTERM guarantees termination and a restart after the grace period.
+//
+// [1] https://www.pgbouncer.org/usage.html#shutdown
+// [2] https://www.pgbouncer.org/usage.html#signals
+// [3] https://github.com/pgbouncer/pgbouncer/issues/1361
+func (r *Reconciler) reconcilePGBouncerReconnect(
+	ctx context.Context, cluster *v1beta1.PostgresCluster,
+	instances *observedInstances,
+) error {
+	log := logging.FromContext(ctx)
+
+	// Skip if PgBouncer is disabled.
+	if cluster.Spec.Proxy == nil || cluster.Spec.Proxy.PGBouncer == nil {
+		return nil
+	}
+
+	primaryPod, _ := instances.WritablePod(naming.ContainerDatabase)
+	if primaryPod == nil {
+		// We will retry later.
+		log.V(1).Info("No writable instance found, skipping PgBouncer failover signal")
+		return nil
+	}
+
+	currentPrimaryUID := string(primaryPod.UID)
+	lastFailoverUID := cluster.Status.Proxy.PGBouncer.LastFailoverPrimaryUID
+
+	if currentPrimaryUID == lastFailoverUID {
+		// The primary hasn't changed; nothing to signal.
+		return nil
+	}
+
+	if lastFailoverUID == "" {
+		// First time seeing this cluster, or the status field was just added.
+		// Initialize with the current primary UID without triggering a pod restart.
+ log.V(1).Info("Initializing PgBouncer failover tracking", + "currentPrimaryUID", currentPrimaryUID, + "currentPrimaryName", primaryPod.Name) + + cluster.Status.Proxy.PGBouncer.LastFailoverPrimaryUID = currentPrimaryUID + return nil + } + + log.Info("Primary changed, triggering PgBouncer failover signal (SIGTERM)", + "previousPrimaryUID", lastFailoverUID, + "currentPrimaryUID", currentPrimaryUID, + "currentPrimaryName", primaryPod.Name) + + pgbouncerPods, err := r.pgbouncerPods(ctx, cluster) + if err != nil { + return err + } + + var failoverErrs []error + successCount := 0 + + for _, pod := range pgbouncerPods.Items { + if pod.Status.Phase != corev1.PodRunning { + continue + } + + if err := r.Client.Delete(ctx, &pod); err != nil { + log.Error(err, "PgBouncer failover signal: failed to delete pod", "pod", pod.Name) + failoverErrs = append(failoverErrs, fmt.Errorf("pod %s: %w", pod.Name, err)) + } else { + log.Info("PgBouncer failover signal: deleted pod for recreation", "pod", pod.Name) + successCount++ + } + } + + // Update status only if all pods were successfully stopped. + // Partial failures will be retried in the next reconciliation loop. + if len(failoverErrs) == 0 { + cluster.Status.Proxy.PGBouncer.LastFailoverPrimaryUID = currentPrimaryUID + } + + log.Info("PgBouncer failover signal: done", + "failed", len(failoverErrs) > 0, + "successCount", successCount, + "failureCount", len(failoverErrs), + "totalPods", len(pgbouncerPods.Items), + ) + + // Return aggregated errors if any failed + if len(failoverErrs) > 0 { + return fmt.Errorf("failed to signal %d of %d pgbouncer pods: %w", + len(failoverErrs), len(pgbouncerPods.Items), errors.Join(failoverErrs...)) + } + + return nil +} diff --git a/internal/controller/postgrescluster/pgmonitor.go b/internal/controller/postgrescluster/pgmonitor.go index c8b3d400ba..935a16dd7d 100644 --- a/internal/controller/postgrescluster/pgmonitor.go +++ b/internal/controller/postgrescluster/pgmonitor.go @@ -58,7 +58,7 @@ func (r *Reconciler) reconcilePGMonitorExporter(ctx context.Context, // Find the PostgreSQL instance that can execute SQL that writes to every // database. When there is none, return early. - writablePod, writableInstance = instances.writablePod(naming.ContainerDatabase) + writablePod, writableInstance = instances.WritablePod(naming.ContainerDatabase) if writableInstance == nil || writablePod == nil { return nil } diff --git a/internal/controller/postgrescluster/postgres.go b/internal/controller/postgrescluster/postgres.go index b9a35d62d3..ada1108a2f 100644 --- a/internal/controller/postgrescluster/postgres.go +++ b/internal/controller/postgrescluster/postgres.go @@ -204,7 +204,7 @@ func (r *Reconciler) reconcilePostgresDatabases( // Find the PostgreSQL instance that can execute SQL that writes system // catalogs. When there is none, return early. - pod, _ := instances.writablePod(container) + pod, _ := instances.WritablePod(container) if pod == nil { return nil } @@ -1047,7 +1047,7 @@ func (r *Reconciler) reconcileDatabaseInitSQL(ctx context.Context, // Now that we have the data provided by the user. 
diff --git a/internal/controller/postgrescluster/pgmonitor.go b/internal/controller/postgrescluster/pgmonitor.go
index c8b3d400ba..935a16dd7d 100644
--- a/internal/controller/postgrescluster/pgmonitor.go
+++ b/internal/controller/postgrescluster/pgmonitor.go
@@ -58,7 +58,7 @@ func (r *Reconciler) reconcilePGMonitorExporter(ctx context.Context,
 
 	// Find the PostgreSQL instance that can execute SQL that writes to every
 	// database. When there is none, return early.
-	writablePod, writableInstance = instances.writablePod(naming.ContainerDatabase)
+	writablePod, writableInstance = instances.WritablePod(naming.ContainerDatabase)
 	if writableInstance == nil || writablePod == nil {
 		return nil
 	}
diff --git a/internal/controller/postgrescluster/postgres.go b/internal/controller/postgrescluster/postgres.go
index b9a35d62d3..ada1108a2f 100644
--- a/internal/controller/postgrescluster/postgres.go
+++ b/internal/controller/postgrescluster/postgres.go
@@ -204,7 +204,7 @@ func (r *Reconciler) reconcilePostgresDatabases(
 
 	// Find the PostgreSQL instance that can execute SQL that writes system
 	// catalogs. When there is none, return early.
-	pod, _ := instances.writablePod(container)
+	pod, _ := instances.WritablePod(container)
 	if pod == nil {
 		return nil
 	}
@@ -1047,7 +1047,7 @@ func (r *Reconciler) reconcileDatabaseInitSQL(ctx context.Context,
 	// Now that we have the data provided by the user. We can check for a
 	// writable pod and get the podExecutor for the pod's database container
 	var podExecutor postgres.Executor
-	pod, _ := instances.writablePod(naming.ContainerDatabase)
+	pod, _ := instances.WritablePod(naming.ContainerDatabase)
 	if pod == nil {
 		log.V(1).Info("Could not find a pod with a writable database container.")
 		return nil
diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/pgbouncer_types.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/pgbouncer_types.go
index 9119cd6fe3..8a66a0a45e 100644
--- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/pgbouncer_types.go
+++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/pgbouncer_types.go
@@ -173,4 +173,10 @@ type PGBouncerPodStatus struct {
 
 	// Total number of non-terminated pods.
 	Replicas int32 `json:"replicas,omitempty"`
+
+	// Identifies the UID of the primary pod when the failover signal (SIGTERM) was last triggered.
+	// Used to detect failovers and trigger a PgBouncer container restart for a fresh
+	// connection pool and DNS lookup to the current primary.
+	// +optional
+	LastFailoverPrimaryUID string `json:"lastFailoverPrimaryUID,omitempty"`
 }
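The new status field surfaces in the PostgresCluster status at status.proxy.pgBouncer.lastFailoverPrimaryUID, matching the JSON tags above. As a hedged sketch (also not part of the change set), it could be read with a dynamic client like this; the GroupVersionResource values follow the CRD shown earlier, and the cluster namespace and name are placeholders supplied by the caller.

package example

import (
	"context"
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/client-go/dynamic"
)

// lastFailoverPrimaryUID reads the PgBouncer failover-tracking UID from a
// PostgresCluster's status, returning "" if the field has not been set yet.
func lastFailoverPrimaryUID(ctx context.Context, dc dynamic.Interface,
	namespace, name string) (string, error) {

	gvr := schema.GroupVersionResource{
		Group:    "postgres-operator.crunchydata.com",
		Version:  "v1beta1",
		Resource: "postgresclusters",
	}
	obj, err := dc.Resource(gvr).Namespace(namespace).Get(ctx, name, metav1.GetOptions{})
	if err != nil {
		return "", err
	}
	// The path mirrors the Go types: Status.Proxy.PGBouncer.LastFailoverPrimaryUID.
	uid, _, err := unstructured.NestedString(obj.Object, "status", "proxy", "pgBouncer", "lastFailoverPrimaryUID")
	if err != nil {
		return "", fmt.Errorf("read status: %w", err)
	}
	return uid, nil
}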