From 4cea22d3d5533e5a7793dbcc1d6a6d8b76ebd45f Mon Sep 17 00:00:00 2001 From: Juliana Oliveira Date: Mon, 26 Jan 2026 16:18:49 -0300 Subject: [PATCH 1/7] wip --- .../controller/postgrescluster/pgbouncer.go | 102 ++++++++++++++++++ internal/pgbouncer/config.go | 13 ++- internal/pgbouncer/reconnect.go | 57 ++++++++++ .../v1beta1/pgbouncer_types.go | 6 ++ 4 files changed, 176 insertions(+), 2 deletions(-) create mode 100644 internal/pgbouncer/reconnect.go diff --git a/internal/controller/postgrescluster/pgbouncer.go b/internal/controller/postgrescluster/pgbouncer.go index 2fcd2802ff..b98c42113b 100644 --- a/internal/controller/postgrescluster/pgbouncer.go +++ b/internal/controller/postgrescluster/pgbouncer.go @@ -54,6 +54,11 @@ func (r *Reconciler) reconcilePGBouncer( if err == nil { err = r.reconcilePGBouncerInPostgreSQL(ctx, cluster, instances, secret) } + if err == nil { + // Trigger RECONNECT if primary has changed to force new server connections. + // This prevents stale connections from routing traffic to a demoted replica. + err = r.reconcilePGBouncerReconnect(ctx, cluster, instances) + } return err } @@ -584,3 +589,100 @@ func (r *Reconciler) reconcilePGBouncerPodDisruptionBudget( } return err } + +// reconcilePGBouncerReconnect triggers a RECONNECT command on all PgBouncer +// pods when the primary has changed. This forces PgBouncer to establish new +// server connections to the correct primary, preventing stale connections +// from routing traffic to a demoted replica after failover. +// +// Note: RECONNECT closes server connections when they are "released" according +// to the pool mode. In transaction mode, this happens after each transaction. +// In session mode, this happens when the client disconnects - so persistent +// clients may continue hitting the old primary until they reconnect. 
+func (r *Reconciler) reconcilePGBouncerReconnect( + ctx context.Context, cluster *v1beta1.PostgresCluster, + instances *observedInstances, +) error { + log := logging.FromContext(ctx) + + // Skip if PgBouncer is disabled + if cluster.Spec.Proxy == nil || cluster.Spec.Proxy.PGBouncer == nil { + return nil + } + + var primaryPod *corev1.Pod + for _, instance := range instances.forCluster { + // Same condition as writablePod fn + if writable, known := instance.IsWritable(); writable && known && len(instance.Pods) > 0 { + primaryPod = instance.Pods[0] + break + } + } + + if primaryPod == nil { + // We will retry later. + log.V(1).Info("No writable instance found, skipping PgBouncer RECONNECT") + return nil + } + + currentPrimaryUID := string(primaryPod.UID) + lastReconnectUID := cluster.Status.Proxy.PGBouncer.LastReconnectPrimaryUID + + if currentPrimaryUID == lastReconnectUID { + // Primary hasn't changed, no need to Reconnect. + return nil + } + + log.Info("Primary changed, triggering PgBouncer RECONNECT", + "previousPrimaryUID", lastReconnectUID, + "currentPrimaryUID", currentPrimaryUID, + "currentPrimaryName", primaryPod.Name) + + pgbouncerPods := &corev1.PodList{} + selector, err := naming.AsSelector(naming.ClusterPGBouncerSelector(cluster)) + if err != nil { + return errors.WithStack(err) + } + + if err := r.Client.List(ctx, pgbouncerPods, + client.InNamespace(cluster.Namespace), + client.MatchingLabelsSelector{Selector: selector}); err != nil { + return errors.WithStack(err) + } + + // Send RECONNECT to each running PgBouncer pod + var reconnectErr error + successCount := 0 + + for i := range pgbouncerPods.Items { + pod := &pgbouncerPods.Items[i] + if pod.Status.Phase != corev1.PodRunning { + continue + } + + exec := func(ctx context.Context, stdin io.Reader, stdout, stderr io.Writer, command ...string) error { + return r.PodExec(ctx, pod.Namespace, pod.Name, naming.ContainerPGBouncer, stdin, stdout, stderr, command...) 
+ } + + if err := pgbouncer.Reconnect(ctx, exec); err != nil { + log.Error(err, "PgBouncer RECONNECT: failed to issue command to pod.", "pod", pod.Name) + reconnectErr = err + } else { + successCount++ + } + } + + // If we can't send a RECONNECT command to one of the pods, we won't update the LastReconnectPrimaryUID. + // This means this will run again in the next reconciliation loop. + if reconnectErr == nil { + cluster.Status.Proxy.PGBouncer.LastReconnectPrimaryUID = currentPrimaryUID + } + + log.Info("PgBouncer RECONNECT: done", + "failed", reconnectErr != nil, + "successCount", successCount, + "totalPods", len(pgbouncerPods.Items), + ) + + return reconnectErr +} diff --git a/internal/pgbouncer/config.go b/internal/pgbouncer/config.go index 3ac90b5bfe..4f4b8d7c5c 100644 --- a/internal/pgbouncer/config.go +++ b/internal/pgbouncer/config.go @@ -120,8 +120,17 @@ func clusterINI(cluster *v1beta1.PostgresCluster) string { "server_tls_sslmode": "verify-full", "server_tls_ca_file": certBackendAuthorityAbsolutePath, - // Disable Unix sockets to keep the filesystem read-only. - "unix_socket_dir": "", + // Enable Unix socket for admin console access. The special user + // "pgbouncer" can connect without a password when using a Unix socket + // from the same UID as the running process. This allows the operator + // to send admin commands like RECONNECT after failover. + // Ref.: https://www.pgbouncer.org/usage.html#admin-console + "unix_socket_dir": "/tmp/pgbouncer", + + // Allow the "pgbouncer" user to run admin commands (PAUSE, RESUME, + // RECONNECT, etc.) on the admin console. Combined with unix_socket_dir, + // this enables password-free admin access from within the container. + "admin_users": "pgbouncer", } // Override the above with any specified settings. 
diff --git a/internal/pgbouncer/reconnect.go b/internal/pgbouncer/reconnect.go new file mode 100644 index 0000000000..e527850d48 --- /dev/null +++ b/internal/pgbouncer/reconnect.go @@ -0,0 +1,57 @@ +package pgbouncer + +import ( + "bytes" + "context" + + "github.com/percona/percona-postgresql-operator/internal/logging" + "github.com/percona/percona-postgresql-operator/internal/postgres" +) + +// Reconnect sends the RECONNECT command to PgBouncer's admin console. +// This closes all server connections at the next opportunity, forcing +// PgBouncer to establish new connections to the current primary. +// +// From PgBouncer docs: "Close each open server connection for the given +// database, or all databases, at the next opportunity." +// +// This is non-disruptive: PgBouncer waits for the connection to be +// "released" before closing it. In transaction pooling mode, this means +// waiting for the current transaction to complete. In session pooling +// mode, this means waiting for the client to disconnect. +// +// The command connects via Unix socket as the special "pgbouncer" user, +// which is allowed without password when the client has the same UID +// as the running PgBouncer process. +// +// Ref.: https://www.pgbouncer.org/usage.html#admin-console +func Reconnect(ctx context.Context, exec postgres.Executor) error { + log := logging.FromContext(ctx) + log.Info("Triggering PgBouncer RECONNECT to force new server connections") + + stdout, stderr := &bytes.Buffer{}, &bytes.Buffer{} + + // Connect to PgBouncer admin console via Unix socket. + // The "pgbouncer" user can connect without password from same UID. + // The "pgbouncer" database is the virtual admin console. 
+ // Ref.: + err := exec(ctx, nil, stdout, stderr, + "psql", + "-h", "/tmp/pgbouncer", + "-p", "6432", + "-U", "pgbouncer", + "-d", "pgbouncer", + "-c", "RECONNECT", + ) + + if err != nil { + log.Error(err, "RECONNECT failed", + "stdout", stdout.String(), + "stderr", stderr.String()) + } else { + log.V(1).Info("RECONNECT succeeded", + "stdout", stdout.String()) + } + + return err +} diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/pgbouncer_types.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/pgbouncer_types.go index 9119cd6fe3..8b2066b5b5 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/pgbouncer_types.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/pgbouncer_types.go @@ -173,4 +173,10 @@ type PGBouncerPodStatus struct { // Total number of non-terminated pods. Replicas int32 `json:"replicas,omitempty"` + + // Identifies the primary pod UID when RECONNECT was last triggered. + // Used to detect failovers and force PgBouncer to establish new + // server connections to the correct primary. 
+ // +optional + LastReconnectPrimaryUID string `json:"lastReconnectPrimaryUID,omitempty"` } From f25243264d607f127b1ab622c5e78bf458ad8312 Mon Sep 17 00:00:00 2001 From: Juliana Oliveira Date: Mon, 26 Jan 2026 20:30:06 -0300 Subject: [PATCH 2/7] wip --- .../controller/postgrescluster/instance.go | 4 +- .../postgrescluster/instance_test.go | 10 +- .../controller/postgrescluster/pgbouncer.go | 105 +++++++++--------- .../controller/postgrescluster/pgmonitor.go | 2 +- .../controller/postgrescluster/postgres.go | 4 +- internal/pgbouncer/config.go | 11 -- internal/pgbouncer/reconnect.go | 57 ---------- internal/pgbouncer/signal_failover.go | 36 ++++++ .../v1beta1/pgbouncer_types.go | 8 +- 9 files changed, 102 insertions(+), 135 deletions(-) delete mode 100644 internal/pgbouncer/reconnect.go create mode 100644 internal/pgbouncer/signal_failover.go diff --git a/internal/controller/postgrescluster/instance.go b/internal/controller/postgrescluster/instance.go index ee616a0395..fdb61e7a14 100644 --- a/internal/controller/postgrescluster/instance.go +++ b/internal/controller/postgrescluster/instance.go @@ -262,11 +262,11 @@ func newObservedInstances( return &observed } -// writablePod looks at observedInstances and finds an instance that matches +// WritablePod looks at observedInstances and finds an instance that matches // a few conditions. The instance should be non-terminating, running, and // writable i.e. the instance with the primary. If such an instance exists, it // is returned along with the instance pod. 
-func (observed *observedInstances) writablePod(container string) (*corev1.Pod, *Instance) { +func (observed *observedInstances) WritablePod(container string) (*corev1.Pod, *Instance) { if observed == nil { return nil, nil } diff --git a/internal/controller/postgrescluster/instance_test.go b/internal/controller/postgrescluster/instance_test.go index c4067ad303..02631e21c1 100644 --- a/internal/controller/postgrescluster/instance_test.go +++ b/internal/controller/postgrescluster/instance_test.go @@ -381,7 +381,7 @@ func TestWritablePod(t *testing.T) { t.Run("empty observed", func(t *testing.T) { observed := &observedInstances{} - pod, instance := observed.writablePod("container") + pod, instance := observed.WritablePod("container") assert.Assert(t, pod == nil) assert.Assert(t, instance == nil) }) @@ -415,7 +415,7 @@ func TestWritablePod(t *testing.T) { terminating, known := observed.forCluster[0].IsTerminating() assert.Assert(t, terminating && known) - pod, instance := observed.writablePod("container") + pod, instance := observed.WritablePod("container") assert.Assert(t, pod == nil) assert.Assert(t, instance == nil) }) @@ -447,7 +447,7 @@ func TestWritablePod(t *testing.T) { running, known := observed.forCluster[0].IsRunning(container) assert.Check(t, !running && known) - pod, instance := observed.writablePod("container") + pod, instance := observed.WritablePod("container") assert.Assert(t, pod == nil) assert.Assert(t, instance == nil) }) @@ -480,7 +480,7 @@ func TestWritablePod(t *testing.T) { writable, known := observed.forCluster[0].IsWritable() assert.Check(t, !writable && known) - pod, instance := observed.writablePod("container") + pod, instance := observed.WritablePod("container") assert.Assert(t, pod == nil) assert.Assert(t, instance == nil) }) @@ -517,7 +517,7 @@ func TestWritablePod(t *testing.T) { running, known := observed.forCluster[0].IsRunning(container) assert.Check(t, running && known) - pod, instance := observed.writablePod("container") + pod, 
instance := observed.WritablePod("container") assert.Assert(t, pod != nil) assert.Assert(t, instance != nil) }) diff --git a/internal/controller/postgrescluster/pgbouncer.go b/internal/controller/postgrescluster/pgbouncer.go index b98c42113b..5bb798dd33 100644 --- a/internal/controller/postgrescluster/pgbouncer.go +++ b/internal/controller/postgrescluster/pgbouncer.go @@ -55,7 +55,8 @@ func (r *Reconciler) reconcilePGBouncer( err = r.reconcilePGBouncerInPostgreSQL(ctx, cluster, instances, secret) } if err == nil { - // Trigger RECONNECT if primary has changed to force new server connections. + // Send SIGTERM to PgBouncer if primary has changed, triggering graceful + // shutdown and container restart. New process will do fresh DNS lookup. // This prevents stale connections from routing traffic to a demoted replica. err = r.reconcilePGBouncerReconnect(ctx, cluster, instances) } @@ -116,18 +117,9 @@ func (r *Reconciler) reconcilePGBouncerInPostgreSQL( ) error { log := logging.FromContext(ctx) - var pod *corev1.Pod - // Find the PostgreSQL instance that can execute SQL that writes to every // database. When there is none, return early. - - for _, instance := range instances.forCluster { - writable, known := instance.IsWritable() - if writable && known && len(instance.Pods) > 0 { - pod = instance.Pods[0] - break - } - } + pod, _ := instances.WritablePod(naming.ContainerDatabase) if pod == nil { return nil } @@ -590,8 +582,24 @@ func (r *Reconciler) reconcilePGBouncerPodDisruptionBudget( return err } -// reconcilePGBouncerReconnect triggers a RECONNECT command on all PgBouncer -// pods when the primary has changed. This forces PgBouncer to establish new +// pgbouncerPods returns a list of PgBouncer pods for the given cluster. 
+func (r *Reconciler) pgbouncerPods(ctx context.Context, cluster *v1beta1.PostgresCluster) (*corev1.PodList, error) { + pgbouncerPods := &corev1.PodList{} + selector, err := naming.AsSelector(naming.ClusterPGBouncerSelector(cluster)) + if err != nil { + return nil, errors.WithStack(err) + } + + if err := r.Client.List(ctx, pgbouncerPods, + client.InNamespace(cluster.Namespace), + client.MatchingLabelsSelector{Selector: selector}); err != nil { + return nil, errors.WithStack(err) + } + return pgbouncerPods, nil +} + +// reconcilePGBouncerReconnect is a sub-reconciler that signals PgBouncer pods +// when the primary has changed. This forces PgBouncer to establish new // server connections to the correct primary, preventing stale connections // from routing traffic to a demoted replica after failover. // @@ -599,6 +607,7 @@ func (r *Reconciler) reconcilePGBouncerPodDisruptionBudget( // to the pool mode. In transaction mode, this happens after each transaction. // In session mode, this happens when the client disconnects - so persistent // clients may continue hitting the old primary until they reconnect. +// It returns error for integration with the parent reconciler's error handling chain. func (r *Reconciler) reconcilePGBouncerReconnect( ctx context.Context, cluster *v1beta1.PostgresCluster, instances *observedInstances, @@ -610,79 +619,69 @@ func (r *Reconciler) reconcilePGBouncerReconnect( return nil } - var primaryPod *corev1.Pod - for _, instance := range instances.forCluster { - // Same condition as writablePod fn - if writable, known := instance.IsWritable(); writable && known && len(instance.Pods) > 0 { - primaryPod = instance.Pods[0] - break - } - } - + primaryPod, _ := instances.WritablePod(naming.ContainerDatabase) if primaryPod == nil { // We will retry later. 
- log.V(1).Info("No writable instance found, skipping PgBouncer RECONNECT") + log.V(1).Info("No writable instance found, skipping PgBouncer failover signal") return nil } currentPrimaryUID := string(primaryPod.UID) - lastReconnectUID := cluster.Status.Proxy.PGBouncer.LastReconnectPrimaryUID + lastFailoverUID := cluster.Status.Proxy.PGBouncer.LastFailoverPrimaryUID - if currentPrimaryUID == lastReconnectUID { - // Primary hasn't changed, no need to Reconnect. + if currentPrimaryUID == lastFailoverUID { + // Primary hasn't changed, no need to trigger failover. return nil } - log.Info("Primary changed, triggering PgBouncer RECONNECT", - "previousPrimaryUID", lastReconnectUID, + log.Info("Primary changed, triggering PgBouncer failover signal (SIGTERM)", + "previousPrimaryUID", lastFailoverUID, "currentPrimaryUID", currentPrimaryUID, "currentPrimaryName", primaryPod.Name) - pgbouncerPods := &corev1.PodList{} - selector, err := naming.AsSelector(naming.ClusterPGBouncerSelector(cluster)) + pgbouncerPods, err := r.pgbouncerPods(ctx, cluster) if err != nil { - return errors.WithStack(err) - } - - if err := r.Client.List(ctx, pgbouncerPods, - client.InNamespace(cluster.Namespace), - client.MatchingLabelsSelector{Selector: selector}); err != nil { - return errors.WithStack(err) + return err } - // Send RECONNECT to each running PgBouncer pod - var reconnectErr error + // Send SIGTERM to each running PgBouncer pod to trigger graceful shutdown + // and container restart. New PgBouncer process will do fresh DNS lookup. 
+ var failoverErrs []error successCount := 0 for i := range pgbouncerPods.Items { - pod := &pgbouncerPods.Items[i] + pod := pgbouncerPods.Items[i] // Copy value to avoid closure reference issues if pod.Status.Phase != corev1.PodRunning { continue } - exec := func(ctx context.Context, stdin io.Reader, stdout, stderr io.Writer, command ...string) error { + if err := pgbouncer.SignalFailover(ctx, func(ctx context.Context, stdin io.Reader, stdout, stderr io.Writer, command ...string) error { return r.PodExec(ctx, pod.Namespace, pod.Name, naming.ContainerPGBouncer, stdin, stdout, stderr, command...) - } - - if err := pgbouncer.Reconnect(ctx, exec); err != nil { - log.Error(err, "PgBouncer RECONNECT: failed to issue command to pod.", "pod", pod.Name) - reconnectErr = err + }); err != nil { + log.Error(err, "PgBouncer failover signal: failed to send SIGTERM to pod", "pod", pod.Name) + failoverErrs = append(failoverErrs, fmt.Errorf("pod %s: %w", pod.Name, err)) } else { successCount++ } } - // If we can't send a RECONNECT command to one of the pods, we won't update the LastReconnectPrimaryUID. - // This means this will run again in the next reconciliation loop. - if reconnectErr == nil { - cluster.Status.Proxy.PGBouncer.LastReconnectPrimaryUID = currentPrimaryUID + // Update status only if all pods were successfully signaled. + // Partial failures will be retried in the next reconciliation loop. 
+ if len(failoverErrs) == 0 { + cluster.Status.Proxy.PGBouncer.LastFailoverPrimaryUID = currentPrimaryUID } - log.Info("PgBouncer RECONNECT: done", - "failed", reconnectErr != nil, + log.Info("PgBouncer failover signal: done", + "failed", len(failoverErrs) > 0, "successCount", successCount, + "failureCount", len(failoverErrs), "totalPods", len(pgbouncerPods.Items), ) - return reconnectErr + // Return aggregated errors if any pods failed + if len(failoverErrs) > 0 { + return fmt.Errorf("failed to signal %d of %d pgbouncer pods: %w", + len(failoverErrs), len(pgbouncerPods.Items), failoverErrs[0]) + } + return nil } diff --git a/internal/controller/postgrescluster/pgmonitor.go b/internal/controller/postgrescluster/pgmonitor.go index c8b3d400ba..935a16dd7d 100644 --- a/internal/controller/postgrescluster/pgmonitor.go +++ b/internal/controller/postgrescluster/pgmonitor.go @@ -58,7 +58,7 @@ func (r *Reconciler) reconcilePGMonitorExporter(ctx context.Context, // Find the PostgreSQL instance that can execute SQL that writes to every // database. When there is none, return early. - writablePod, writableInstance = instances.writablePod(naming.ContainerDatabase) + writablePod, writableInstance = instances.WritablePod(naming.ContainerDatabase) if writableInstance == nil || writablePod == nil { return nil } diff --git a/internal/controller/postgrescluster/postgres.go b/internal/controller/postgrescluster/postgres.go index b9a35d62d3..ada1108a2f 100644 --- a/internal/controller/postgrescluster/postgres.go +++ b/internal/controller/postgrescluster/postgres.go @@ -204,7 +204,7 @@ func (r *Reconciler) reconcilePostgresDatabases( // Find the PostgreSQL instance that can execute SQL that writes system // catalogs. When there is none, return early. 
- pod, _ := instances.writablePod(container) + pod, _ := instances.WritablePod(container) if pod == nil { return nil } @@ -1047,7 +1047,7 @@ func (r *Reconciler) reconcileDatabaseInitSQL(ctx context.Context, // Now that we have the data provided by the user. We can check for a // writable pod and get the podExecutor for the pod's database container var podExecutor postgres.Executor - pod, _ := instances.writablePod(naming.ContainerDatabase) + pod, _ := instances.WritablePod(naming.ContainerDatabase) if pod == nil { log.V(1).Info("Could not find a pod with a writable database container.") return nil diff --git a/internal/pgbouncer/config.go b/internal/pgbouncer/config.go index 4f4b8d7c5c..4b6a4afb08 100644 --- a/internal/pgbouncer/config.go +++ b/internal/pgbouncer/config.go @@ -120,17 +120,6 @@ func clusterINI(cluster *v1beta1.PostgresCluster) string { "server_tls_sslmode": "verify-full", "server_tls_ca_file": certBackendAuthorityAbsolutePath, - // Enable Unix socket for admin console access. The special user - // "pgbouncer" can connect without a password when using a Unix socket - // from the same UID as the running process. This allows the operator - // to send admin commands like RECONNECT after failover. - // Ref.: https://www.pgbouncer.org/usage.html#admin-console - "unix_socket_dir": "/tmp/pgbouncer", - - // Allow the "pgbouncer" user to run admin commands (PAUSE, RESUME, - // RECONNECT, etc.) on the admin console. Combined with unix_socket_dir, - // this enables password-free admin access from within the container. - "admin_users": "pgbouncer", } // Override the above with any specified settings. 
diff --git a/internal/pgbouncer/reconnect.go b/internal/pgbouncer/reconnect.go deleted file mode 100644 index e527850d48..0000000000 --- a/internal/pgbouncer/reconnect.go +++ /dev/null @@ -1,57 +0,0 @@ -package pgbouncer - -import ( - "bytes" - "context" - - "github.com/percona/percona-postgresql-operator/internal/logging" - "github.com/percona/percona-postgresql-operator/internal/postgres" -) - -// Reconnect sends the RECONNECT command to PgBouncer's admin console. -// This closes all server connections at the next opportunity, forcing -// PgBouncer to establish new connections to the current primary. -// -// From PgBouncer docs: "Close each open server connection for the given -// database, or all databases, at the next opportunity." -// -// This is non-disruptive: PgBouncer waits for the connection to be -// "released" before closing it. In transaction pooling mode, this means -// waiting for the current transaction to complete. In session pooling -// mode, this means waiting for the client to disconnect. -// -// The command connects via Unix socket as the special "pgbouncer" user, -// which is allowed without password when the client has the same UID -// as the running PgBouncer process. -// -// Ref.: https://www.pgbouncer.org/usage.html#admin-console -func Reconnect(ctx context.Context, exec postgres.Executor) error { - log := logging.FromContext(ctx) - log.Info("Triggering PgBouncer RECONNECT to force new server connections") - - stdout, stderr := &bytes.Buffer{}, &bytes.Buffer{} - - // Connect to PgBouncer admin console via Unix socket. - // The "pgbouncer" user can connect without password from same UID. - // The "pgbouncer" database is the virtual admin console. 
- // Ref.: - err := exec(ctx, nil, stdout, stderr, - "psql", - "-h", "/tmp/pgbouncer", - "-p", "6432", - "-U", "pgbouncer", - "-d", "pgbouncer", - "-c", "RECONNECT", - ) - - if err != nil { - log.Error(err, "RECONNECT failed", - "stdout", stdout.String(), - "stderr", stderr.String()) - } else { - log.V(1).Info("RECONNECT succeeded", - "stdout", stdout.String()) - } - - return err -} diff --git a/internal/pgbouncer/signal_failover.go b/internal/pgbouncer/signal_failover.go new file mode 100644 index 0000000000..9e7d855c3b --- /dev/null +++ b/internal/pgbouncer/signal_failover.go @@ -0,0 +1,36 @@ +package pgbouncer + +import ( + "context" + + "github.com/percona/percona-postgresql-operator/internal/logging" + "github.com/percona/percona-postgresql-operator/internal/postgres" +) + +// SignalFailover sends a SIGTERM to PgBouncer to trigger SHUTDOWN WAIT_FOR_CLIENTS [1] +// mode, waiting for clients to gracefully disconnect [2]. This approach was +// suggested by a PgBouncer maintainer [3] to deal with failovers in Kubernetes. +// +// What happens: +// 1. Operator sends SIGTERM [2] to PgBouncer process (PID 1 in container) +// 2. PgBouncer enters SHUTDOWN WAIT_FOR_CLIENTS mode [1]. +// 3. After Kubernetes grace period (default 30s), SIGKILL is sent if process still hasn't exited +// 4. Container is terminated and restarted by Kubernetes Deployment controller. +// 5. New PgBouncer process does fresh DNS lookup → connects to current primary. +// +// This approach is more effective than RECONNECT command for session mode with persistent +// clients (MPG clusters) because RECONNECT waits for clients to disconnect, which never happens +// for persistent clients. SIGTERM will guarantee termination and restarts after a grace period. 
+// +// [1] https://www.pgbouncer.org/usage.html#shutdown +// [2] https://www.pgbouncer.org/usage.html#signals +// [3] https://github.com/pgbouncer/pgbouncer/issues/1361 +func SignalFailover(ctx context.Context, exec postgres.Executor) error { + log := logging.FromContext(ctx) + log.Info("SignalFailover: sending SIGTERM to force container restart") + + err := exec(ctx, nil, nil, nil, "kill", "-TERM", "1") + + log.Info("SignalFailover: SIGTERM sent.", "failed", err != nil) + return err +} diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/pgbouncer_types.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/pgbouncer_types.go index 8b2066b5b5..8a66a0a45e 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/pgbouncer_types.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/pgbouncer_types.go @@ -174,9 +174,9 @@ type PGBouncerPodStatus struct { // Total number of non-terminated pods. Replicas int32 `json:"replicas,omitempty"` - // Identifies the primary pod UID when RECONNECT was last triggered. - // Used to detect failovers and force PgBouncer to establish new - // server connections to the correct primary. + // Identifies the primary pod UID when failover signal (SIGTERM) was last triggered. + // Used to detect failovers and trigger PgBouncer container restart for fresh + // connection pool and DNS lookup to the correct primary. 
// +optional - LastReconnectPrimaryUID string `json:"lastReconnectPrimaryUID,omitempty"` + LastFailoverPrimaryUID string `json:"lastFailoverPrimaryUID,omitempty"` } From 9ef6137d106b120700acf1486061cfeeb9f23adc Mon Sep 17 00:00:00 2001 From: Juliana Oliveira Date: Mon, 26 Jan 2026 20:35:11 -0300 Subject: [PATCH 3/7] wip --- internal/pgbouncer/config.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/internal/pgbouncer/config.go b/internal/pgbouncer/config.go index 4b6a4afb08..3ac90b5bfe 100644 --- a/internal/pgbouncer/config.go +++ b/internal/pgbouncer/config.go @@ -120,6 +120,8 @@ func clusterINI(cluster *v1beta1.PostgresCluster) string { "server_tls_sslmode": "verify-full", "server_tls_ca_file": certBackendAuthorityAbsolutePath, + // Disable Unix sockets to keep the filesystem read-only. + "unix_socket_dir": "", } // Override the above with any specified settings. From 6c7ebcdb93b8be226720c8051a3f2b78bf589922 Mon Sep 17 00:00:00 2001 From: Juliana Oliveira Date: Mon, 26 Jan 2026 22:50:09 -0300 Subject: [PATCH 4/7] wip --- ...ator.crunchydata.com_postgresclusters.yaml | 6 ++ ...ator.crunchydata.com_postgresclusters.yaml | 6 ++ deploy/crd.yaml | 6 ++ .../controller/postgrescluster/pgbouncer.go | 100 ++++++++++-------- internal/pgbouncer/signal_failover.go | 36 ------- 5 files changed, 76 insertions(+), 78 deletions(-) delete mode 100644 internal/pgbouncer/signal_failover.go diff --git a/build/crd/crunchy/generated/postgres-operator.crunchydata.com_postgresclusters.yaml b/build/crd/crunchy/generated/postgres-operator.crunchydata.com_postgresclusters.yaml index 8d1bf8dbec..9fb80ba5d2 100644 --- a/build/crd/crunchy/generated/postgres-operator.crunchydata.com_postgresclusters.yaml +++ b/build/crd/crunchy/generated/postgres-operator.crunchydata.com_postgresclusters.yaml @@ -22972,6 +22972,12 @@ spec: properties: pgBouncer: properties: + lastFailoverPrimaryUID: + description: |- + Identifies the primary pod UID when failover signal (SIGTERM) was last triggered. 
+ Used to detect failovers and trigger PgBouncer container restart for fresh + connection pool and DNS lookup to the correct primary. + type: string postgresRevision: description: |- Identifies the revision of PgBouncer assets that have been installed into diff --git a/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml b/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml index 17a1908ccf..6bffe1b60a 100644 --- a/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml +++ b/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml @@ -22870,6 +22870,12 @@ spec: properties: pgBouncer: properties: + lastFailoverPrimaryUID: + description: |- + Identifies the primary pod UID when failover signal (SIGTERM) was last triggered. + Used to detect failovers and trigger PgBouncer container restart for fresh + connection pool and DNS lookup to the correct primary. + type: string postgresRevision: description: |- Identifies the revision of PgBouncer assets that have been installed into diff --git a/deploy/crd.yaml b/deploy/crd.yaml index c06debdaec..9bc5532465 100644 --- a/deploy/crd.yaml +++ b/deploy/crd.yaml @@ -51472,6 +51472,12 @@ spec: properties: pgBouncer: properties: + lastFailoverPrimaryUID: + description: |- + Identifies the primary pod UID when failover signal (SIGTERM) was last triggered. + Used to detect failovers and trigger PgBouncer container restart for fresh + connection pool and DNS lookup to the correct primary. 
+ type: string postgresRevision: description: |- Identifies the revision of PgBouncer assets that have been installed into diff --git a/internal/controller/postgrescluster/pgbouncer.go b/internal/controller/postgrescluster/pgbouncer.go index 5bb798dd33..7b9e2c9b75 100644 --- a/internal/controller/postgrescluster/pgbouncer.go +++ b/internal/controller/postgrescluster/pgbouncer.go @@ -6,10 +6,11 @@ package postgrescluster import ( "context" + "errors" "fmt" "io" - "github.com/pkg/errors" + pkgerrors "github.com/pkg/errors" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" policyv1 "k8s.io/api/policy/v1" @@ -55,7 +56,7 @@ func (r *Reconciler) reconcilePGBouncer( err = r.reconcilePGBouncerInPostgreSQL(ctx, cluster, instances, secret) } if err == nil { - // Send SIGTERM to PgBouncer if primary has changed, triggering graceful + // Stop PgBouncer pods if primary has changed, triggering graceful // shutdown and container restart. New process will do fresh DNS lookup. // This prevents stale connections from routing traffic to a demoted replica. err = r.reconcilePGBouncerReconnect(ctx, cluster, instances) @@ -77,14 +78,14 @@ func (r *Reconciler) reconcilePGBouncerConfigMap( // PgBouncer is disabled; delete the ConfigMap if it exists. Check the // client cache first using Get. 
key := client.ObjectKeyFromObject(configmap) - err := errors.WithStack(r.Client.Get(ctx, key, configmap)) + err := pkgerrors.WithStack(r.Client.Get(ctx, key, configmap)) if err == nil { - err = errors.WithStack(r.deleteControlled(ctx, cluster, configmap)) + err = pkgerrors.WithStack(r.deleteControlled(ctx, cluster, configmap)) } return nil, client.IgnoreNotFound(err) } - err := errors.WithStack(r.setControllerReference(cluster, configmap)) + err := pkgerrors.WithStack(r.setControllerReference(cluster, configmap)) configmap.Annotations = naming.Merge( cluster.Spec.Metadata.GetAnnotationsOrNil(), @@ -101,7 +102,7 @@ func (r *Reconciler) reconcilePGBouncerConfigMap( pgbouncer.ConfigMap(cluster, configmap) } if err == nil { - err = errors.WithStack(r.apply(ctx, configmap)) + err = pkgerrors.WithStack(r.apply(ctx, configmap)) } return configmap, err @@ -137,12 +138,12 @@ func (r *Reconciler) reconcilePGBouncerInPostgreSQL( } action := func(ctx context.Context, exec postgres.Executor) error { - return errors.WithStack(pgbouncer.EnableInPostgreSQL(ctx, exec, clusterSecret, exposeSuperusers)) + return pkgerrors.WithStack(pgbouncer.EnableInPostgreSQL(ctx, exec, clusterSecret, exposeSuperusers)) } if cluster.Spec.Proxy == nil || cluster.Spec.Proxy.PGBouncer == nil { // PgBouncer is disabled. 
action = func(ctx context.Context, exec postgres.Executor) error { - return errors.WithStack(pgbouncer.DisableInPostgreSQL(ctx, exec)) + return pkgerrors.WithStack(pgbouncer.DisableInPostgreSQL(ctx, exec)) } } @@ -198,7 +199,7 @@ func (r *Reconciler) reconcilePGBouncerSecret( root *pki.RootCertificateAuthority, service *corev1.Service, ) (*corev1.Secret, error) { existing := &corev1.Secret{ObjectMeta: naming.ClusterPGBouncer(cluster)} - err := errors.WithStack( + err := pkgerrors.WithStack( r.Client.Get(ctx, client.ObjectKeyFromObject(existing), existing)) if client.IgnoreNotFound(err) != nil { return nil, err @@ -207,7 +208,7 @@ func (r *Reconciler) reconcilePGBouncerSecret( if cluster.Spec.Proxy == nil || cluster.Spec.Proxy.PGBouncer == nil { // PgBouncer is disabled; delete the Secret if it exists. if err == nil { - err = errors.WithStack(r.deleteControlled(ctx, cluster, existing)) + err = pkgerrors.WithStack(r.deleteControlled(ctx, cluster, existing)) } return nil, client.IgnoreNotFound(err) } @@ -221,7 +222,7 @@ func (r *Reconciler) reconcilePGBouncerSecret( // K8SPG-330: Keep this commented in case of conflicts. // We don't want to delete TLS secrets on cluster deletion. 
// if err == nil { - // err = errors.WithStack(r.setControllerReference(cluster, intent)) + // err = pkgerrors.WithStack(r.setControllerReference(cluster, intent)) // } intent.Annotations = naming.Merge( @@ -239,7 +240,7 @@ func (r *Reconciler) reconcilePGBouncerSecret( err = pgbouncer.Secret(ctx, cluster, root, existing, service, intent) } if err == nil { - err = errors.WithStack(r.apply(ctx, intent)) + err = pkgerrors.WithStack(r.apply(ctx, intent)) } return intent, err @@ -320,7 +321,7 @@ func (r *Reconciler) generatePGBouncerService( } service.Spec.Ports = []corev1.ServicePort{servicePort} - err := errors.WithStack(r.setControllerReference(cluster, service)) + err := pkgerrors.WithStack(r.setControllerReference(cluster, service)) return service, true, err } @@ -338,15 +339,15 @@ func (r *Reconciler) reconcilePGBouncerService( // PgBouncer is disabled; delete the Service if it exists. Check the client // cache first using Get. key := client.ObjectKeyFromObject(service) - err := errors.WithStack(r.Client.Get(ctx, key, service)) + err := pkgerrors.WithStack(r.Client.Get(ctx, key, service)) if err == nil { - err = errors.WithStack(r.deleteControlled(ctx, cluster, service)) + err = pkgerrors.WithStack(r.deleteControlled(ctx, cluster, service)) } return nil, client.IgnoreNotFound(err) } if err == nil { - err = errors.WithStack(r.apply(ctx, service)) + err = pkgerrors.WithStack(r.apply(ctx, service)) } return service, err } @@ -454,7 +455,7 @@ func (r *Reconciler) generatePGBouncerDeployment( // set the image pull secrets, if any exist deploy.Spec.Template.Spec.ImagePullSecrets = cluster.Spec.ImagePullSecrets - err := errors.WithStack(r.setControllerReference(cluster, deploy)) + err := pkgerrors.WithStack(r.setControllerReference(cluster, deploy)) if err == nil { pgbouncer.Pod(ctx, cluster, configmap, primaryCertificate, secret, &deploy.Spec.Template.Spec) @@ -509,15 +510,15 @@ func (r *Reconciler) reconcilePGBouncerDeployment( // PgBouncer is disabled; delete the 
Deployment if it exists. Check the // client cache first using Get. key := client.ObjectKeyFromObject(deploy) - err := errors.WithStack(r.Client.Get(ctx, key, deploy)) + err := pkgerrors.WithStack(r.Client.Get(ctx, key, deploy)) if err == nil { - err = errors.WithStack(r.deleteControlled(ctx, cluster, deploy)) + err = pkgerrors.WithStack(r.deleteControlled(ctx, cluster, deploy)) } return client.IgnoreNotFound(err) } if err == nil { - err = errors.WithStack(r.apply(ctx, deploy)) + err = pkgerrors.WithStack(r.apply(ctx, deploy)) } return err } @@ -534,9 +535,9 @@ func (r *Reconciler) reconcilePGBouncerPodDisruptionBudget( ) error { deleteExistingPDB := func(cluster *v1beta1.PostgresCluster) error { existing := &policyv1.PodDisruptionBudget{ObjectMeta: naming.ClusterPGBouncer(cluster)} - err := errors.WithStack(r.Client.Get(ctx, client.ObjectKeyFromObject(existing), existing)) + err := pkgerrors.WithStack(r.Client.Get(ctx, client.ObjectKeyFromObject(existing), existing)) if err == nil { - err = errors.WithStack(r.deleteControlled(ctx, cluster, existing)) + err = pkgerrors.WithStack(r.deleteControlled(ctx, cluster, existing)) } return client.IgnoreNotFound(err) } @@ -577,7 +578,7 @@ func (r *Reconciler) reconcilePGBouncerPodDisruptionBudget( } if err == nil { - err = errors.WithStack(r.apply(ctx, pdb)) + err = pkgerrors.WithStack(r.apply(ctx, pdb)) } return err } @@ -587,27 +588,45 @@ func (r *Reconciler) pgbouncerPods(ctx context.Context, cluster *v1beta1.Postgre pgbouncerPods := &corev1.PodList{} selector, err := naming.AsSelector(naming.ClusterPGBouncerSelector(cluster)) if err != nil { - return nil, errors.WithStack(err) + return nil, pkgerrors.WithStack(err) } if err := r.Client.List(ctx, pgbouncerPods, client.InNamespace(cluster.Namespace), client.MatchingLabelsSelector{Selector: selector}); err != nil { - return nil, errors.WithStack(err) + return nil, pkgerrors.WithStack(err) } return pgbouncerPods, nil } -// reconcilePGBouncerReconnect is a sub-reconciler that 
signals PgBouncer pods +// reconcilePGBouncerReconnect is a sub-reconciler that deletes PgBouncer pods // when the primary has changed. This forces PgBouncer to establish new // server connections to the correct primary, preventing stale connections // from routing traffic to a demoted replica after failover. // -// Note: RECONNECT closes server connections when they are "released" according -// to the pool mode. In transaction mode, this happens after each transaction. -// In session mode, this happens when the client disconnects - so persistent -// clients may continue hitting the old primary until they reconnect. -// It returns error for integration with the parent reconciler's error handling chain. +// Deleting the pod sends a SIGTERM to PgBouncer to trigger SHUTDOWN WAIT_FOR_CLIENTS [1] +// mode, waiting for clients to gracefully disconnect [2]. This approach was +// suggested by a PgBouncer maintainer [3] to deal with failovers in Kubernetes. +// +// What happens: +// 1. Kubernetes sends SIGTERM [2] to PgBouncer +// 2. PgBouncer enters SHUTDOWN WAIT_FOR_CLIENTS mode [1]. +// 3. After Kubernetes grace period (default 30s), SIGKILL is sent if process still hasn't exited +// 4. Container is terminated and restarted by Kubernetes Deployment controller. +// 5. New PgBouncer process does fresh DNS lookup → connects to current primary. +// +// This approach is more effective than the RECONNECT command for session mode with persistent +// clients (MPG clusters) because RECONNECT waits for clients to disconnect, which never happens +// for persistent clients. SIGTERM guarantees termination and restart after the grace period. +// +// [1] https://www.pgbouncer.org/usage.html#shutdown +// [2] https://www.pgbouncer.org/usage.html#signals +// [3] https://github.com/pgbouncer/pgbouncer/issues/1361 +// +// Note: We don't use RECONNECT because it closes server connections when they are "released" +// according to the pool mode.
In transaction mode, this happens after each transaction. In session +// mode, this happens when the client disconnects - so persistent clients may continue hitting +// the old primary until they reconnect. func (r *Reconciler) reconcilePGBouncerReconnect( ctx context.Context, cluster *v1beta1.PostgresCluster, instances *observedInstances, @@ -644,28 +663,24 @@ func (r *Reconciler) reconcilePGBouncerReconnect( return err } - // Send SIGTERM to each running PgBouncer pod to trigger graceful shutdown - // and container restart. New PgBouncer process will do fresh DNS lookup. var failoverErrs []error successCount := 0 - for i := range pgbouncerPods.Items { - pod := pgbouncerPods.Items[i] // Copy value to avoid closure reference issues + for _, pod := range pgbouncerPods.Items { if pod.Status.Phase != corev1.PodRunning { continue } - if err := pgbouncer.SignalFailover(ctx, func(ctx context.Context, stdin io.Reader, stdout, stderr io.Writer, command ...string) error { - return r.PodExec(ctx, pod.Namespace, pod.Name, naming.ContainerPGBouncer, stdin, stdout, stderr, command...) - }); err != nil { - log.Error(err, "PgBouncer failover signal: failed to send SIGTERM to pod", "pod", pod.Name) + if err := r.Client.Delete(ctx, &pod); err != nil { + log.Error(err, "PgBouncer failover signal: failed to delete pod", "pod", pod.Name) failoverErrs = append(failoverErrs, fmt.Errorf("pod %s: %w", pod.Name, err)) } else { + log.Info("PgBouncer failover signal: deleted pod for recreation", "pod", pod.Name) successCount++ } } - // Update status only if all pods were successfully signaled. + // Update status only if all pods were successfully stopped. // Partial failures will be retried in the next reconciliation loop. 
if len(failoverErrs) == 0 { cluster.Status.Proxy.PGBouncer.LastFailoverPrimaryUID = currentPrimaryUID @@ -678,10 +693,11 @@ func (r *Reconciler) reconcilePGBouncerReconnect( "totalPods", len(pgbouncerPods.Items), ) - // Return aggregated errors if any pods failed + // Return aggregated errors if any failed if len(failoverErrs) > 0 { return fmt.Errorf("failed to signal %d of %d pgbouncer pods: %w", - len(failoverErrs), len(pgbouncerPods.Items), failoverErrs[0]) + len(failoverErrs), len(pgbouncerPods.Items), errors.Join(failoverErrs...)) } + return nil } diff --git a/internal/pgbouncer/signal_failover.go b/internal/pgbouncer/signal_failover.go deleted file mode 100644 index 9e7d855c3b..0000000000 --- a/internal/pgbouncer/signal_failover.go +++ /dev/null @@ -1,36 +0,0 @@ -package pgbouncer - -import ( - "context" - - "github.com/percona/percona-postgresql-operator/internal/logging" - "github.com/percona/percona-postgresql-operator/internal/postgres" -) - -// SignalFailover sends a SIGTERM to PgBouncer to trigger SHUTDOWN WAIT_FOR_CLIENTS [1] -// mode, waiting for clients to gracefully disconnect [2]. This approach was -// suggested by a PgBouncer maintainer [3] to deal with failovers in Kubernetes. -// -// What happens: -// 1. Operator sends SIGTERM [2] to PgBouncer process (PID 1 in container) -// 2. PgBouncer enters SHUTDOWN WAIT_FOR_CLIENTS mode [1]. -// 3. After Kubernetes grace period (default 30s), SIGKILL is sent if process still hasn't exited -// 4. Container is terminated and restarted by Kubernetes Deployment controller. -// 5. New PgBouncer process does fresh DNS lookup → connects to current primary. -// -// This approach is more effective than RECONNECT command for session mode with persistent -// clients (MPG clusters) because RECONNECT waits for clients to disconnect, which never happens -// for persistent clients. SIGTERM will guarantee termination and restarts after a grace period. 
-// -// [1] https://www.pgbouncer.org/usage.html#shutdown -// [2] https://www.pgbouncer.org/usage.html#signals -// [3] https://github.com/pgbouncer/pgbouncer/issues/1361 -func SignalFailover(ctx context.Context, exec postgres.Executor) error { - log := logging.FromContext(ctx) - log.Info("SignalFailover: sending SIGTERM to force container restart") - - err := exec(ctx, nil, nil, nil, "kill", "-TERM", "1") - - log.Info("SignalFailover: SIGTERM sent.", "failed", err != nil) - return err -} From 9a9eca73f8b4a36c8680ff62fb5d5aa0a31d237f Mon Sep 17 00:00:00 2001 From: Juliana Oliveira Date: Mon, 26 Jan 2026 22:53:31 -0300 Subject: [PATCH 5/7] wip --- internal/controller/postgrescluster/pgbouncer.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/internal/controller/postgrescluster/pgbouncer.go b/internal/controller/postgrescluster/pgbouncer.go index 7b9e2c9b75..e9cadd74f6 100644 --- a/internal/controller/postgrescluster/pgbouncer.go +++ b/internal/controller/postgrescluster/pgbouncer.go @@ -622,11 +622,6 @@ func (r *Reconciler) pgbouncerPods(ctx context.Context, cluster *v1beta1.Postgre // [1] https://www.pgbouncer.org/usage.html#shutdown // [2] https://www.pgbouncer.org/usage.html#signals // [3] https://github.com/pgbouncer/pgbouncer/issues/1361 -// -// Note: We don't use RECONNECT because it closes server connections when they are "released" -// according to the pool mode. In transaction mode, this happens after each transaction. In session -// mode, this happens when the client disconnects - so persistent clients may continue hitting -// the old primary until they reconnect. 
func (r *Reconciler) reconcilePGBouncerReconnect( ctx context.Context, cluster *v1beta1.PostgresCluster, instances *observedInstances, From 929a98d57c77beec0d9a9d54062f67c3f54f9dd4 Mon Sep 17 00:00:00 2001 From: Juliana Oliveira Date: Tue, 27 Jan 2026 14:51:13 -0300 Subject: [PATCH 6/7] test --- e2e-tests/functions | 1 + .../tests/pgbouncer-failover/00-assert.yaml | 12 ++++ .../00-deploy-operator.yaml | 14 +++++ .../tests/pgbouncer-failover/01-assert.yaml | 16 ++++++ .../pgbouncer-failover/01-create-cluster.yaml | 15 +++++ .../tests/pgbouncer-failover/02-assert.yaml | 17 ++++++ .../02-trigger-switchover.yaml | 55 +++++++++++++++++++ .../tests/pgbouncer-failover/03-cleanup.yaml | 21 +++++++ 8 files changed, 151 insertions(+) create mode 100644 e2e-tests/tests/pgbouncer-failover/00-assert.yaml create mode 100644 e2e-tests/tests/pgbouncer-failover/00-deploy-operator.yaml create mode 100644 e2e-tests/tests/pgbouncer-failover/01-assert.yaml create mode 100644 e2e-tests/tests/pgbouncer-failover/01-create-cluster.yaml create mode 100644 e2e-tests/tests/pgbouncer-failover/02-assert.yaml create mode 100644 e2e-tests/tests/pgbouncer-failover/02-trigger-switchover.yaml create mode 100644 e2e-tests/tests/pgbouncer-failover/03-cleanup.yaml diff --git a/e2e-tests/functions b/e2e-tests/functions index f27292760d..2a100bea25 100644 --- a/e2e-tests/functions +++ b/e2e-tests/functions @@ -54,6 +54,7 @@ deploy_operator() { disable_telemetry=false fi yq eval '.spec.template.spec.containers[0].image = "'${IMAGE}'"' "${DEPLOY_DIR}/${cw_prefix}operator.yaml" \ + | yq eval '.spec.template.spec.containers[0].imagePullPolicy = "IfNotPresent"' - \ | yq eval '(.spec.template.spec.containers[] | select(.name=="operator") | .env[] | select(.name=="DISABLE_TELEMETRY") | .value) = "'${disable_telemetry}'"' - \ | kubectl -n "${OPERATOR_NS:-$NAMESPACE}" apply -f - } diff --git a/e2e-tests/tests/pgbouncer-failover/00-assert.yaml b/e2e-tests/tests/pgbouncer-failover/00-assert.yaml new file mode 
100644 index 0000000000..14beb86c55 --- /dev/null +++ b/e2e-tests/tests/pgbouncer-failover/00-assert.yaml @@ -0,0 +1,12 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 120 +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: percona-postgresql-operator +status: + readyReplicas: 1 + replicas: 1 + updatedReplicas: 1 diff --git a/e2e-tests/tests/pgbouncer-failover/00-deploy-operator.yaml b/e2e-tests/tests/pgbouncer-failover/00-deploy-operator.yaml new file mode 100644 index 0000000000..7faf4da852 --- /dev/null +++ b/e2e-tests/tests/pgbouncer-failover/00-deploy-operator.yaml @@ -0,0 +1,14 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 10 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + init_temp_dir # do this only in the first TestStep + + deploy_operator + deploy_client + deploy_s3_secrets diff --git a/e2e-tests/tests/pgbouncer-failover/01-assert.yaml b/e2e-tests/tests/pgbouncer-failover/01-assert.yaml new file mode 100644 index 0000000000..d92e1f49c5 --- /dev/null +++ b/e2e-tests/tests/pgbouncer-failover/01-assert.yaml @@ -0,0 +1,16 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 120 +--- +apiVersion: pgv2.percona.com/v2 +kind: PerconaPGCluster +metadata: + name: pgbouncer-failover +status: + postgres: + ready: 2 + size: 2 + pgbouncer: + ready: 2 + size: 2 + state: ready diff --git a/e2e-tests/tests/pgbouncer-failover/01-create-cluster.yaml b/e2e-tests/tests/pgbouncer-failover/01-create-cluster.yaml new file mode 100644 index 0000000000..585b620930 --- /dev/null +++ b/e2e-tests/tests/pgbouncer-failover/01-create-cluster.yaml @@ -0,0 +1,15 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 10 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + get_cr \ + | yq eval ' + .spec.proxy.pgBouncer.replicas=2 | + .spec.instances[].replicas=2' - \ + | kubectl -n "${NAMESPACE}" apply -f - diff --git 
a/e2e-tests/tests/pgbouncer-failover/02-assert.yaml b/e2e-tests/tests/pgbouncer-failover/02-assert.yaml new file mode 100644 index 0000000000..cd0f5a0773 --- /dev/null +++ b/e2e-tests/tests/pgbouncer-failover/02-assert.yaml @@ -0,0 +1,17 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 120 +--- +apiVersion: pgv2.percona.com/v2 +kind: PerconaPGCluster +metadata: + name: pgbouncer-failover +status: + postgres: + ready: 2 + size: 2 + pgbouncer: + # PgBouncer should be back to ready state with 2 replicas + ready: 2 + size: 2 + state: ready diff --git a/e2e-tests/tests/pgbouncer-failover/02-trigger-switchover.yaml b/e2e-tests/tests/pgbouncer-failover/02-trigger-switchover.yaml new file mode 100644 index 0000000000..a0d47edbc6 --- /dev/null +++ b/e2e-tests/tests/pgbouncer-failover/02-trigger-switchover.yaml @@ -0,0 +1,55 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 60 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + LEADER=$(kubectl get pods -n "${NAMESPACE}" \ + -l postgres-operator.crunchydata.com/role=primary \ + -o jsonpath='{.items[0].metadata.name}') + + echo "Current leader: $LEADER" + + REPLICA=$(kubectl get pods -n "${NAMESPACE}" \ + -l postgres-operator.crunchydata.com/role=replica \ + -o jsonpath='{.items[0].metadata.name}') + + echo "Replica to promote: $REPLICA" + + kubectl get pods -n "${NAMESPACE}" \ + -l postgres-operator.crunchydata.com/role=pgbouncer \ + -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' \ + > /tmp/pgbouncer-pods-before.txt + + echo "PgBouncer pods before switchover:" + cat /tmp/pgbouncer-pods-before.txt + + kubectl exec "$LEADER" -c database -n "${NAMESPACE}" -- \ + patronictl switchover \ + --leader "$LEADER" \ + --candidate "$REPLICA" \ + --force + + echo "Switchover triggered successfully" + + # Wait for PgBouncer pods to be recreated (they should be deleted and recreated) + sleep 15 + + kubectl get pods -n "${NAMESPACE}" \ + -l 
postgres-operator.crunchydata.com/role=pgbouncer \ + -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' \ + > /tmp/pgbouncer-pods-after.txt + + echo "PgBouncer pods after switchover:" + cat /tmp/pgbouncer-pods-after.txt + + if diff /tmp/pgbouncer-pods-before.txt /tmp/pgbouncer-pods-after.txt > /dev/null; then + echo "ERROR: PgBouncer pods were NOT recreated after failover!" + exit 1 + else + echo "SUCCESS: PgBouncer pods were recreated after failover" + fi diff --git a/e2e-tests/tests/pgbouncer-failover/03-cleanup.yaml b/e2e-tests/tests/pgbouncer-failover/03-cleanup.yaml new file mode 100644 index 0000000000..c6196ff4cf --- /dev/null +++ b/e2e-tests/tests/pgbouncer-failover/03-cleanup.yaml @@ -0,0 +1,21 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +delete: + - apiVersion: pgv2.percona.com/v2 + kind: PerconaPGCluster + metadata: + name: pgbouncer-failover + - apiVersion: postgres-operator.crunchydata.com/v1beta1 + kind: PostgresCluster + metadata: + name: pgbouncer-failover +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + remove_all_finalizers + destroy_operator + timeout: 60 From bc5d95709a21745662280b4874a1e6835816d0e7 Mon Sep 17 00:00:00 2001 From: Juliana Oliveira Date: Thu, 29 Jan 2026 18:34:29 -0300 Subject: [PATCH 7/7] fix: do not restart on first run Signed-off-by: Juliana Oliveira --- internal/controller/postgrescluster/pgbouncer.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/internal/controller/postgrescluster/pgbouncer.go b/internal/controller/postgrescluster/pgbouncer.go index e9cadd74f6..e1aad93f1a 100644 --- a/internal/controller/postgrescluster/pgbouncer.go +++ b/internal/controller/postgrescluster/pgbouncer.go @@ -648,6 +648,17 @@ func (r *Reconciler) reconcilePGBouncerReconnect( return nil } + if lastFailoverUID == "" { + // First time seeing this cluster or status field was just added. + // Initialize with current primary UID without triggering pod restart. 
+ log.V(1).Info("Initializing PgBouncer failover tracking", + "currentPrimaryUID", currentPrimaryUID, + "currentPrimaryName", primaryPod.Name) + + cluster.Status.Proxy.PGBouncer.LastFailoverPrimaryUID = currentPrimaryUID + return nil + } + log.Info("Primary changed, triggering PgBouncer failover signal (SIGTERM)", "previousPrimaryUID", lastFailoverUID, "currentPrimaryUID", currentPrimaryUID,