From 838dd1459991af64d6230854ab1aef7dedd7f9c3 Mon Sep 17 00:00:00 2001 From: Jacek Tomasiak Date: Mon, 20 May 2019 14:29:13 +0200 Subject: [PATCH 1/2] Fix: sbd-cluster: stop dispatching cmap if disconnected If the cmap socket is in the HUP state, an attempt to dispatch incoming events will trigger the callback again and cause an infinite loop with high CPU load. The added check should solve this by destroying the cmap connection and removing it from the main loop. --- src/sbd-cluster.c | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/src/sbd-cluster.c b/src/sbd-cluster.c index 541212f..aa963fb 100644 --- a/src/sbd-cluster.c +++ b/src/sbd-cluster.c @@ -35,6 +35,18 @@ #if CHECK_TWO_NODE #include +// available since glib 2.58 +#ifndef G_SOURCE_FUNC +#define G_SOURCE_FUNC(f) ((GSourceFunc) (void (*)(void)) (f)) +#endif +// available since glib 2.32 +#ifndef G_SOURCE_REMOVE +#define G_SOURCE_REMOVE FALSE +#endif +// available since glib 2.32 +#ifndef G_SOURCE_CONTINUE +#define G_SOURCE_CONTINUE TRUE +#endif #endif #include "sbd.h" @@ -58,6 +70,9 @@ static crm_cluster_t cluster; static gboolean sbd_remote_check(gpointer user_data); static long unsigned int find_pacemaker_remote(void); static void sbd_membership_destroy(gpointer user_data); +#if CHECK_TWO_NODE +static void cmap_destroy(void); +#endif #if SUPPORT_PLUGIN @@ -168,10 +183,19 @@ static void sbd_cmap_notify_fn( } static gboolean -cmap_dispatch_callback (gpointer user_data) +cmap_dispatch_callback (gint cmap_fd, + GIOCondition condition, + gpointer user_data) { + /* CMAP connection lost */ + if (condition & G_IO_HUP) { + cl_log(LOG_WARNING, "CMAP service connection lost\n"); + cmap_destroy(); + /* remove the source from the main loop */ + return G_SOURCE_REMOVE; + } cmap_dispatch(cmap_handle, CS_DISPATCH_ALL); - return TRUE; + return G_SOURCE_CONTINUE; } static void @@ -222,7 +246,7 @@ sbd_get_two_node(void) cl_log(LOG_WARNING, "Couldn't create source for cmap\n"); goto out; } - 
g_source_set_callback(cmap_source, cmap_dispatch_callback, NULL, NULL); + g_source_set_callback(cmap_source, G_SOURCE_FUNC(cmap_dispatch_callback), NULL, NULL); g_source_attach(cmap_source, NULL); } From adafc28c8c14ca97d0816a21e5e2f9eca8f17c10 Mon Sep 17 00:00:00 2001 From: Jacek Tomasiak Date: Tue, 28 May 2019 13:57:37 +0200 Subject: [PATCH 2/2] Fix: sbd-cluster: exit if cmap is disconnected To avoid problems with a lost CMAP connection, just exit and let the inquisitor fix the situation by restarting the servant. --- src/sbd-cluster.c | 16 +++++++++++----- src/sbd-inquisitor.c | 14 ++++++++++++++ src/sbd.h | 3 +++ 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/src/sbd-cluster.c b/src/sbd-cluster.c index aa963fb..b315e8b 100644 --- a/src/sbd-cluster.c +++ b/src/sbd-cluster.c @@ -67,12 +67,10 @@ static int reconnect_msec = 1000; static GMainLoop *mainloop = NULL; static guint notify_timer = 0; static crm_cluster_t cluster; +static void clean_up(int rc); static gboolean sbd_remote_check(gpointer user_data); static long unsigned int find_pacemaker_remote(void); static void sbd_membership_destroy(gpointer user_data); -#if CHECK_TWO_NODE -static void cmap_destroy(void); -#endif #if SUPPORT_PLUGIN @@ -190,9 +188,9 @@ cmap_dispatch_callback (gint cmap_fd, /* CMAP connection lost */ if (condition & G_IO_HUP) { cl_log(LOG_WARNING, "CMAP service connection lost\n"); - cmap_destroy(); + clean_up(EXIT_CLUSTER_DISCONNECT); /* remove the source from the main loop */ - return G_SOURCE_REMOVE; + return G_SOURCE_REMOVE; /* never reached */ } cmap_dispatch(cmap_handle, CS_DISPATCH_ALL); return G_SOURCE_CONTINUE; @@ -557,6 +555,14 @@ find_pacemaker_remote(void) static void clean_up(int rc) { +#if SUPPORT_COROSYNC && CHECK_TWO_NODE + cmap_destroy(); +#endif + + if (rc >= 0) { + exit(rc); + } + return; } diff --git a/src/sbd-inquisitor.c b/src/sbd-inquisitor.c index abde4e5..fcb867c 100644 --- a/src/sbd-inquisitor.c +++ b/src/sbd-inquisitor.c @@ -526,6 +526,20 @@ void 
inquisitor_child(void) break; } } + } else if (sbd_is_cluster(s)) { + if (WIFEXITED(status)) { + switch(WEXITSTATUS(status)) { + case EXIT_CLUSTER_DISCONNECT: + cl_log(LOG_WARNING, "Cluster-Servant has exited (connection lost)"); + s->restarts = 0; + s->restart_blocked = 0; + s->outdated = 1; + s->t_last.tv_sec = 0; + break; + default: + break; + } + } } cleanup_servant_by_pid(pid); } diff --git a/src/sbd.h b/src/sbd.h index 3b05a11..45244ab 100644 --- a/src/sbd.h +++ b/src/sbd.h @@ -62,6 +62,9 @@ /* exit status for pcmk-servant */ #define EXIT_PCMK_SERVANT_GRACEFUL_SHUTDOWN 30 +/* exit status for cluster-servant */ +#define EXIT_CLUSTER_DISCONNECT 40 + #define HOG_CHAR 0xff #define SECTOR_NAME_MAX 63