-
Notifications
You must be signed in to change notification settings - Fork 350
Fix: sbd-integration: sync pacemakerd with sbd #2119
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
17d5cea
e5ad1a6
927b43a
06da3c3
6ce5bb0
567cb6e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -40,8 +40,25 @@ static bool global_keep_tracking = false; | |
| #define PCMK_PROCESS_CHECK_INTERVAL 5 | ||
|
|
||
| static crm_trigger_t *shutdown_trigger = NULL; | ||
| static crm_trigger_t *startup_trigger = NULL; | ||
| static const char *pid_file = PCMK_RUN_DIR "/pacemaker.pid"; | ||
|
|
||
| /* State reported to clients that send a status-ping via pacemakerd-api */ | ||
| static const char *pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_INIT; | ||
| static gboolean running_with_sbd = FALSE; /* local copy; set to TRUE in main() when pcmk_locate_sbd() > 0 */ | ||
| /* When contacted via pacemakerd-api by a client having sbd in | ||
| * the name, we assume it is the sbd-daemon, which wants to know | ||
| * whether pacemakerd shut down gracefully. | ||
| * Thus, when everything has been shut down properly, pacemakerd | ||
| * waits until it has reported the graceful completion of | ||
| * shutdown to sbd. Only once the sbd-client closes the | ||
| * connection can we assume that the report has arrived | ||
| * properly, so that pacemakerd can finally exit. | ||
| * The following two variables are used to track that handshake. | ||
| */ | ||
| static unsigned int shutdown_complete_state_reported_to = 0; /* pid of the sbd client that was sent the shutdown-complete state (0: none yet) */ | ||
| static gboolean shutdown_complete_state_reported_client_closed = FALSE; /* set to TRUE in pcmk_ipc_closed() when that client disconnects */ | ||
|
|
||
| typedef struct pcmk_child_s { | ||
| pid_t pid; | ||
| long flag; | ||
|
|
@@ -374,21 +391,20 @@ escalate_shutdown(gpointer data) | |
| static gboolean | ||
| pcmk_shutdown_worker(gpointer user_data) | ||
| { | ||
| static int phase = 0; | ||
| static int phase = SIZEOF(pcmk_children); | ||
| static time_t next_log = 0; | ||
| static int max = SIZEOF(pcmk_children); | ||
|
|
||
| int lpc = 0; | ||
|
|
||
| if (phase == 0) { | ||
| if (phase == SIZEOF(pcmk_children)) { | ||
| crm_notice("Shutting down Pacemaker"); | ||
| phase = max; | ||
| pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN; | ||
| } | ||
|
|
||
| for (; phase > 0; phase--) { | ||
| /* Don't stop anything with start_seq < 1 */ | ||
|
|
||
| for (lpc = max - 1; lpc >= 0; lpc--) { | ||
| for (lpc = SIZEOF(pcmk_children) - 1; lpc >= 0; lpc--) { | ||
| pcmk_child_t *child = &(pcmk_children[lpc]); | ||
|
|
||
| if (phase != child->start_seq) { | ||
|
|
@@ -436,6 +452,13 @@ pcmk_shutdown_worker(gpointer user_data) | |
| } | ||
|
|
||
| crm_notice("Shutdown complete"); | ||
| pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE; | ||
| if (!fatal_error && running_with_sbd && | ||
| pcmk__get_sbd_sync_resource_startup() && | ||
| !shutdown_complete_state_reported_client_closed) { | ||
| crm_notice("Waiting for SBD to pick up shutdown-complete-state."); | ||
| return TRUE; | ||
| } | ||
|
|
||
| { | ||
| const char *delay = pcmk__env_option("shutdown_delay"); | ||
|
|
@@ -489,6 +512,55 @@ pcmk_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) | |
| return 0; | ||
| } | ||
|
|
||
| static void | ||
| pcmk_handle_ping_request(pcmk__client_t *c, xmlNode *msg, uint32_t id) | ||
| { | ||
| const char *value = NULL; | ||
| xmlNode *ping = NULL; | ||
| xmlNode *reply = NULL; | ||
| time_t pinged = time(NULL); | ||
| const char *from = crm_element_value(msg, F_CRM_SYS_FROM); | ||
|
|
||
| /* Pinged for status: answer client c (request id) with a ping element carrying the current pacemakerd_state, the time of the ping and status ok */ | ||
| crm_trace("Pinged from %s.%s", | ||
| crm_str(crm_element_value(msg, F_CRM_ORIGIN)), | ||
| from?from:"unknown"); | ||
| ping = create_xml_node(NULL, XML_CRM_TAG_PING); | ||
| value = crm_element_value(msg, F_CRM_SYS_TO); | ||
| crm_xml_add(ping, XML_PING_ATTR_SYSFROM, value); | ||
| crm_xml_add(ping, XML_PING_ATTR_PACEMAKERDSTATE, pacemakerd_state); | ||
| crm_xml_add_ll(ping, XML_ATTR_TSTAMP, (long long) pinged); | ||
| crm_xml_add(ping, XML_PING_ATTR_STATUS, "ok"); | ||
| reply = create_reply(msg, ping); | ||
| free_xml(ping); | ||
| if (reply) { | ||
| if (pcmk__ipc_send_xml(c, id, reply, crm_ipc_server_event) != | ||
| pcmk_rc_ok) { | ||
| crm_err("Failed sending ping-reply"); | ||
| } | ||
| free_xml(reply); | ||
| } else { | ||
| crm_err("Failed building ping-reply"); | ||
| } | ||
| /* Only a ping whose sender name contains sbd advances the state; this happens regardless of whether the reply above was built or sent successfully */ | ||
| if (from && strstr(from, "sbd")) { | ||
| if (crm_str_eq(pacemakerd_state, | ||
| XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE, | ||
| TRUE)) { | ||
| if (pcmk__get_sbd_sync_resource_startup()) { | ||
| crm_notice("Shutdown-complete-state passed to SBD."); | ||
| } | ||
| shutdown_complete_state_reported_to = c->pid; /* remembered so that pcmk_ipc_closed() can recognize this client disconnecting */ | ||
| } else if (crm_str_eq(pacemakerd_state, | ||
| XML_PING_ATTR_PACEMAKERDSTATE_WAITPING, | ||
| TRUE)) { | ||
| crm_notice("Received startup-trigger from SBD."); | ||
| pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS; | ||
| mainloop_set_trigger(startup_trigger); /* trigger registered in main() with init_children_processes as its callback */ | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /* Exit code means? */ | ||
| static int32_t | ||
| pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) | ||
|
|
@@ -514,6 +586,9 @@ pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) | |
| crm_trace("Ignoring IPC request to purge node " | ||
| "because peer cache is not used"); | ||
|
|
||
| } else if (crm_str_eq(task, CRM_OP_PING, TRUE)) { | ||
| pcmk_handle_ping_request(c, msg, id); | ||
|
|
||
| } else { | ||
| crm_debug("Unrecognized IPC command '%s' sent to pacemakerd", | ||
| crm_str(task)); | ||
|
|
@@ -533,6 +608,12 @@ pcmk_ipc_closed(qb_ipcs_connection_t * c) | |
| return 0; | ||
| } | ||
| crm_trace("Connection %p", c); | ||
| if (shutdown_complete_state_reported_to == client->pid) { | ||
| shutdown_complete_state_reported_client_closed = TRUE; | ||
| if (shutdown_trigger) { | ||
| mainloop_set_trigger(shutdown_trigger); | ||
| } | ||
| } | ||
| pcmk__free_client(client); | ||
| return 0; | ||
| } | ||
|
|
@@ -924,8 +1005,8 @@ find_and_track_existing_processes(void) | |
| return pcmk_rc_ok; | ||
| } | ||
|
|
||
| static void | ||
| init_children_processes(void) | ||
| static gboolean | ||
| init_children_processes(void *user_data) | ||
| { | ||
| int start_seq = 1, lpc = 0; | ||
| static int max = SIZEOF(pcmk_children); | ||
|
|
@@ -951,6 +1032,8 @@ init_children_processes(void) | |
| * This may be useful for the daemons to know | ||
| */ | ||
| setenv("PCMK_respawned", "true", 1); | ||
| pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_RUNNING; | ||
| return TRUE; | ||
| } | ||
|
|
||
| static void | ||
|
|
@@ -1154,6 +1237,7 @@ main(int argc, char **argv) | |
|
|
||
| if(pcmk_locate_sbd() > 0) { | ||
| setenv("PCMK_watchdog", "true", 1); | ||
| running_with_sbd = TRUE; | ||
| } else { | ||
| setenv("PCMK_watchdog", "false", 1); | ||
| } | ||
|
|
@@ -1170,7 +1254,19 @@ main(int argc, char **argv) | |
| mainloop_add_signal(SIGTERM, pcmk_shutdown); | ||
| mainloop_add_signal(SIGINT, pcmk_shutdown); | ||
|
|
||
| init_children_processes(); | ||
| if ((running_with_sbd) && pcmk__get_sbd_sync_resource_startup()) { | ||
kgaillot marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| crm_notice("Waiting for startup-trigger from SBD."); | ||
| pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_WAITPING; | ||
kgaillot marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| startup_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, init_children_processes, NULL); | ||
kgaillot marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } else { | ||
| if (running_with_sbd) { | ||
| crm_warn("Enabling SBD_SYNC_RESOURCE_STARTUP would (if supported " | ||
| "by your SBD version) improve reliability of " | ||
| "interworking between SBD & pacemaker."); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Absolutely an awesome improvement. Thanks for the nice work! But in here, without knowing whether the sbd version even supports the feature, isn't a warning a little too scary for users who don't know about the details? It makes them think something is really wrong here. Would a notice be good enough to deliver the information? Similar for the warning in sbd: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Makes sense
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm ... my perception of a warning is to point to something that might behave in an unintentional way or lead to sub-optimal behavior ...
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Probably we could've bumped the main version of libcrmcommon to make sure sbd built against it would only be able to run with a pacemaker that supports the feature, so that the default value of the parameter at build time could've been "auto". Anyway indeed for the time being, downstream needs to take good care of that. |
||
| } | ||
| pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS; | ||
| init_children_processes(NULL); | ||
| } | ||
|
|
||
| crm_notice("Pacemaker daemon successfully started and accepting connections"); | ||
| g_main_loop_run(mainloop); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,71 @@ | ||
| /* | ||
| * Copyright 2020 the Pacemaker project contributors | ||
| * | ||
| * The version control history for this file may have further details. | ||
| * | ||
| * This source code is licensed under the GNU Lesser General Public License | ||
| * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. | ||
| */ | ||
|
|
||
| #ifndef PCMK__IPC_PACEMAKERD__H | ||
| # define PCMK__IPC_PACEMAKERD__H | ||
|
|
||
| #ifdef __cplusplus | ||
| extern "C" { | ||
| #endif | ||
|
|
||
| /** | ||
| * \file | ||
| * \brief IPC commands for Pacemakerd | ||
| * | ||
| * \ingroup core | ||
| */ | ||
|
|
||
| #include <sys/types.h> // time_t | ||
| #include <crm/common/ipc.h> // pcmk_ipc_api_t | ||
|
|
||
| enum pcmk_pacemakerd_state { /* pacemakerd states as carried in the state member of a ping reply; NOTE(review): pacemakerd's main() enters the wait-for-ping state before the starting-daemons state, so enumerator order is not the startup order */ | ||
| pcmk_pacemakerd_state_invalid = -1, | ||
| pcmk_pacemakerd_state_init = 0, | ||
| pcmk_pacemakerd_state_starting_daemons, | ||
| pcmk_pacemakerd_state_wait_for_ping, | ||
| pcmk_pacemakerd_state_running, | ||
| pcmk_pacemakerd_state_shutting_down, | ||
| pcmk_pacemakerd_state_shutdown_complete, | ||
| pcmk_pacemakerd_state_max = pcmk_pacemakerd_state_shutdown_complete, /* alias for the highest valid state */ | ||
| }; | ||
|
|
||
| //! Possible types of pacemakerd replies, stored in the reply_type member of pcmk_pacemakerd_api_reply_t | ||
| enum pcmk_pacemakerd_api_reply { | ||
| pcmk_pacemakerd_reply_unknown, | ||
| pcmk_pacemakerd_reply_ping, | ||
| }; | ||
|
|
||
| /*! | ||
| * Pacemakerd reply passed to event callback. | ||
| * NOTE(review): presumably reply_type indicates which member of the | ||
| * data union is meaningful - confirm against the code filling it in. | ||
| */ | ||
| typedef struct { | ||
| enum pcmk_pacemakerd_api_reply reply_type; | ||
|
|
||
| union { | ||
| // members for a reply of type pcmk_pacemakerd_reply_ping | ||
| struct { | ||
| const char *sys_from; | ||
| enum pcmk_pacemakerd_state state; | ||
| time_t last_good; | ||
| int status; | ||
| } ping; | ||
| } data; | ||
| } pcmk_pacemakerd_api_reply_t; | ||
|
|
||
| int pcmk_pacemakerd_api_ping(pcmk_ipc_api_t *api, const char *ipc_name); | ||
| enum pcmk_pacemakerd_state | ||
| pcmk_pacemakerd_api_daemon_state_text2enum(const char *state); | ||
| const char | ||
| *pcmk_pacemakerd_api_daemon_state_enum2text(enum pcmk_pacemakerd_state state); | ||
|
|
||
| #ifdef __cplusplus | ||
| } | ||
| #endif | ||
|
|
||
| #endif // PCMK__IPC_PACEMAKERD__H |
Uh oh!
There was an error while loading. Please reload this page.