From ae885647e8d18ea2e517d56d6303611da90cb81e Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 9 Feb 2026 11:43:54 +1030 Subject: [PATCH 1/2] pytest: reproduce crash when node disconnects between hooks: ``` lightningd-2 2026-02-09T00:41:35.196Z TRACE lightningd: Plugin peer_connected_logger_a.py returned from peer_connected hook call lightningd-2 2026-02-09T00:41:35.196Z TRACE lightningd: Calling peer_connected hook of plugin peer_connected_logger_b.py lightningd-2 2026-02-09T00:41:35.293Z **BROKEN** lightningd: FATAL SIGNAL 11 (version v25.12-257-g2a5fbd1-modded) lightningd-2 2026-02-09T00:41:35.293Z **BROKEN** lightningd: backtrace: common/daemon.c:46 (send_backtrace) 0x5b2abd7f29bd lightningd-2 2026-02-09T00:41:35.293Z **BROKEN** lightningd: backtrace: common/daemon.c:83 (crashdump) 0x5b2abd7f2a0c lightningd-2 2026-02-09T00:41:35.293Z **BROKEN** lightningd: backtrace: ./signal/../sysdeps/unix/sysv/linux/x86_64/libc_sigaction.c:0 ((null)) 0x75950d84532f lightningd-2 2026-02-09T00:41:35.293Z **BROKEN** lightningd: backtrace: lightningd/peer_control.c:1333 (peer_connected_serialize) 0x5b2abd79c964 lightningd-2 2026-02-09T00:41:35.293Z **BROKEN** lightningd: backtrace: lightningd/plugin_hook.c:359 (plugin_hook_call_next) 0x5b2abd7ae14a lightningd-2 2026-02-09T00:41:35.293Z **BROKEN** lightningd: backtrace: lightningd/plugin_hook.c:299 (plugin_hook_callback) 0x5b2abd7ae38f lightningd-2 2026-02-09T00:41:35.293Z **BROKEN** lightningd: backtrace: lightningd/plugin.c:701 (plugin_response_handle) 0x5b2abd7a7e28 lightningd-2 2026-02-09T00:41:35.293Z **BROKEN** lightningd: backtrace: lightningd/plugin.c:790 (plugin_read_json) 0x5b2abd7ace9c lightningd-2 2026-02-09T00:41:35.293Z **BROKEN** lightningd: backtrace: ccan/ccan/io/io.c:60 (next_plan) 0x5b2abd81dada lightningd-2 2026-02-09T00:41:35.293Z **BROKEN** lightningd: backtrace: ccan/ccan/io/io.c:422 (do_plan) 0x5b2abd81def6 lightningd-2 2026-02-09T00:41:35.293Z **BROKEN** lightningd: backtrace: ccan/ccan/io/io.c:439 (io_ready) 0x5b2abd81dfb3 lightningd-2 2026-02-09T00:41:35.293Z **BROKEN** lightningd: backtrace: ccan/ccan/io/poll.c:470 (io_loop) 0x5b2abd81f0db lightningd-2 2026-02-09T00:41:35.293Z **BROKEN** lightningd: backtrace: lightningd/io_loop_with_timers.c:22 (io_loop_with_timers) 0x5b2abd77c13b lightningd-2 2026-02-09T00:41:35.293Z **BROKEN** lightningd: backtrace: lightningd/lightningd.c:1495 (main) 0x5b2abd781c6a lightningd-2 2026-02-09T00:41:35.293Z **BROKEN** lightningd: backtrace: ../sysdeps/nptl/libc_start_call_main.h:58 (__libc_start_call_main) 0x75950d82a1c9 lightningd-2 2026-02-09T00:41:35.293Z **BROKEN** lightningd: backtrace: ../csu/libc-start.c:360 (__libc_start_main_impl) 0x75950d82a28a lightningd-2 2026-02-09T00:41:35.293Z **BROKEN** lightningd: backtrace: (null):0 ((null)) 0x5b2abd752964 lightningd-2 2026-02-09T00:41:35.293Z **BROKEN** lightningd: backtrace: (null):0 ((null)) 0xffffffffffffffff ``` Signed-off-by: Rusty Russell --- tests/plugins/peer_connected_logger_a.py | 7 +++++++ tests/test_plugin.py | 24 ++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/tests/plugins/peer_connected_logger_a.py b/tests/plugins/peer_connected_logger_a.py index 5ded23068203..484bb5f2cacc 100755 --- a/tests/plugins/peer_connected_logger_a.py +++ b/tests/plugins/peer_connected_logger_a.py @@ -4,6 +4,8 @@ """ from pyln.client import Plugin +import os +import time plugin = Plugin() @@ -11,7 +13,12 @@ @plugin.hook('peer_connected') def on_connected(peer, plugin, **kwargs): print(f"peer_connected_logger_a {peer['id']} {peer}") + if plugin.get_option("logger_a_sleep") is True: + # Block until file appears + while not os.path.exists("unsleep"): + time.sleep(0.25) return {'result': 'continue'} +plugin.add_option("logger_a_sleep", False, 'Block until unsleep file exists', opt_type='bool') plugin.run() diff --git a/tests/test_plugin.py b/tests/test_plugin.py index 12a9d6e5ed5a..b40cb515e899 100644 --- a/tests/test_plugin.py +++ b/tests/test_plugin.py @@ -514,6 +514,30 @@ def check_disconnect(): assert not l1.daemon.is_in_log(f"peer_connected_logger_b {l3id}") +@pytest.mark.xfail(strict=True) +def test_plugin_connected_hook_disconnect_crash(node_factory, executor): + """A peer disconnnects between plugin hook invocations""" + opts = [{}, + {'plugin': + [os.path.join(os.getcwd(), + 'tests/plugins/peer_connected_logger_a.py'), + os.path.join(os.getcwd(), + 'tests/plugins/peer_connected_logger_b.py')], + 'logger_a_sleep': True}, + ] + + l1, l2 = node_factory.get_nodes(2, opts=opts) + executor.submit(l1.rpc.connect, l2.info['id'], 'localhost', l2.port) + l2.daemon.wait_for_log(f'plugin-peer_connected_logger_a.py: peer_connected_logger_a {l1.info["id"]}') + l1.stop() + + # Now make first plugin continue... + open(os.path.join(l2.daemon.lightning_dir, TEST_NETWORK, "unsleep"), "w").close() + + # Should get log from second + l2.daemon.wait_for_log(f'plugin-peer_connected_logger_b.py: peer_connected_logger_b {l1.info["id"]}') + + def test_peer_connected_remote_addr(node_factory): """This tests the optional tlv `remote_addr` being passed to a plugin. From 7fec60492c2cb0cd008e3fcb1295a5eb1b584fa4 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 9 Feb 2026 11:43:56 +1030 Subject: [PATCH 2/2] lightningd: don't assume peer existrs in peer_connected_serialize. It's always true for the first hook invocation, but if there is more than one plugin, it could vanish between the two! In the default configuration, this can't happen. This bug has been around since v23.02. Note: we always tell all the plugins about the peer, even if it's already gone. Signed-off-by: Rusty Russell Changelog-Fixed: lightningd: possible crash when peers disconnected if there was more than one plugin servicing the `peer_connected` hook. Reported-by: https://github.com/santyr Fixes: https://github.com/ElementsProject/lightning/issues/8858 --- lightningd/peer_control.c | 17 ++++++++--------- tests/test_plugin.py | 1 - 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/lightningd/peer_control.c b/lightningd/peer_control.c index 2ec8bea3689f..6c1216902b2f 100644 --- a/lightningd/peer_control.c +++ b/lightningd/peer_control.c @@ -1309,6 +1309,7 @@ struct peer_connected_hook_payload { struct lightningd *ld; struct wireaddr_internal addr; struct wireaddr *remote_addr; + u8 *their_features; bool incoming; /* We don't keep a pointer to peer: it might be freed! */ struct node_id peer_id; @@ -1327,10 +1328,7 @@ peer_connected_serialize(struct peer_connected_hook_payload *payload, if (payload->remote_addr) json_add_string(stream, "remote_addr", fmt_wireaddr(tmpctx, payload->remote_addr)); - /* Since this is start of hook, peer is always in table! */ - json_add_hex_talarr(stream, "features", - peer_by_id(payload->ld, &payload->peer_id) - ->their_features); + json_add_hex_talarr(stream, "features", payload->their_features); json_object_end(stream); /* .peer */ } @@ -1759,7 +1757,6 @@ REGISTER_PLUGIN_HOOK(peer_connected, void handle_peer_connected(struct lightningd *ld, const u8 *msg) { struct node_id id; - u8 *their_features; struct peer *peer; struct peer_connected_hook_payload *hook_payload; u64 connectd_counter; @@ -1776,7 +1773,7 @@ void handle_peer_connected(struct lightningd *ld, const u8 *msg) &hook_payload->addr, &hook_payload->remote_addr, &hook_payload->incoming, - &their_features, + &hook_payload->their_features, &connect_reason, &connect_nsec)) { u64 prev_connectd_counter, connected_time_nsec; @@ -1786,7 +1783,7 @@ void handle_peer_connected(struct lightningd *ld, const u8 *msg) &hook_payload->addr, &hook_payload->remote_addr, &hook_payload->incoming, - &their_features, + &hook_payload->their_features, &connected_time_nsec)) { fatal("Connectd gave bad CONNECT_PEER_(RE)CONNECTED message %s", tal_hex(msg, msg)); @@ -1830,10 +1827,12 @@ void handle_peer_connected(struct lightningd *ld, const u8 *msg) /* If we connected to them, we know this is a good address. */ peer = new_peer(ld, 0, &id, &hook_payload->addr, last_known_addr, - take(their_features), hook_payload->incoming); + hook_payload->their_features, + hook_payload->incoming); } else { tal_free(peer->their_features); - peer->their_features = tal_steal(peer, their_features); + peer->their_features = tal_dup_talarr(peer, u8, + hook_payload->their_features); /* Update known address. */ tal_free(peer->last_known_addr); diff --git a/tests/test_plugin.py b/tests/test_plugin.py index b40cb515e899..b61d1cd4c920 100644 --- a/tests/test_plugin.py +++ b/tests/test_plugin.py @@ -514,7 +514,6 @@ def check_disconnect(): assert not l1.daemon.is_in_log(f"peer_connected_logger_b {l3id}") -@pytest.mark.xfail(strict=True) def test_plugin_connected_hook_disconnect_crash(node_factory, executor): """A peer disconnnects between plugin hook invocations""" opts = [{},