From 6c1ed9964d398c0e834de97b7dcf61dd09b2a65d Mon Sep 17 00:00:00 2001 From: Thomas Heller Date: Fri, 9 Jan 2026 15:34:27 +0100 Subject: [PATCH 1/5] Avoid use-after-free with stdexec::run_loop We need to synchronize returning from `__run_loop_base::run` with potentially concurrent calls to `__run_loop_base::finish`. This is done by introducing a counter, ensuring proper completion of all tasks in flight. Also see https://github.com/NVIDIA/stdexec/pull/1742 for additional information. --- include/stdexec/__detail/__run_loop.hpp | 31 +++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/include/stdexec/__detail/__run_loop.hpp b/include/stdexec/__detail/__run_loop.hpp index 684f105b9..e9a5dcb4e 100644 --- a/include/stdexec/__detail/__run_loop.hpp +++ b/include/stdexec/__detail/__run_loop.hpp @@ -26,6 +26,9 @@ #include "__schedulers.hpp" #include "__atomic.hpp" +#include "stdexec/__detail/__config.hpp" +#include +#include namespace stdexec { ///////////////////////////////////////////////////////////////////////////// @@ -34,6 +37,10 @@ namespace stdexec { public: __run_loop_base() = default; + ~__run_loop_base() noexcept { + STDEXEC_ASSERT(__task_count_.load(__std::memory_order_acquire) == 0); + } + STDEXEC_ATTRIBUTE(host, device) void run() noexcept { // execute work items until the __finishing_ flag is set: while (!__finishing_.load(__std::memory_order_acquire)) { @@ -41,17 +48,24 @@ namespace stdexec { __execute_all(); } // drain the queue, taking care to execute any tasks that get added while - // executing the remaining tasks: - while (__execute_all()) + // executing the remaining tasks (also wait for other tasks that might still be in flight): + while (__execute_all() || __task_count_.load(__std::memory_order_acquire) > 0) ; } STDEXEC_ATTRIBUTE(host, device) void finish() noexcept { + // Increment our task count to avoid lifetime issues. This is preventing + // a use-after-free issue if finish is called from a different thread. + __task_count_.fetch_add(1, __std::memory_order_release); if (!__finishing_.exchange(true, __std::memory_order_acq_rel)) { // push an empty work item to the queue to wake up the consuming thread - // and let it finish: + // and let it finish. + // The count will be decremented once the tasks executes. __queue_.push(&__noop_task); + return; } + // We are done finishing. Decrement the count, which signals final completion. + __task_count_.fetch_sub(1, __std::memory_order_release); } struct __task : __immovable { @@ -73,6 +87,7 @@ namespace stdexec { template struct __opstate_t : __task { + __std::atomic* __task_count_; __atomic_intrusive_queue<&__task::__next_>* __queue_; _Rcvr __rcvr_; @@ -89,14 +104,17 @@ namespace stdexec { STDEXEC_ATTRIBUTE(host, device) constexpr explicit __opstate_t( + __std::atomic* __task_count, __atomic_intrusive_queue<&__task::__next_>* __queue, _Rcvr __rcvr) : __task{&__execute_impl} + , __task_count_(__task_count) , __queue_{__queue} , __rcvr_{static_cast<_Rcvr&&>(__rcvr)} { } STDEXEC_ATTRIBUTE(host, device) constexpr void start() noexcept { + __task_count_->fetch_add(1, __std::memory_order_release); __queue_->push(this); } }; @@ -112,20 +130,25 @@ namespace stdexec { return false; // No tasks to execute. } + std::size_t __task_count = 0; + do { // Take care to increment the iterator before executing the task, // because __execute() may invalidate the current node. auto __prev = __it++; (*__prev)->__execute(); + ++__task_count; } while (__it != __queue.end()); __queue.clear(); + __task_count_.fetch_sub(__task_count, __std::memory_order_release); return true; } STDEXEC_ATTRIBUTE(host, device) static void __noop_(__task*) noexcept { } + __std::atomic __task_count_{0}; __std::atomic __finishing_{false}; __atomic_intrusive_queue<&__task::__next_> __queue_{}; __task __noop_task{&__noop_}; @@ -186,7 +209,7 @@ namespace stdexec { template STDEXEC_ATTRIBUTE(nodiscard, host, device) constexpr auto connect(_Rcvr __rcvr) const noexcept -> __opstate_t<_Rcvr> { - return __opstate_t<_Rcvr>{&__loop_->__queue_, static_cast<_Rcvr&&>(__rcvr)}; + return __opstate_t<_Rcvr>{&__loop_->__task_count_, &__loop_->__queue_, static_cast<_Rcvr&&>(__rcvr)}; } STDEXEC_ATTRIBUTE(nodiscard, host, device) From 8baf34bab014f28cb64f8fc83f5c9b6eb6060fb2 Mon Sep 17 00:00:00 2001 From: Thomas Heller Date: Sat, 10 Jan 2026 14:40:49 +0100 Subject: [PATCH 2/5] Fixing use after free - Properly synchronizing `finish` with `run` with task counts - Adding sync_wait relacy test for verification - adapting atomic wrappers to account for missing std::atomic_ref in relacy --- include/stdexec/__detail/__atomic.hpp | 2 +- include/stdexec/__detail/__run_loop.hpp | 12 +++++++---- test/rrd/Makefile | 4 ++-- test/rrd/sync_wait.cpp | 27 +++++++++++++++++++++++++ 4 files changed, 38 insertions(+), 7 deletions(-) create mode 100644 test/rrd/sync_wait.cpp diff --git a/include/stdexec/__detail/__atomic.hpp b/include/stdexec/__detail/__atomic.hpp index d1c6dc2b8..fecd404f2 100644 --- a/include/stdexec/__detail/__atomic.hpp +++ b/include/stdexec/__detail/__atomic.hpp @@ -58,7 +58,7 @@ namespace stdexec::__std { using std::atomic_thread_fence; using std::atomic_signal_fence; -# if __cpp_lib_atomic_ref >= 2018'06L +#if __cpp_lib_atomic_ref >= 2018'06L && !defined(STDEXEC_RELACY) using std::atomic_ref; # else inline constexpr int __atomic_flag_map[] = { diff --git a/include/stdexec/__detail/__run_loop.hpp b/include/stdexec/__detail/__run_loop.hpp index e9a5dcb4e..93d74c718 100644 --- a/include/stdexec/__detail/__run_loop.hpp +++ b/include/stdexec/__detail/__run_loop.hpp @@ -27,7 +27,6 @@ #include "__atomic.hpp" #include "stdexec/__detail/__config.hpp" -#include #include namespace stdexec { @@ -56,16 +55,21 @@ namespace stdexec { STDEXEC_ATTRIBUTE(host, device) void finish() noexcept { // Increment our task count to avoid lifetime issues. This is preventing // a use-after-free issue if finish is called from a different thread. - __task_count_.fetch_add(1, __std::memory_order_release); + // We increment the task counter by two to avoid the run loop to exit before + // we scheduled the noop task + __task_count_.fetch_add(2, __std::memory_order_release); if (!__finishing_.exchange(true, __std::memory_order_acq_rel)) { // push an empty work item to the queue to wake up the consuming thread // and let it finish. // The count will be decremented once the tasks executes. __queue_.push(&__noop_task); + // If the task got pushed, simply subtract one again, the other increment + // happens when the noop task got executed. + __task_count_.fetch_sub(1, __std::memory_order_release); return; } - // We are done finishing. Decrement the count, which signals final completion. - __task_count_.fetch_sub(1, __std::memory_order_release); + // We are done finishing. Decrement the count by two, which signals final completion. + __task_count_.fetch_sub(2, __std::memory_order_release); } struct __task : __immovable { diff --git a/test/rrd/Makefile b/test/rrd/Makefile index 4bc190633..d68321426 100644 --- a/test/rrd/Makefile +++ b/test/rrd/Makefile @@ -1,13 +1,13 @@ # User-customizable variables: CXX ?= c++ CXX_STD ?= c++20 -CXXFLAGS ?= -I relacy -I relacy/relacy/fakestd -O1 -std=$(CXX_STD) -I ../../include -I ../../test -g +CXXFLAGS ?= -DSTDEXEC_RELACY -I relacy -I relacy/relacy/fakestd -O1 -std=$(CXX_STD) -I ../../include -I ../../test -g DEPFLAGS ?= -MD -MF $(@).d -MP -MT $(@) build_dir = build .SECONDARY: -test_programs = split async_scope +test_programs = split async_scope sync_wait test_exe_files = $(foreach name,$(test_programs),$(build_dir)/$(name)) diff --git a/test/rrd/sync_wait.cpp b/test/rrd/sync_wait.cpp new file mode 100644 index 000000000..299b9639e --- /dev/null +++ b/test/rrd/sync_wait.cpp @@ -0,0 +1,27 @@ +#include "../../relacy/relacy_std.hpp" + +#include +#include + +namespace ex = stdexec; + +struct sync_wait_bg_thread : rl::test_suite { + static size_t const dynamic_thread_count = 1; + + void thread(unsigned) { + exec::static_thread_pool pool{1}; + auto sender = ex::schedule(pool.get_scheduler()) | ex::then([] { return 42; }); + + auto [val] = ex::sync_wait(sender).value(); + RL_ASSERT(val == 42); + } +}; + +auto main() -> int { + rl::test_params p; + p.iteration_count = 50000; + p.execution_depth_limit = 10000; + p.search_type = rl::random_scheduler_type; + rl::simulate(p); + return 0; +} From fd4842c68ac87a7f0829974393ee59f72c3a8604 Mon Sep 17 00:00:00 2001 From: Thomas Heller Date: Mon, 12 Jan 2026 10:49:07 +0100 Subject: [PATCH 3/5] Apply suggestions from code review Co-authored-by: Eric Niebler --- include/stdexec/__detail/__atomic.hpp | 2 +- include/stdexec/__detail/__run_loop.hpp | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/stdexec/__detail/__atomic.hpp b/include/stdexec/__detail/__atomic.hpp index fecd404f2..2876a6f8f 100644 --- a/include/stdexec/__detail/__atomic.hpp +++ b/include/stdexec/__detail/__atomic.hpp @@ -58,7 +58,7 @@ namespace stdexec::__std { using std::atomic_thread_fence; using std::atomic_signal_fence; -#if __cpp_lib_atomic_ref >= 2018'06L && !defined(STDEXEC_RELACY) +# if __cpp_lib_atomic_ref >= 2018'06L && !defined(STDEXEC_RELACY) using std::atomic_ref; # else inline constexpr int __atomic_flag_map[] = { diff --git a/include/stdexec/__detail/__run_loop.hpp b/include/stdexec/__detail/__run_loop.hpp index 93d74c718..7dfcd9834 100644 --- a/include/stdexec/__detail/__run_loop.hpp +++ b/include/stdexec/__detail/__run_loop.hpp @@ -26,7 +26,7 @@ #include "__schedulers.hpp" #include "__atomic.hpp" -#include "stdexec/__detail/__config.hpp" +#include "__config.hpp" #include namespace stdexec { @@ -55,8 +55,8 @@ namespace stdexec { STDEXEC_ATTRIBUTE(host, device) void finish() noexcept { // Increment our task count to avoid lifetime issues. This is preventing // a use-after-free issue if finish is called from a different thread. - // We increment the task counter by two to avoid the run loop to exit before - // we scheduled the noop task + // We increment the task counter by two to prevent the run loop from + // exiting before we schedule the noop task. __task_count_.fetch_add(2, __std::memory_order_release); if (!__finishing_.exchange(true, __std::memory_order_acq_rel)) { // push an empty work item to the queue to wake up the consuming thread @@ -118,7 +118,7 @@ namespace stdexec { } STDEXEC_ATTRIBUTE(host, device) constexpr void start() noexcept { - __task_count_->fetch_add(1, __std::memory_order_release); + __task_count_->fetch_add(1, __std::memory_order_release); __queue_->push(this); } }; From 38c179eb9afa372509213897e8544ba07dd47c28 Mon Sep 17 00:00:00 2001 From: Thomas Heller Date: Mon, 12 Jan 2026 10:54:36 +0100 Subject: [PATCH 4/5] More review comments --- include/stdexec/__detail/__run_loop.hpp | 2 +- test/rrd/sync_wait.cpp | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/include/stdexec/__detail/__run_loop.hpp b/include/stdexec/__detail/__run_loop.hpp index 7dfcd9834..0502fbc20 100644 --- a/include/stdexec/__detail/__run_loop.hpp +++ b/include/stdexec/__detail/__run_loop.hpp @@ -63,7 +63,7 @@ namespace stdexec { // and let it finish. // The count will be decremented once the tasks executes. __queue_.push(&__noop_task); - // If the task got pushed, simply subtract one again, the other increment + // If the task got pushed, simply subtract one again, the other decrement // happens when the noop task got executed. __task_count_.fetch_sub(1, __std::memory_order_release); return; diff --git a/test/rrd/sync_wait.cpp b/test/rrd/sync_wait.cpp index 299b9639e..38dfca571 100644 --- a/test/rrd/sync_wait.cpp +++ b/test/rrd/sync_wait.cpp @@ -1,3 +1,19 @@ +/* + * Copyright (c) 2025 Chris Cotter + * + * Licensed under the Apache License Version 2.0 with LLVM Exceptions + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * https://llvm.org/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + #include "../../relacy/relacy_std.hpp" #include From 702a7915a86e32f9316b07602c65fc517ab90749 Mon Sep 17 00:00:00 2001 From: Eric Niebler Date: Mon, 12 Jan 2026 08:00:42 -0800 Subject: [PATCH 5/5] add NVIDIA copyright --- test/rrd/sync_wait.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/test/rrd/sync_wait.cpp b/test/rrd/sync_wait.cpp index 38dfca571..399a2fe5a 100644 --- a/test/rrd/sync_wait.cpp +++ b/test/rrd/sync_wait.cpp @@ -1,4 +1,5 @@ /* + * Copyright (c) 2025 NVIDIA Corporation * Copyright (c) 2025 Chris Cotter * * Licensed under the Apache License Version 2.0 with LLVM Exceptions