From af744180fd915ed86ab6d27762dd0f8b49bce9d6 Mon Sep 17 00:00:00 2001
From: serge-sans-paille
Date: Sun, 4 Jan 2026 16:31:08 +0100
Subject: [PATCH 1/2] [riscv] Remove warning about shadowing ctx

---
 include/xsimd/arch/xsimd_rvv.hpp | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/include/xsimd/arch/xsimd_rvv.hpp b/include/xsimd/arch/xsimd_rvv.hpp
index 5d2fdef03..fddaf7e20 100644
--- a/include/xsimd/arch/xsimd_rvv.hpp
+++ b/include/xsimd/arch/xsimd_rvv.hpp
@@ -135,10 +135,9 @@
     template \
     struct impl \
     { \
-        using ctx = ctx; \
         constexpr Ret operator()(Args... args) const noexcept \
         { \
-            return CALLEE(args..., ctx::vl); \
+            return CALLEE(args..., ctx::vl); \
         }; \
     };
 #define XSIMD_RVV_WRAPPER_NOVL(KEY, CALLEE, ...) \
@@ -154,20 +153,18 @@
     template \
     struct impl \
     { \
-        using ctx = ctx; \
         constexpr Ret operator()(First, Args... args) const noexcept \
         { \
-            return CALLEE(args..., ctx::vl); \
+            return CALLEE(args..., ctx::vl); \
         }; \
     };
 #define XSIMD_RVV_WRAPPER_DROP_1ST_CUSTOM_ARGS(KEY, CALLEE, SIGNATURE, ...) \
     template \
     struct impl \
     { \
-        using ctx = ctx; \
         constexpr Ret operator()(First, Args... args) const noexcept \
         { \
-            return CALLEE(__VA_ARGS__, ctx::vl); \
+            return CALLEE(__VA_ARGS__, ctx::vl); \
         }; \
     };
 #define XSIMD_RVV_WRAPPER_DROP_1ST_CUSTOM_ARGS_NOVL(KEY, CALLEE, SIGNATURE, ...) \

From 2b4f9034fd59eb4f26df2339812350885019ba90 Mon Sep 17 00:00:00 2001
From: serge-sans-paille
Date: Tue, 30 Dec 2025 11:08:46 +0100
Subject: [PATCH 2/2] Implement batch_bool::mask() for riscv

As a followup to #1236
---
 include/xsimd/arch/xsimd_rvv.hpp           | 54 ++++++++++++++++++++++
 include/xsimd/types/xsimd_rvv_register.hpp |  1 +
 2 files changed, 55 insertions(+)

diff --git a/include/xsimd/arch/xsimd_rvv.hpp b/include/xsimd/arch/xsimd_rvv.hpp
index fddaf7e20..9e4098d30 100644
--- a/include/xsimd/arch/xsimd_rvv.hpp
+++ b/include/xsimd/arch/xsimd_rvv.hpp
@@ -14,6 +14,7 @@
 #include 
 #include 
 
+#include "../types/xsimd_batch_constant.hpp"
 #include "../types/xsimd_rvv_register.hpp"
 #include "xsimd_constants.hpp"
 
@@ -1504,6 +1505,59 @@ namespace xsimd
             const auto mask = abs(arg) < constants::maxflint>();
             return select(mask, to_float(detail::rvvfcvt_default(arg)), arg, rvv {});
         }
+
+        // mask
+        template
+        XSIMD_INLINE uint64_t mask(batch_bool const& self, requires_arch) noexcept;
+
+        template
+        XSIMD_INLINE uint64_t mask(batch_bool const& self, requires_arch) noexcept
+        {
+            XSIMD_IF_CONSTEXPR((8 * sizeof(T)) >= batch_bool::size)
+            {
+                // (A) Easy case: the number of slots fits in T.
+                const auto zero = detail::broadcast, types::detail::rvv_width_m1>(T(0));
+                auto ones = detail::broadcast, A::width>(1);
+                auto iota = detail::rvvid(as_unsigned_integer_t {});
+                auto upowers = detail::rvvsll(ones, iota);
+                auto r = __riscv_vredor(self.data.as_mask(), upowers, (typename decltype(zero)::register_type)zero, batch_bool::size);
+                return detail::reduce_scalar>(r);
+            }
+            else XSIMD_IF_CONSTEXPR((2 * 8 * sizeof(T)) == batch_bool::size)
+            {
+                // (B) We need two rounds, one for the low part, one for the high part.
+
+                struct LowerHalf
+                {
+                    static constexpr bool get(unsigned i, unsigned n) { return i < n / 2; }
+                };
+                struct UpperHalf
+                {
+                    static constexpr bool get(unsigned i, unsigned n) { return i >= n / 2; }
+                };
+
+                // The low part is similar to the approach in (A).
+                const auto zero = detail::broadcast, types::detail::rvv_width_m1>(T(0));
+                auto ones = detail::broadcast, A::width>(1);
+                auto iota = detail::rvvid(as_unsigned_integer_t {});
+                auto upowers = detail::rvvsll(ones, iota);
+                auto low_mask = self & make_batch_bool_constant();
+                auto r_low = __riscv_vredor(low_mask.data.as_mask(), upowers, (typename decltype(zero)::register_type)zero, batch_bool::size);
+
+                // The high part requires sliding the upowers filter up to match the high mask.
+                upowers = detail::rvvslideup(upowers, upowers, 8 * sizeof(T));
+                auto high_mask = self & make_batch_bool_constant();
+                auto r_high = __riscv_vredor(high_mask.data.as_mask(), upowers, (typename decltype(zero)::register_type)zero, batch_bool::size);
+
+                // Concatenate the two parts.
+                return (uint64_t)detail::reduce_scalar>(r_low) | ((uint64_t)detail::reduce_scalar>(r_high) << (8 * sizeof(T)));
+            }
+            else
+            {
+                // (C) We could generalize (B), but the two branches above already cover a lot of cases.
+                return mask(self, common {});
+            }
+        }
     } // namespace kernel

 } // namespace xsimd
diff --git a/include/xsimd/types/xsimd_rvv_register.hpp b/include/xsimd/types/xsimd_rvv_register.hpp
index 8ebeda48f..83f8bfcfd 100644
--- a/include/xsimd/types/xsimd_rvv_register.hpp
+++ b/include/xsimd/types/xsimd_rvv_register.hpp
@@ -391,6 +391,7 @@ namespace xsimd
         {
         }
         operator type() const noexcept { return bool_info::bitcast(value); }
+        type as_mask() const noexcept { return (type) * this; }
     };

     template
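
Note on the reduction used above (a scalar model, not part of either patch): mask() packs the predicate into a uint64_t with one bit per lane. The RVV code materializes the per-lane constants (1 << i) with vid/vsll and OR-reduces them under the mask with __riscv_vredor; the loop below computes the same value one lane at a time. The names mask_model, lane_active and width are illustrative only and do not exist in xsimd.

    #include <cstddef>
    #include <cstdint>

    // Scalar model of batch_bool::mask(): lane i of the predicate contributes
    // bit i of the result.  lane_active[] stands in for the RVV mask register
    // and `width` for the batch size; both names are made up for this sketch.
    inline std::uint64_t mask_model(const bool* lane_active, std::size_t width) noexcept
    {
        std::uint64_t bits = 0;
        for (std::size_t i = 0; i < width; ++i)
            if (lane_active[i])
                bits |= std::uint64_t(1) << i; // same as OR-reducing the values (1 << i)
        return bits;
    }

Case (B) exists because each (1 << i) constant lives in a vector element as wide as T, so a single reduction can only cover the first 8 * sizeof(T) lanes; the patch therefore slides the powers-of-two vector up by 8 * sizeof(T) elements, reduces the upper half of the mask separately, and stitches the two partial results together with a shift and an OR.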