Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 57 additions & 6 deletions include/xsimd/arch/xsimd_rvv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <type_traits>
#include <utility>

#include "../types/xsimd_batch_constant.hpp"
#include "../types/xsimd_rvv_register.hpp"
#include "xsimd_constants.hpp"

Expand Down Expand Up @@ -135,10 +136,9 @@
template <class Ret, class... Args> \
struct impl<KEY, Ret(Args...)> \
{ \
using ctx = ctx<KEY>; \
constexpr Ret operator()(Args... args) const noexcept \
{ \
return CALLEE(args..., ctx::vl); \
return CALLEE(args..., ctx<KEY>::vl); \
}; \
};
#define XSIMD_RVV_WRAPPER_NOVL(KEY, CALLEE, ...) \
Expand All @@ -154,20 +154,18 @@
template <class Ret, class First, class... Args> \
struct impl<KEY, Ret(First, Args...)> \
{ \
using ctx = ctx<KEY>; \
constexpr Ret operator()(First, Args... args) const noexcept \
{ \
return CALLEE(args..., ctx::vl); \
return CALLEE(args..., ctx<KEY>::vl); \
}; \
};
// Defines a partial specialization impl<KEY, Ret(First, Args...)> whose call
// operator takes a leading unnamed `First` parameter that it never uses, and
// forwards to CALLEE with the macro's variadic arguments (__VA_ARGS__) followed
// by the `vl` member of ctx<KEY>. SIGNATURE is not referenced in the body shown.
// NOTE(review): this listing looks like a rendered diff with removed and added
// lines interleaved -- presumably the `using ctx` alias and the `ctx::vl` return
// are the removed lines and the `ctx<KEY>::vl` return is the added one; confirm
// against the actual header before relying on this text.
#define XSIMD_RVV_WRAPPER_DROP_1ST_CUSTOM_ARGS(KEY, CALLEE, SIGNATURE, ...) \
template <class Ret, class First, class... Args> \
struct impl<KEY, Ret(First, Args...)> \
{ \
using ctx = ctx<KEY>; \
constexpr Ret operator()(First, Args... args) const noexcept \
{ \
return CALLEE(__VA_ARGS__, ctx::vl); \
return CALLEE(__VA_ARGS__, ctx<KEY>::vl); \
}; \
};
#define XSIMD_RVV_WRAPPER_DROP_1ST_CUSTOM_ARGS_NOVL(KEY, CALLEE, SIGNATURE, ...) \
Expand Down Expand Up @@ -1507,6 +1505,59 @@ namespace xsimd
const auto mask = abs(arg) < constants::maxflint<batch<T, A>>();
return select(mask, to_float(detail::rvvfcvt_default(arg)), arg, rvv {});
}

// mask
// Declaration of the `common` overload, which the rvv overload below falls
// back to when neither of its specialised strategies applies.
template <class A, class T>
XSIMD_INLINE uint64_t mask(batch_bool<T, A> const& self, requires_arch<common>) noexcept;

/**
 * rvv overload of mask(): folds a batch_bool into a 64-bit integer.
 *
 * Three compile-time strategies are selected from the relation between the
 * bit width of T and the number of lanes in the batch:
 *  - lanes <= bits(T): a single masked OR-reduction of per-lane weights.
 *  - lanes == 2 * bits(T): two masked OR-reductions (one per half of the
 *    batch), the second result being shifted left by bits(T) before merging.
 *  - otherwise: delegate to the `common` overload.
 *
 * @param self boolean batch to convert.
 * @return the reduced value(s), widened to uint64_t.
 */
template <class A, class T>
XSIMD_INLINE uint64_t mask(batch_bool<T, A> const& self, requires_arch<rvv>) noexcept
{
    using uint_type = as_unsigned_integer_t<T>;
    using bool_batch = batch_bool<T, A>;
    constexpr size_t lane_bits = 8 * sizeof(T);

    XSIMD_IF_CONSTEXPR(lane_bits >= bool_batch::size)
    {
        // One lane per bit of T is enough: a single reduction does the job.
        // Each lane carries the weight 1 << index (index vector from
        // detail::rvvid, presumably the lane numbers), and the reduction ORs
        // the weights of the lanes selected by `self`, starting from zero.
        const auto seed = detail::broadcast<uint_type, types::detail::rvv_width_m1>(T(0));
        using seed_register = typename decltype(seed)::register_type;
        auto unit = detail::broadcast<uint_type, A::width>(1);
        auto lane_index = detail::rvvid(uint_type {});
        auto bit_weights = detail::rvvsll(unit, lane_index);
        auto folded = __riscv_vredor(self.data.as_mask(), bit_weights, static_cast<seed_register>(seed), bool_batch::size);
        return detail::reduce_scalar<A, uint_type>(folded);
    }
    else XSIMD_IF_CONSTEXPR((2 * lane_bits) == bool_batch::size)
    {
        // Twice as many lanes as bits in T: reduce each half of the batch
        // separately, then glue the two partial results together.

        // Generator selecting the lanes of the first half of the batch.
        struct first_half_lanes
        {
            static constexpr bool get(unsigned i, unsigned n) { return i < n / 2; }
        };
        // Generator selecting the lanes of the second half of the batch.
        struct second_half_lanes
        {
            static constexpr bool get(unsigned i, unsigned n) { return !(i < n / 2); }
        };

        // Same weight construction as in the single-reduction strategy.
        const auto seed = detail::broadcast<uint_type, types::detail::rvv_width_m1>(T(0));
        using seed_register = typename decltype(seed)::register_type;
        auto unit = detail::broadcast<uint_type, A::width>(1);
        auto lane_index = detail::rvvid(uint_type {});
        auto bit_weights = detail::rvvsll(unit, lane_index);

        // First half: keep only the first-half lanes of `self` before reducing.
        auto first_half = self & make_batch_bool_constant<T, first_half_lanes, A>();
        auto folded_lo = __riscv_vredor(first_half.data.as_mask(), bit_weights, static_cast<seed_register>(seed), bool_batch::size);

        // Second half: slide the weights up by bits(T) lanes so that they line
        // up with the second-half lanes, then reduce under the second-half mask.
        bit_weights = detail::rvvslideup(bit_weights, bit_weights, lane_bits);
        auto second_half = self & make_batch_bool_constant<T, second_half_lanes, A>();
        auto folded_hi = __riscv_vredor(second_half.data.as_mask(), bit_weights, static_cast<seed_register>(seed), bool_batch::size);

        // Merge: first-half result in the low bits, second-half result above it.
        return static_cast<uint64_t>(detail::reduce_scalar<A, uint_type>(folded_lo))
            | (static_cast<uint64_t>(detail::reduce_scalar<A, uint_type>(folded_hi)) << lane_bits);
    }
    else
    {
        // Remaining layouts are not specialised here; use the common overload.
        return mask(self, common {});
    }
}
} // namespace kernel
} // namespace xsimd

Expand Down
1 change: 1 addition & 0 deletions include/xsimd/types/xsimd_rvv_register.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,7 @@ namespace xsimd
{
}
// Implicit conversion to `type`: the stored `value` passed through bool_info::bitcast.
operator type() const noexcept
{
    return bool_info::bitcast(value);
}
// Explicitly named conversion: returns this object converted to `type`.
type as_mask() const noexcept
{
    return static_cast<type>(*this);
}
};

template <class T, size_t Width = XSIMD_RVV_BITS>
Expand Down