Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion include/xsimd/arch/common/xsimd_common_math.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2117,7 +2117,7 @@ namespace xsimd
// Overload of reduce selected by the level tag std::integral_constant<unsigned, 1>.
// The operation argument is unnamed and unused: the result is simply lane 0 of
// `self`, obtained through the kernel `first` dispatched on architecture A.
// NOTE(review): presumably the terminal step of the level-parameterised reduce
// overload that follows - confirm against the full file.
template <class Op, class A, class T>
XSIMD_INLINE T reduce(Op, batch<T, A> const& self, std::integral_constant<unsigned, 1>) noexcept
{
    return ::xsimd::kernel::first(self, A {});
}

template <class Op, class A, class T, unsigned Lvl>
Expand Down
7 changes: 7 additions & 0 deletions include/xsimd/arch/xsimd_emulated.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,13 @@ namespace xsimd
return r;
}

// first
// Returns lane 0 of an emulated batch by indexing its `data` member directly.
// The inline specifier is placed before the return type, matching the other
// `first` overloads.
template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
XSIMD_INLINE T first(batch<T, A> const& self, requires_arch<emulated<N>>) noexcept
{
    return self.data[0];
}

#if 0
// count
template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
Expand Down
13 changes: 13 additions & 0 deletions include/xsimd/arch/xsimd_rvv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1335,6 +1335,19 @@ namespace xsimd
return result;
}

// first
// Scalar overload for RVV: forwards the batch to detail::rvvmv_lane0 and
// returns the value it yields.
template <class A, class T, detail::rvv_enable_all_t<T> = 0>
XSIMD_INLINE T first(batch<T, A> const& arg, requires_arch<rvv>) noexcept
{
    T const lane0 = detail::rvvmv_lane0(arg);
    return lane0;
}

// Complex overload for RVV: applies detail::rvvmv_lane0 to the real and the
// imaginary batches separately and packs both results into a std::complex<T>.
template <class A, class T, detail::rvv_enable_all_t<T> = 0>
XSIMD_INLINE std::complex<T> first(batch<std::complex<T>, A> const& arg, requires_arch<rvv>) noexcept
{
    T const re = detail::rvvmv_lane0(arg.real());
    T const im = detail::rvvmv_lane0(arg.imag());
    return std::complex<T> { re, im };
}

// insert
template <class A, class T, size_t I, detail::rvv_enable_all_t<T> = 0>
XSIMD_INLINE batch<T, A> insert(batch<T, A> const& arg, T val, index<I>, requires_arch<rvv>) noexcept
Expand Down
102 changes: 51 additions & 51 deletions include/xsimd/arch/xsimd_sse2.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -673,6 +673,53 @@ namespace xsimd
return _mm_castsi128_pd(_mm_cmpeq_epi32(_mm_castpd_si128(self), _mm_castpd_si128(other)));
}

// first
// float overload for SSE2: reads the lowest single-precision element of the
// register with _mm_cvtss_f32.
template <class A>
XSIMD_INLINE float first(batch<float, A> const& self, requires_arch<sse2>) noexcept
{
    const float lane0 = _mm_cvtss_f32(self);
    return lane0;
}

// double overload for SSE2: reads the lowest double-precision element of the
// register with _mm_cvtsd_f64.
template <class A>
XSIMD_INLINE double first(batch<double, A> const& self, requires_arch<sse2>) noexcept
{
    const double lane0 = _mm_cvtsd_f64(self);
    return lane0;
}

// Integral overload for SSE2: extracts lane 0 of an integer batch as a scalar T.
// The extraction strategy is picked from sizeof(T); any other size trips the
// assertion and yields a value-initialised T.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE T first(batch<T, A> const& self, requires_arch<sse2>) noexcept
{
    XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
    {
        // Take the low 32 bits of the register and keep only the lowest byte.
        const int low_dword = _mm_cvtsi128_si32(self);
        return static_cast<T>(low_dword & 0xFF);
    }
    else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
    {
        // Take the low 32 bits of the register and keep only the lowest 16 bits.
        const int low_dword = _mm_cvtsi128_si32(self);
        return static_cast<T>(low_dword & 0xFFFF);
    }
    else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
    {
        const int low_dword = _mm_cvtsi128_si32(self);
        return static_cast<T>(low_dword);
    }
    else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
    {
#if defined(__x86_64__)
        const auto low_qword = _mm_cvtsi128_si64(self);
        return static_cast<T>(low_qword);
#else
        // Fallback when __x86_64__ is not defined: store the low 64 bits to
        // memory and copy them into a scalar.
        __m128i spill;
        _mm_storel_epi64(&spill, self);
        int64_t low_qword;
        std::memcpy(&low_qword, &spill, sizeof(low_qword));
        return low_qword;
#endif
    }
    else
    {
        assert(false && "unsupported arch/op combination");
        return {};
    }
}

// from_mask
template <class A>
XSIMD_INLINE batch_bool<float, A> from_mask(batch_bool<float, A> const&, uint64_t mask, requires_arch<sse2>) noexcept
Expand Down Expand Up @@ -1269,10 +1316,10 @@ namespace xsimd
batch<T, A> step2 = _mm_shufflelo_epi16(acc1, mask2);
batch<T, A> acc2 = max(acc1, step2);
if (sizeof(T) == 2)
return acc2.get(0);
return first(acc2, A {});
batch<T, A> step3 = bitwise_cast<T>(bitwise_cast<uint16_t>(acc2) >> 8);
batch<T, A> acc3 = max(acc2, step3);
return acc3.get(0);
return first(acc3, A {});
}

// reduce_min
Expand All @@ -1291,10 +1338,10 @@ namespace xsimd
batch<T, A> step2 = _mm_shufflelo_epi16(acc1, mask2);
batch<T, A> acc2 = min(acc1, step2);
if (sizeof(T) == 2)
return acc2.get(0);
return first(acc2, A {});
batch<T, A> step3 = bitwise_cast<T>(bitwise_cast<uint16_t>(acc2) >> 8);
batch<T, A> acc3 = min(acc2, step3);
return acc3.get(0);
return first(acc3, A {});
}

// rsqrt
Expand Down Expand Up @@ -1783,53 +1830,6 @@ namespace xsimd
return _mm_unpacklo_pd(self, other);
}

// first
// float overload for SSE2: reads the lowest single-precision element of the
// register with _mm_cvtss_f32.
template <class A>
XSIMD_INLINE float first(batch<float, A> const& self, requires_arch<sse2>) noexcept
{
    const float lane0 = _mm_cvtss_f32(self);
    return lane0;
}

// double overload for SSE2: reads the lowest double-precision element of the
// register with _mm_cvtsd_f64.
template <class A>
XSIMD_INLINE double first(batch<double, A> const& self, requires_arch<sse2>) noexcept
{
    const double lane0 = _mm_cvtsd_f64(self);
    return lane0;
}

// Integral overload for SSE2: extracts lane 0 of an integer batch as a scalar T.
// The extraction strategy is picked from sizeof(T); any other size trips the
// assertion and yields a value-initialised T.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE T first(batch<T, A> const& self, requires_arch<sse2>) noexcept
{
    XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
    {
        // Take the low 32 bits of the register and keep only the lowest byte.
        const int low_dword = _mm_cvtsi128_si32(self);
        return static_cast<T>(low_dword & 0xFF);
    }
    else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
    {
        // Take the low 32 bits of the register and keep only the lowest 16 bits.
        const int low_dword = _mm_cvtsi128_si32(self);
        return static_cast<T>(low_dword & 0xFFFF);
    }
    else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
    {
        const int low_dword = _mm_cvtsi128_si32(self);
        return static_cast<T>(low_dword);
    }
    else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
    {
#if defined(__x86_64__)
        const auto low_qword = _mm_cvtsi128_si64(self);
        return static_cast<T>(low_qword);
#else
        // Fallback when __x86_64__ is not defined: store the low 64 bits to
        // memory and copy them into a scalar.
        __m128i spill;
        _mm_storel_epi64(&spill, self);
        int64_t low_qword;
        std::memcpy(&low_qword, &spill, sizeof(low_qword));
        return low_qword;
#endif
    }
    else
    {
        assert(false && "unsupported arch/op combination");
        return {};
    }
}

}
}

Expand Down
7 changes: 7 additions & 0 deletions include/xsimd/arch/xsimd_sve.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -949,6 +949,13 @@ namespace xsimd
return svsel(index_predicate, broadcast<A, T>(val, sve {}), arg);
}

// first
// SVE overload: returns lane 0 by subscripting the batch's `data` member.
template <class A, class T, detail::sve_enable_all_t<T> = 0>
XSIMD_INLINE T first(batch<T, A> const& self, requires_arch<sve>) noexcept
{
    T const lane0 = self.data[0];
    return lane0;
}

// all
template <class A, class T, detail::sve_enable_all_t<T> = 0>
XSIMD_INLINE bool all(batch_bool<T, A> const& arg, requires_arch<sve>) noexcept
Expand Down
36 changes: 36 additions & 0 deletions include/xsimd/arch/xsimd_wasm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -531,6 +531,42 @@ namespace xsimd
static_cast<int32_t>(wasm_f32x4_extract_lane(self, 3)));
}
}
// first
// float overload for WebAssembly SIMD: extracts lane 0 of the f32x4 vector.
template <class A>
XSIMD_INLINE float first(batch<float, A> const& self, requires_arch<wasm>) noexcept
{
    const float lane0 = wasm_f32x4_extract_lane(self, 0);
    return lane0;
}
// Integral overload for WebAssembly SIMD: extracts lane 0 with the
// extract_lane intrinsic whose lane width matches sizeof(T). Any other size
// trips the assertion and yields a value-initialised T.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE T first(batch<T, A> const& self, requires_arch<wasm>) noexcept
{
    XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
    {
        return static_cast<T>(wasm_i8x16_extract_lane(self, 0));
    }
    else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
    {
        return static_cast<T>(wasm_i16x8_extract_lane(self, 0));
    }
    else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
    {
        return static_cast<T>(wasm_i32x4_extract_lane(self, 0));
    }
    else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
    {
        return static_cast<T>(wasm_i64x2_extract_lane(self, 0));
    }
    else
    {
        assert(false && "unsupported arch/op combination");
        return {};
    }
}
// double overload for WebAssembly SIMD: extracts lane 0 of the f64x2 vector.
template <class A>
XSIMD_INLINE double first(batch<double, A> const& self, requires_arch<wasm>) noexcept
{
    const double lane0 = wasm_f64x2_extract_lane(self, 0);
    return lane0;
}

// floor
template <class A>
Expand Down