Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 0 additions & 24 deletions include/xsimd/arch/xsimd_avx512bw.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -684,18 +684,6 @@ namespace xsimd
return bitwise_cast<int16_t>(swizzle(bitwise_cast<uint16_t>(self), mask, avx512bw {}));
}

// Dynamic swizzle of 8-bit elements: result[i] = self[mask[i] % 64].
//
// _mm512_shuffle_epi8 cannot be used here: it permutes bytes only inside each
// 128-bit lane and zeroes the output when bit 7 of the index is set, so any
// index that crosses a lane would silently pick the wrong byte. AVX512BW has no
// cross-lane byte permute, so the result is assembled from the cross-lane
// 16-bit permute (_mm512_permutexvar_epi16), run once for the even output
// bytes and once for the odd output bytes.
template <class A>
XSIMD_INLINE batch<uint8_t, A> swizzle(batch<uint8_t, A> const& self, batch<uint8_t, A> mask, requires_arch<avx512bw>) noexcept
{
    const __m512i src = self;
    const __m512i idx = mask;
    const __m512i one = _mm512_set1_epi16(1);

    // For every 16-bit word k: the byte index requested by output byte 2k
    // (low byte of the mask word) and by output byte 2k+1 (high byte).
    const __m512i even_idx = _mm512_and_si512(idx, _mm512_set1_epi16(0x00FF));
    const __m512i odd_idx = _mm512_srli_epi16(idx, 8);

    // Fetch the source word that contains each requested byte. The permute
    // only reads the low 5 bits of (index >> 1), i.e. the byte index modulo 64.
    const __m512i even_words = _mm512_permutexvar_epi16(_mm512_srli_epi16(even_idx, 1), src);
    const __m512i odd_words = _mm512_permutexvar_epi16(_mm512_srli_epi16(odd_idx, 1), src);

    // Even output bytes live in the low half of the word: when the requested
    // index is odd the wanted byte is the high half, so shift it down.
    const __mmask32 even_wants_high = _mm512_test_epi16_mask(even_idx, one);
    const __m512i even_bytes = _mm512_mask_srli_epi16(even_words, even_wants_high, even_words, 8);

    // Odd output bytes live in the high half of the word: when the requested
    // index is even the wanted byte is the low half, so shift it up.
    const __mmask32 odd_wants_low = _mm512_testn_epi16_mask(odd_idx, one);
    const __m512i odd_bytes = _mm512_mask_slli_epi16(odd_words, odd_wants_low, odd_words, 8);

    // Bits set for odd byte positions: take those from odd_bytes, the rest
    // from even_bytes.
    return _mm512_mask_blend_epi8(0xAAAAAAAAAAAAAAAAULL, even_bytes, odd_bytes);
}

// Signed 8-bit dynamic swizzle: view the data as uint8_t, run the unsigned
// avx512bw overload, then view the result as int8_t again.
template <class A>
XSIMD_INLINE batch<int8_t, A> swizzle(batch<int8_t, A> const& self, batch<uint8_t, A> mask, requires_arch<avx512bw>) noexcept
{
    const auto as_unsigned = bitwise_cast<uint8_t>(self);
    const auto permuted = swizzle(as_unsigned, mask, avx512bw {});
    return bitwise_cast<int8_t>(permuted);
}

// swizzle (static version)
template <class A, uint16_t... Vs>
XSIMD_INLINE batch<uint16_t, A> swizzle(batch<uint16_t, A> const& self, batch_constant<uint16_t, A, Vs...> mask, requires_arch<avx512bw>) noexcept
Expand All @@ -709,18 +697,6 @@ namespace xsimd
return swizzle(self, mask.as_batch(), avx512bw {});
}

// Compile-time-mask swizzle for uint8_t: materialise the constant mask as a
// regular batch and defer to the dynamic avx512bw overload.
template <class A, uint8_t... Vs>
XSIMD_INLINE batch<uint8_t, A> swizzle(batch<uint8_t, A> const& self, batch_constant<uint8_t, A, Vs...> mask, requires_arch<avx512bw>) noexcept
{
    const auto runtime_mask = mask.as_batch();
    return swizzle(self, runtime_mask, avx512bw {});
}

// Compile-time-mask swizzle for int8_t: materialise the constant mask as a
// regular batch and defer to the dynamic avx512bw overload.
template <class A, uint8_t... Vs>
XSIMD_INLINE batch<int8_t, A> swizzle(batch<int8_t, A> const& self, batch_constant<uint8_t, A, Vs...> mask, requires_arch<avx512bw>) noexcept
{
    const auto runtime_mask = mask.as_batch();
    return swizzle(self, runtime_mask, avx512bw {});
}

// zip_hi
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> zip_hi(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512bw>) noexcept
Expand Down
25 changes: 25 additions & 0 deletions include/xsimd/arch/xsimd_avx512vbmi.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,31 @@ namespace xsimd
return _mm512_maskz_permutexvar_epi8(mask, _mm512_load_epi32(slide_pattern.data()), x);
}

// swizzle (dynamic version)
// Dynamic uint8_t swizzle for avx512vbmi. Note the intrinsic's argument order:
// the index vector comes first, the data vector second.
template <class A>
XSIMD_INLINE batch<uint8_t, A> swizzle(batch<uint8_t, A> const& self, batch<uint8_t, A> mask, requires_arch<avx512vbmi>) noexcept
{
    const auto permuted = _mm512_permutexvar_epi8(mask, self);
    return permuted;
}

// Signed 8-bit dynamic swizzle: view the data as uint8_t, run the unsigned
// avx512vbmi overload, then view the result as int8_t again.
template <class A>
XSIMD_INLINE batch<int8_t, A> swizzle(batch<int8_t, A> const& self, batch<uint8_t, A> mask, requires_arch<avx512vbmi>) noexcept
{
    const auto as_unsigned = bitwise_cast<uint8_t>(self);
    const auto permuted = swizzle(as_unsigned, mask, avx512vbmi {});
    return bitwise_cast<int8_t>(permuted);
}

// swizzle (static version)
// Compile-time-mask swizzle for uint8_t: materialise the constant mask as a
// regular batch and defer to the dynamic avx512vbmi overload.
template <class A, uint8_t... Vs>
XSIMD_INLINE batch<uint8_t, A> swizzle(batch<uint8_t, A> const& self, batch_constant<uint8_t, A, Vs...> mask, requires_arch<avx512vbmi>) noexcept
{
    const auto runtime_mask = mask.as_batch();
    return swizzle(self, runtime_mask, avx512vbmi {});
}

// Compile-time-mask swizzle for int8_t: materialise the constant mask as a
// regular batch and defer to the dynamic avx512vbmi overload.
template <class A, uint8_t... Vs>
XSIMD_INLINE batch<int8_t, A> swizzle(batch<int8_t, A> const& self, batch_constant<uint8_t, A, Vs...> mask, requires_arch<avx512vbmi>) noexcept
{
    const auto runtime_mask = mask.as_batch();
    return swizzle(self, runtime_mask, avx512vbmi {});
}
}
}

Expand Down
73 changes: 73 additions & 0 deletions include/xsimd/arch/xsimd_avx512vbmi2.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/***************************************************************************
* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
* Martin Renou *
* Copyright (c) QuantStack *
* Copyright (c) Serge Guelton *
* *
* Distributed under the terms of the BSD 3-Clause License. *
* *
* The full license is in the file LICENSE, distributed with this software. *
****************************************************************************/

#ifndef XSIMD_AVX512VBMI2_HPP
#define XSIMD_AVX512VBMI2_HPP

#include <array>
#include <type_traits>

#include "../types/xsimd_avx512vbmi2_register.hpp"

namespace xsimd
{

    namespace kernel
    {
        using namespace types;

        // compress
        // Each overload hands the batch_bool's raw mask and the data batch to
        // the zero-masking compress intrinsic of the matching element width.
        template <class A>
        XSIMD_INLINE batch<int16_t, A> compress(batch<int16_t, A> const& self, batch_bool<int16_t, A> const& mask, requires_arch<avx512vbmi2>) noexcept
        {
            const auto selected = mask.mask();
            return _mm512_maskz_compress_epi16(selected, self);
        }
        template <class A>
        XSIMD_INLINE batch<uint16_t, A> compress(batch<uint16_t, A> const& self, batch_bool<uint16_t, A> const& mask, requires_arch<avx512vbmi2>) noexcept
        {
            const auto selected = mask.mask();
            return _mm512_maskz_compress_epi16(selected, self);
        }
        template <class A>
        XSIMD_INLINE batch<int8_t, A> compress(batch<int8_t, A> const& self, batch_bool<int8_t, A> const& mask, requires_arch<avx512vbmi2>) noexcept
        {
            const auto selected = mask.mask();
            return _mm512_maskz_compress_epi8(selected, self);
        }
        template <class A>
        XSIMD_INLINE batch<uint8_t, A> compress(batch<uint8_t, A> const& self, batch_bool<uint8_t, A> const& mask, requires_arch<avx512vbmi2>) noexcept
        {
            const auto selected = mask.mask();
            return _mm512_maskz_compress_epi8(selected, self);
        }

        // expand
        // Each overload hands the batch_bool's raw mask and the data batch to
        // the zero-masking expand intrinsic of the matching element width.
        template <class A>
        XSIMD_INLINE batch<int16_t, A> expand(batch<int16_t, A> const& self, batch_bool<int16_t, A> const& mask, requires_arch<avx512vbmi2>) noexcept
        {
            const auto targets = mask.mask();
            return _mm512_maskz_expand_epi16(targets, self);
        }
        template <class A>
        XSIMD_INLINE batch<uint16_t, A> expand(batch<uint16_t, A> const& self, batch_bool<uint16_t, A> const& mask, requires_arch<avx512vbmi2>) noexcept
        {
            const auto targets = mask.mask();
            return _mm512_maskz_expand_epi16(targets, self);
        }
        template <class A>
        XSIMD_INLINE batch<int8_t, A> expand(batch<int8_t, A> const& self, batch_bool<int8_t, A> const& mask, requires_arch<avx512vbmi2>) noexcept
        {
            const auto targets = mask.mask();
            return _mm512_maskz_expand_epi8(targets, self);
        }
        template <class A>
        XSIMD_INLINE batch<uint8_t, A> expand(batch<uint8_t, A> const& self, batch_bool<uint8_t, A> const& mask, requires_arch<avx512vbmi2>) noexcept
        {
            const auto targets = mask.mask();
            return _mm512_maskz_expand_epi8(targets, self);
        }
    }
}

#endif
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@
* The full license is in the file LICENSE, distributed with this software. *
****************************************************************************/

#ifndef XSIMD_AVX512VNNI_AVX512VBMI_HPP
#define XSIMD_AVX512VNNI_AVX512VBMI_HPP
#ifndef XSIMD_AVX512VNNI_AVX512VBMI2_HPP
#define XSIMD_AVX512VNNI_AVX512VBMI2_HPP

#include <array>
#include <type_traits>

#include "../types/xsimd_avx512vnni_avx512vbmi_register.hpp"
#include "../types/xsimd_avx512vnni_avx512vbmi2_register.hpp"

#endif
8 changes: 6 additions & 2 deletions include/xsimd/arch/xsimd_isa.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,12 +92,16 @@
#include "./xsimd_avx512vbmi.hpp"
#endif

#if XSIMD_WITH_AVX512VBMI2
#include "./xsimd_avx512vbmi2.hpp"
#endif

#if XSIMD_WITH_AVX512VNNI_AVX512BW
#include "./xsimd_avx512vnni_avx512bw.hpp"
#endif

#if XSIMD_WITH_AVX512VNNI_AVX512VBMI
#include "./xsimd_avx512vnni_avx512vbmi.hpp"
#if XSIMD_WITH_AVX512VNNI_AVX512VBMI2
#include "./xsimd_avx512vnni_avx512vbmi2.hpp"
#endif

#if XSIMD_WITH_NEON
Expand Down
2 changes: 1 addition & 1 deletion include/xsimd/config/xsimd_arch.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ namespace xsimd
} // namespace detail

using all_x86_architectures = arch_list<
avx512vnni<avx512vbmi>, avx512vbmi, avx512ifma, avx512pf, avx512vnni<avx512bw>, avx512bw, avx512er, avx512dq, avx512cd, avx512f,
avx512vnni<avx512vbmi2>, avx512vbmi2, avx512vbmi, avx512ifma, avx512pf, avx512vnni<avx512bw>, avx512bw, avx512er, avx512dq, avx512cd, avx512f,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you motivate this change? Why would composing AVX512 VNNI with VBMI2 be more legitimate than composing it with VBMI?

Copy link
Contributor Author

@junparser junparser Apr 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I made this change based on https://en.wikichip.org/wiki/x86/avx512_vnni. The table there shows that all of the listed architectures support VBMI2 as well as VNNI.

avxvnni, fma3<avx2>, avx2, fma3<avx>, avx, fma4, fma3<sse4_2>,
sse4_2, sse4_1, /*sse4a,*/ ssse3, sse3, sse2>;

Expand Down
21 changes: 16 additions & 5 deletions include/xsimd/config/xsimd_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -299,24 +299,35 @@
#define XSIMD_WITH_AVX512VBMI 0
#endif

/**
* @ingroup xsimd_config_macro
*
* Set to 1 if AVX512VBMI2 is available at compile-time, to 0 otherwise.
*
* When the compiler defines __AVX512VBMI2__ the macro takes the value of
* XSIMD_WITH_AVX512F rather than a literal 1, so it is only non-zero when
* XSIMD_WITH_AVX512F is non-zero as well.
*/
#ifdef __AVX512VBMI2__
#define XSIMD_WITH_AVX512VBMI2 XSIMD_WITH_AVX512F
#else
#define XSIMD_WITH_AVX512VBMI2 0
#endif

/**
* @ingroup xsimd_config_macro
*
* Set to 1 if AVX512VNNI is available at compile-time, to 0 otherwise.
*/
#ifdef __AVX512VNNI__

#if XSIMD_WITH_AVX512VBMI
#define XSIMD_WITH_AVX512VNNI_AVX512VBMI XSIMD_WITH_AVX512F
#if XSIMD_WITH_AVX512VBMI2
#define XSIMD_WITH_AVX512VNNI_AVX512VBMI2 XSIMD_WITH_AVX512F
#define XSIMD_WITH_AVX512VNNI_AVX512BW XSIMD_WITH_AVX512F
#else
#define XSIMD_WITH_AVX512VNNI_AVX512VBMI 0
#define XSIMD_WITH_AVX512VNNI_AVX512VBMI2 0
#define XSIMD_WITH_AVX512VNNI_AVX512BW XSIMD_WITH_AVX512F
#endif

#else

#define XSIMD_WITH_AVX512VNNI_AVX512VBMI 0
#define XSIMD_WITH_AVX512VNNI_AVX512VBMI2 0
#define XSIMD_WITH_AVX512VNNI_AVX512BW 0

#endif
Expand Down Expand Up @@ -455,7 +466,7 @@

#endif

#if !XSIMD_WITH_SSE2 && !XSIMD_WITH_SSE3 && !XSIMD_WITH_SSSE3 && !XSIMD_WITH_SSE4_1 && !XSIMD_WITH_SSE4_2 && !XSIMD_WITH_AVX && !XSIMD_WITH_AVX2 && !XSIMD_WITH_AVXVNNI && !XSIMD_WITH_FMA3_SSE && !XSIMD_WITH_FMA4 && !XSIMD_WITH_FMA3_AVX && !XSIMD_WITH_FMA3_AVX2 && !XSIMD_WITH_AVX512F && !XSIMD_WITH_AVX512CD && !XSIMD_WITH_AVX512DQ && !XSIMD_WITH_AVX512BW && !XSIMD_WITH_AVX512ER && !XSIMD_WITH_AVX512PF && !XSIMD_WITH_AVX512IFMA && !XSIMD_WITH_AVX512VBMI && !XSIMD_WITH_NEON && !XSIMD_WITH_NEON64 && !XSIMD_WITH_SVE && !XSIMD_WITH_RVV && !XSIMD_WITH_WASM
#if !XSIMD_WITH_SSE2 && !XSIMD_WITH_SSE3 && !XSIMD_WITH_SSSE3 && !XSIMD_WITH_SSE4_1 && !XSIMD_WITH_SSE4_2 && !XSIMD_WITH_AVX && !XSIMD_WITH_AVX2 && !XSIMD_WITH_AVXVNNI && !XSIMD_WITH_FMA3_SSE && !XSIMD_WITH_FMA4 && !XSIMD_WITH_FMA3_AVX && !XSIMD_WITH_FMA3_AVX2 && !XSIMD_WITH_AVX512F && !XSIMD_WITH_AVX512CD && !XSIMD_WITH_AVX512DQ && !XSIMD_WITH_AVX512BW && !XSIMD_WITH_AVX512ER && !XSIMD_WITH_AVX512PF && !XSIMD_WITH_AVX512IFMA && !XSIMD_WITH_AVX512VBMI && !XSIMD_WITH_AVX512VBMI2 && !XSIMD_WITH_NEON && !XSIMD_WITH_NEON64 && !XSIMD_WITH_SVE && !XSIMD_WITH_RVV && !XSIMD_WITH_WASM
#define XSIMD_NO_SUPPORTED_ARCHITECTURE
#endif

Expand Down
6 changes: 4 additions & 2 deletions include/xsimd/config/xsimd_cpuid.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,9 @@ namespace xsimd
ARCH_FIELD(avx512pf)
ARCH_FIELD(avx512ifma)
ARCH_FIELD(avx512vbmi)
ARCH_FIELD(avx512vbmi2)
ARCH_FIELD_EX(avx512vnni<::xsimd::avx512bw>, avx512vnni_bw)
ARCH_FIELD_EX(avx512vnni<::xsimd::avx512vbmi>, avx512vnni_vbmi)
ARCH_FIELD_EX(avx512vnni<::xsimd::avx512vbmi2>, avx512vnni_vbmi2)
ARCH_FIELD(neon)
ARCH_FIELD(neon64)
ARCH_FIELD_EX(i8mm<::xsimd::neon64>, i8mm_neon64)
Expand Down Expand Up @@ -245,8 +246,9 @@ namespace xsimd
avx512pf = regs7[1] >> 26 & avx512_state_os_enabled;
avx512ifma = regs7[1] >> 21 & avx512_state_os_enabled;
avx512vbmi = regs7[2] >> 1 & avx512_state_os_enabled;
avx512vbmi2 = regs7[2] >> 6 & avx512_state_os_enabled;
avx512vnni_bw = regs7[2] >> 11 & avx512_state_os_enabled;
avx512vnni_vbmi = avx512vbmi && avx512vnni_bw;
avx512vnni_vbmi2 = avx512vbmi2 && avx512vnni_bw;
#endif
}
};
Expand Down
3 changes: 2 additions & 1 deletion include/xsimd/types/xsimd_all_registers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,10 @@
#include "xsimd_fma3_avx_register.hpp"

#include "xsimd_avx512vnni_avx512bw_register.hpp"
#include "xsimd_avx512vnni_avx512vbmi_register.hpp"
#include "xsimd_avx512vnni_avx512vbmi2_register.hpp"

#include "xsimd_avx512ifma_register.hpp"
#include "xsimd_avx512vbmi2_register.hpp"
#include "xsimd_avx512vbmi_register.hpp"

#include "xsimd_avx512er_register.hpp"
Expand Down
51 changes: 51 additions & 0 deletions include/xsimd/types/xsimd_avx512vbmi2_register.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/***************************************************************************
* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
* Martin Renou *
* Copyright (c) QuantStack *
* Copyright (c) Serge Guelton *
* *
* Distributed under the terms of the BSD 3-Clause License. *
* *
* The full license is in the file LICENSE, distributed with this software. *
****************************************************************************/

#ifndef XSIMD_AVX512VBMI2_REGISTER_HPP
#define XSIMD_AVX512VBMI2_REGISTER_HPP

#include "./xsimd_avx512vbmi_register.hpp"

namespace xsimd
{

/**
* @ingroup architectures
*
* AVX512VBMI2 instructions
*
* Architecture tag deriving from avx512vbmi.
*/
struct avx512vbmi2 : avx512vbmi
{
// Compile-time support flag, taken from the XSIMD_WITH_AVX512VBMI2 configuration macro.
static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512VBMI2; }
// Unconditionally true in this header.
static constexpr bool available() noexcept { return true; }
// Human-readable architecture name.
static constexpr char const* name() noexcept { return "avx512vbmi2"; }
};

#if XSIMD_WITH_AVX512VBMI2

// Guard against a configuration that enables avx512vbmi2 without avx512vbmi.
#if !XSIMD_WITH_AVX512VBMI
#error "architecture inconsistency: avx512vbmi2 requires avx512vbmi"
#endif

namespace types
{
// Boolean batches for avx512vbmi2 use simd_avx512_bool_register<T>.
template <class T>
struct get_bool_simd_register<T, avx512vbmi2>
{
using type = simd_avx512_bool_register<T>;
};

// NOTE(review): presumably makes avx512vbmi2 reuse the register types declared
// for avx512vbmi - confirm against the macro definition.
XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx512vbmi2, avx512vbmi);

}
#endif
}
#endif
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
* The full license is in the file LICENSE, distributed with this software. *
****************************************************************************/

#ifndef XSIMD_AVX512VNNI_AVX512VBMI_REGISTER_HPP
#define XSIMD_AVX512VNNI_AVX512VBMI_REGISTER_HPP
#ifndef XSIMD_AVX512VNNI_AVX512VBMI2_REGISTER_HPP
#define XSIMD_AVX512VNNI_AVX512VBMI2_REGISTER_HPP

#include "./xsimd_avx512vbmi_register.hpp"
#include "./xsimd_avx512vbmi2_register.hpp"

namespace xsimd
{
Expand All @@ -25,29 +25,28 @@ namespace xsimd
* AVX512VNNI instructions
*/
template <>
struct avx512vnni<avx512vbmi> : avx512vbmi
struct avx512vnni<avx512vbmi2> : avx512vbmi2
{
static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512VNNI_AVX512VBMI; }
static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512VNNI_AVX512VBMI2; }
static constexpr bool available() noexcept { return true; }
static constexpr char const* name() noexcept { return "avx512vnni+avx512vbmi"; }
static constexpr char const* name() noexcept { return "avx512vnni+avx512vbmi2"; }
};

#if XSIMD_WITH_AVX512VNNI_AVX512VBMI
#if XSIMD_WITH_AVX512VNNI_AVX512VBMI2

#if !XSIMD_WITH_AVX512VBMI
#error "architecture inconsistency: avx512vnni+avx512vbmi requires avx512vbmi"
#if !XSIMD_WITH_AVX512VBMI2
#error "architecture inconsistency: avx512vnni+avx512vbmi2 requires avx512vbmi2"
#endif

namespace types
{
template <class T>
struct get_bool_simd_register<T, avx512vnni<avx512vbmi>>
struct get_bool_simd_register<T, avx512vnni<avx512vbmi2>>
{
using type = simd_avx512_bool_register<T>;
};

XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx512vnni<avx512vbmi>, avx512vbmi);

XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx512vnni<avx512vbmi2>, avx512vbmi2);
}
#endif
}
Expand Down
4 changes: 2 additions & 2 deletions test/test_shuffle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ struct compress_test
}
};

TEST_CASE_TEMPLATE("[compress]", B, BATCH_FLOAT_TYPES, xsimd::batch<uint32_t>, xsimd::batch<int32_t>, xsimd::batch<uint64_t>, xsimd::batch<int64_t>)
TEST_CASE_TEMPLATE("[compress]", B, BATCH_FLOAT_TYPES, xsimd::batch<uint8_t>, xsimd::batch<int8_t>, xsimd::batch<uint16_t>, xsimd::batch<int16_t>, xsimd::batch<uint32_t>, xsimd::batch<int32_t>, xsimd::batch<uint64_t>, xsimd::batch<int64_t>)
{
compress_test<B> Test;
SUBCASE("empty")
Expand Down Expand Up @@ -443,7 +443,7 @@ struct expand_test
}
};

TEST_CASE_TEMPLATE("[expand]", B, BATCH_FLOAT_TYPES, xsimd::batch<uint32_t>, xsimd::batch<int32_t>, xsimd::batch<uint64_t>, xsimd::batch<int64_t>)
TEST_CASE_TEMPLATE("[expand]", B, BATCH_FLOAT_TYPES, xsimd::batch<uint8_t>, xsimd::batch<int8_t>, xsimd::batch<uint16_t>, xsimd::batch<int16_t>, xsimd::batch<uint32_t>, xsimd::batch<int32_t>, xsimd::batch<uint64_t>, xsimd::batch<int64_t>)
{
expand_test<B> Test;
SUBCASE("empty")
Expand Down