diff --git a/include/xsimd/arch/xsimd_avx512bw.hpp b/include/xsimd/arch/xsimd_avx512bw.hpp
index 1c9416d4d..136adf1fe 100644
--- a/include/xsimd/arch/xsimd_avx512bw.hpp
+++ b/include/xsimd/arch/xsimd_avx512bw.hpp
@@ -684,18 +684,6 @@ namespace xsimd
             return bitwise_cast<int16_t>(swizzle(bitwise_cast<uint16_t>(self), mask, avx512bw {}));
         }
 
-        template <class A>
-        XSIMD_INLINE batch<uint8_t, A> swizzle(batch<uint8_t, A> const& self, batch<uint8_t, A> mask, requires_arch<avx512bw>) noexcept
-        {
-            return _mm512_shuffle_epi8(self, mask);
-        }
-
-        template <class A>
-        XSIMD_INLINE batch<int8_t, A> swizzle(batch<int8_t, A> const& self, batch<uint8_t, A> mask, requires_arch<avx512bw>) noexcept
-        {
-            return bitwise_cast<int8_t>(swizzle(bitwise_cast<uint8_t>(self), mask, avx512bw {}));
-        }
-
         // swizzle (static version)
         template <class A, uint16_t... Vs>
         XSIMD_INLINE batch<uint16_t, A> swizzle(batch<uint16_t, A> const& self, batch_constant<uint16_t, A, Vs...> mask, requires_arch<avx512bw>) noexcept
@@ -709,18 +697,6 @@
             return swizzle(self, mask.as_batch(), avx512bw {});
         }
 
-        template <class A, uint8_t... Vs>
-        XSIMD_INLINE batch<uint8_t, A> swizzle(batch<uint8_t, A> const& self, batch_constant<uint8_t, A, Vs...> mask, requires_arch<avx512bw>) noexcept
-        {
-            return swizzle(self, mask.as_batch(), avx512bw {});
-        }
-
-        template <class A, uint8_t... Vs>
-        XSIMD_INLINE batch<int8_t, A> swizzle(batch<int8_t, A> const& self, batch_constant<uint8_t, A, Vs...> mask, requires_arch<avx512bw>) noexcept
-        {
-            return swizzle(self, mask.as_batch(), avx512bw {});
-        }
-
         // zip_hi
         template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
         XSIMD_INLINE batch<T, A> zip_hi(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512bw>) noexcept
diff --git a/include/xsimd/arch/xsimd_avx512vbmi.hpp b/include/xsimd/arch/xsimd_avx512vbmi.hpp
index d17b674c7..680b23836 100644
--- a/include/xsimd/arch/xsimd_avx512vbmi.hpp
+++ b/include/xsimd/arch/xsimd_avx512vbmi.hpp
@@ -74,6 +74,31 @@
             return _mm512_maskz_permutexvar_epi8(mask, _mm512_load_epi32(slide_pattern.data()), x);
         }
 
+        // swizzle (dynamic version)
+        template <class A>
+        XSIMD_INLINE batch<uint8_t, A> swizzle(batch<uint8_t, A> const& self, batch<uint8_t, A> mask, requires_arch<avx512vbmi>) noexcept
+        {
+            return _mm512_permutexvar_epi8(mask, self);
+        }
+
+        template <class A>
+        XSIMD_INLINE batch<int8_t, A> swizzle(batch<int8_t, A> const& self, batch<uint8_t, A> mask, requires_arch<avx512vbmi>) noexcept
+        {
+            return bitwise_cast<int8_t>(swizzle(bitwise_cast<uint8_t>(self), mask, avx512vbmi {}));
+        }
+
+        // swizzle (static version)
+        template <class A, uint8_t... Vs>
+        XSIMD_INLINE batch<uint8_t, A> swizzle(batch<uint8_t, A> const& self, batch_constant<uint8_t, A, Vs...> mask, requires_arch<avx512vbmi>) noexcept
+        {
+            return swizzle(self, mask.as_batch(), avx512vbmi {});
+        }
+
+        template <class A, uint8_t... Vs>
+        XSIMD_INLINE batch<int8_t, A> swizzle(batch<int8_t, A> const& self, batch_constant<uint8_t, A, Vs...> mask, requires_arch<avx512vbmi>) noexcept
+        {
+            return swizzle(self, mask.as_batch(), avx512vbmi {});
+        }
     }
 }
diff --git a/include/xsimd/arch/xsimd_avx512vbmi2.hpp b/include/xsimd/arch/xsimd_avx512vbmi2.hpp
new file mode 100644
index 000000000..4c6353c32
--- /dev/null
+++ b/include/xsimd/arch/xsimd_avx512vbmi2.hpp
@@ -0,0 +1,73 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and        *
+ * Martin Renou                                                            *
+ * Copyright (c) QuantStack                                                *
+ * Copyright (c) Serge Guelton                                             *
+ *                                                                         *
+ * Distributed under the terms of the BSD 3-Clause License.                *
+ *                                                                         *
+ * The full license is in the file LICENSE, distributed with this software.*
+ ****************************************************************************/
+
+#ifndef XSIMD_AVX512VBMI2_HPP
+#define XSIMD_AVX512VBMI2_HPP
+
+#include <array>
+#include <type_traits>
+
+#include "../types/xsimd_avx512vbmi2_register.hpp"
+
+namespace xsimd
+{
+
+    namespace kernel
+    {
+        using namespace types;
+
+        // compress
+        template <class A>
+        XSIMD_INLINE batch<int16_t, A> compress(batch<int16_t, A> const& self, batch_bool<int16_t, A> const& mask, requires_arch<avx512vbmi2>) noexcept
+        {
+            return _mm512_maskz_compress_epi16(mask.mask(), self);
+        }
+        template <class A>
+        XSIMD_INLINE batch<uint16_t, A> compress(batch<uint16_t, A> const& self, batch_bool<uint16_t, A> const& mask, requires_arch<avx512vbmi2>) noexcept
+        {
+            return _mm512_maskz_compress_epi16(mask.mask(), self);
+        }
+        template <class A>
+        XSIMD_INLINE batch<int8_t, A> compress(batch<int8_t, A> const& self, batch_bool<int8_t, A> const& mask, requires_arch<avx512vbmi2>) noexcept
+        {
+            return _mm512_maskz_compress_epi8(mask.mask(), self);
+        }
+        template <class A>
+        XSIMD_INLINE batch<uint8_t, A> compress(batch<uint8_t, A> const& self, batch_bool<uint8_t, A> const& mask, requires_arch<avx512vbmi2>) noexcept
+        {
+            return _mm512_maskz_compress_epi8(mask.mask(), self);
+        }
+
+        // expand
+        template <class A>
+        XSIMD_INLINE batch<int16_t, A> expand(batch<int16_t, A> const& self, batch_bool<int16_t, A> const& mask, requires_arch<avx512vbmi2>) noexcept
+        {
+            return _mm512_maskz_expand_epi16(mask.mask(), self);
+        }
+        template <class A>
+        XSIMD_INLINE batch<uint16_t, A> expand(batch<uint16_t, A> const& self, batch_bool<uint16_t, A> const& mask, requires_arch<avx512vbmi2>) noexcept
+        {
+            return _mm512_maskz_expand_epi16(mask.mask(), self);
+        }
+        template <class A>
+        XSIMD_INLINE batch<int8_t, A> expand(batch<int8_t, A> const& self, batch_bool<int8_t, A> const& mask, requires_arch<avx512vbmi2>) noexcept
+        {
+            return _mm512_maskz_expand_epi8(mask.mask(), self);
+        }
+        template <class A>
+        XSIMD_INLINE batch<uint8_t, A> expand(batch<uint8_t, A> const& self, batch_bool<uint8_t, A> const& mask, requires_arch<avx512vbmi2>) noexcept
+        {
+            return _mm512_maskz_expand_epi8(mask.mask(), self);
+        }
+    }
+}
+
+#endif
diff --git a/include/xsimd/arch/xsimd_avx512vnni_avx512vbmi.hpp b/include/xsimd/arch/xsimd_avx512vnni_avx512vbmi2.hpp
similarity index 85%
rename from include/xsimd/arch/xsimd_avx512vnni_avx512vbmi.hpp
rename to include/xsimd/arch/xsimd_avx512vnni_avx512vbmi2.hpp
index a70d30fad..0b4ffd2e4 100644
--- a/include/xsimd/arch/xsimd_avx512vnni_avx512vbmi.hpp
+++ b/include/xsimd/arch/xsimd_avx512vnni_avx512vbmi2.hpp
@@ -9,12 +9,12 @@
  * The full license is in the file LICENSE, distributed with this software. *
  ****************************************************************************/
 
-#ifndef XSIMD_AVX512VNNI_AVX512VBMI_HPP
-#define XSIMD_AVX512VNNI_AVX512VBMI_HPP
+#ifndef XSIMD_AVX512VNNI_AVX512VBMI2_HPP
+#define XSIMD_AVX512VNNI_AVX512VBMI2_HPP
 
 #include <array>
 #include <type_traits>
 
-#include "../types/xsimd_avx512vnni_avx512vbmi_register.hpp"
+#include "../types/xsimd_avx512vnni_avx512vbmi2_register.hpp"
 
 #endif
diff --git a/include/xsimd/arch/xsimd_isa.hpp b/include/xsimd/arch/xsimd_isa.hpp
index 5b714b299..a617f836d 100644
--- a/include/xsimd/arch/xsimd_isa.hpp
+++ b/include/xsimd/arch/xsimd_isa.hpp
@@ -92,12 +92,16 @@
 #include "./xsimd_avx512vbmi.hpp"
 #endif
 
+#if XSIMD_WITH_AVX512VBMI2
+#include "./xsimd_avx512vbmi2.hpp"
+#endif
+
 #if XSIMD_WITH_AVX512VNNI_AVX512BW
 #include "./xsimd_avx512vnni_avx512bw.hpp"
 #endif
 
-#if XSIMD_WITH_AVX512VNNI_AVX512VBMI
-#include "./xsimd_avx512vnni_avx512vbmi.hpp"
+#if XSIMD_WITH_AVX512VNNI_AVX512VBMI2
+#include "./xsimd_avx512vnni_avx512vbmi2.hpp"
 #endif
 
 #if XSIMD_WITH_NEON
diff --git a/include/xsimd/config/xsimd_arch.hpp b/include/xsimd/config/xsimd_arch.hpp
index 39d0d581d..89fc6783d 100644
--- a/include/xsimd/config/xsimd_arch.hpp
+++ b/include/xsimd/config/xsimd_arch.hpp
@@ -162,7 +162,7 @@ namespace xsimd
     } // namespace detail
 
     using all_x86_architectures = arch_list<
-        avx512vnni<avx512vbmi>, avx512vbmi, avx512ifma, avx512pf, avx512vnni<avx512bw>, avx512bw, avx512er, avx512dq, avx512cd, avx512f,
+        avx512vnni<avx512vbmi2>, avx512vbmi2, avx512vbmi, avx512ifma, avx512pf, avx512vnni<avx512bw>, avx512bw, avx512er, avx512dq, avx512cd, avx512f,
         avxvnni, fma3<avx2>, avx2, fma3<avx>, avx, fma4, fma3<sse4_2>, sse4_2, sse4_1, /*sse4a,*/ ssse3, sse3, sse2>;
diff --git a/include/xsimd/config/xsimd_config.hpp b/include/xsimd/config/xsimd_config.hpp
index 985a70a9b..2655ca936 100644
--- a/include/xsimd/config/xsimd_config.hpp
+++ b/include/xsimd/config/xsimd_config.hpp
@@ -299,6 +299,17 @@
 #define XSIMD_WITH_AVX512VBMI 0
 #endif
 
+/**
+ * @ingroup xsimd_config_macro
+ *
+ * Set to 1 if AVX512VBMI2 is available at compile-time, to 0 otherwise.
+ */
+#ifdef __AVX512VBMI2__
+#define XSIMD_WITH_AVX512VBMI2 XSIMD_WITH_AVX512F
+#else
+#define XSIMD_WITH_AVX512VBMI2 0
+#endif
+
 /**
  * @ingroup xsimd_config_macro
  *
@@ -306,17 +317,17 @@
  */
 #ifdef __AVX512VNNI__
 
-#if XSIMD_WITH_AVX512VBMI
-#define XSIMD_WITH_AVX512VNNI_AVX512VBMI XSIMD_WITH_AVX512F
+#if XSIMD_WITH_AVX512VBMI2
+#define XSIMD_WITH_AVX512VNNI_AVX512VBMI2 XSIMD_WITH_AVX512F
 #define XSIMD_WITH_AVX512VNNI_AVX512BW XSIMD_WITH_AVX512F
 #else
-#define XSIMD_WITH_AVX512VNNI_AVX512VBMI 0
+#define XSIMD_WITH_AVX512VNNI_AVX512VBMI2 0
 #define XSIMD_WITH_AVX512VNNI_AVX512BW XSIMD_WITH_AVX512F
 #endif
 
 #else
 
-#define XSIMD_WITH_AVX512VNNI_AVX512VBMI 0
+#define XSIMD_WITH_AVX512VNNI_AVX512VBMI2 0
 #define XSIMD_WITH_AVX512VNNI_AVX512BW 0
 #endif
@@ -455,7 +466,7 @@
 
 #endif
 
-#if !XSIMD_WITH_SSE2 && !XSIMD_WITH_SSE3 && !XSIMD_WITH_SSSE3 && !XSIMD_WITH_SSE4_1 && !XSIMD_WITH_SSE4_2 && !XSIMD_WITH_AVX && !XSIMD_WITH_AVX2 && !XSIMD_WITH_AVXVNNI && !XSIMD_WITH_FMA3_SSE && !XSIMD_WITH_FMA4 && !XSIMD_WITH_FMA3_AVX && !XSIMD_WITH_FMA3_AVX2 && !XSIMD_WITH_AVX512F && !XSIMD_WITH_AVX512CD && !XSIMD_WITH_AVX512DQ && !XSIMD_WITH_AVX512BW && !XSIMD_WITH_AVX512ER && !XSIMD_WITH_AVX512PF && !XSIMD_WITH_AVX512IFMA && !XSIMD_WITH_AVX512VBMI && !XSIMD_WITH_NEON && !XSIMD_WITH_NEON64 && !XSIMD_WITH_SVE && !XSIMD_WITH_RVV && !XSIMD_WITH_WASM
+#if !XSIMD_WITH_SSE2 && !XSIMD_WITH_SSE3 && !XSIMD_WITH_SSSE3 && !XSIMD_WITH_SSE4_1 && !XSIMD_WITH_SSE4_2 && !XSIMD_WITH_AVX && !XSIMD_WITH_AVX2 && !XSIMD_WITH_AVXVNNI && !XSIMD_WITH_FMA3_SSE && !XSIMD_WITH_FMA4 && !XSIMD_WITH_FMA3_AVX && !XSIMD_WITH_FMA3_AVX2 && !XSIMD_WITH_AVX512F && !XSIMD_WITH_AVX512CD && !XSIMD_WITH_AVX512DQ && !XSIMD_WITH_AVX512BW && !XSIMD_WITH_AVX512ER && !XSIMD_WITH_AVX512PF && !XSIMD_WITH_AVX512IFMA && !XSIMD_WITH_AVX512VBMI && !XSIMD_WITH_AVX512VBMI2 && !XSIMD_WITH_NEON && !XSIMD_WITH_NEON64 && !XSIMD_WITH_SVE && !XSIMD_WITH_RVV && !XSIMD_WITH_WASM
 #define XSIMD_NO_SUPPORTED_ARCHITECTURE
 #endif
diff --git a/include/xsimd/config/xsimd_cpuid.hpp b/include/xsimd/config/xsimd_cpuid.hpp
index 8021fceb8..7b940f655 100644
--- a/include/xsimd/config/xsimd_cpuid.hpp
+++ b/include/xsimd/config/xsimd_cpuid.hpp
@@ -71,8 +71,9 @@ namespace xsimd
             ARCH_FIELD(avx512pf)
             ARCH_FIELD(avx512ifma)
             ARCH_FIELD(avx512vbmi)
+            ARCH_FIELD(avx512vbmi2)
             ARCH_FIELD_EX(avx512vnni<::xsimd::avx512bw>, avx512vnni_bw)
-            ARCH_FIELD_EX(avx512vnni<::xsimd::avx512vbmi>, avx512vnni_vbmi)
+            ARCH_FIELD_EX(avx512vnni<::xsimd::avx512vbmi2>, avx512vnni_vbmi2)
             ARCH_FIELD(neon)
             ARCH_FIELD(neon64)
             ARCH_FIELD_EX(i8mm<::xsimd::neon64>, i8mm_neon64)
@@ -245,8 +246,9 @@
                 avx512pf = regs7[1] >> 26 & avx512_state_os_enabled;
                 avx512ifma = regs7[1] >> 21 & avx512_state_os_enabled;
                 avx512vbmi = regs7[2] >> 1 & avx512_state_os_enabled;
+                avx512vbmi2 = regs7[2] >> 6 & avx512_state_os_enabled;
                 avx512vnni_bw = regs7[2] >> 11 & avx512_state_os_enabled;
-                avx512vnni_vbmi = avx512vbmi && avx512vnni_bw;
+                avx512vnni_vbmi2 = avx512vbmi2 && avx512vnni_bw;
 #endif
             }
         };
diff --git a/include/xsimd/types/xsimd_all_registers.hpp b/include/xsimd/types/xsimd_all_registers.hpp
index 6d024a167..a652061a8 100644
--- a/include/xsimd/types/xsimd_all_registers.hpp
+++ b/include/xsimd/types/xsimd_all_registers.hpp
@@ -23,9 +23,10 @@
 #include "xsimd_fma3_avx_register.hpp"
 
 #include "xsimd_avx512vnni_avx512bw_register.hpp"
-#include "xsimd_avx512vnni_avx512vbmi_register.hpp"
+#include "xsimd_avx512vnni_avx512vbmi2_register.hpp"
 
 #include "xsimd_avx512ifma_register.hpp"
+#include "xsimd_avx512vbmi2_register.hpp"
 #include "xsimd_avx512vbmi_register.hpp"
"xsimd_avx512vbmi_register.hpp" #include "xsimd_avx512er_register.hpp" diff --git a/include/xsimd/types/xsimd_avx512vbmi2_register.hpp b/include/xsimd/types/xsimd_avx512vbmi2_register.hpp new file mode 100644 index 000000000..c2b0f0f76 --- /dev/null +++ b/include/xsimd/types/xsimd_avx512vbmi2_register.hpp @@ -0,0 +1,51 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ****************************************************************************/ + +#ifndef XSIMD_AVX512VBMI2_REGISTER_HPP +#define XSIMD_AVX512VBMI2_REGISTER_HPP + +#include "./xsimd_avx512vbmi_register.hpp" + +namespace xsimd +{ + + /** + * @ingroup architectures + * + * AVX512VBMI instructions + */ + struct avx512vbmi2 : avx512vbmi + { + static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512VBMI2; } + static constexpr bool available() noexcept { return true; } + static constexpr char const* name() noexcept { return "avx512vbmi2"; } + }; + +#if XSIMD_WITH_AVX512VBMI2 + +#if !XSIMD_WITH_AVX512VBMI +#error "architecture inconsistency: avx512vbmi2 requires avx512vbmi" +#endif + + namespace types + { + template + struct get_bool_simd_register + { + using type = simd_avx512_bool_register; + }; + + XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx512vbmi2, avx512vbmi); + + } +#endif +} +#endif diff --git a/include/xsimd/types/xsimd_avx512vnni_avx512vbmi_register.hpp b/include/xsimd/types/xsimd_avx512vnni_avx512vbmi2_register.hpp similarity index 70% rename from include/xsimd/types/xsimd_avx512vnni_avx512vbmi_register.hpp rename to include/xsimd/types/xsimd_avx512vnni_avx512vbmi2_register.hpp index adfaff5b2..114a0b8a0 100644 --- a/include/xsimd/types/xsimd_avx512vnni_avx512vbmi_register.hpp +++ b/include/xsimd/types/xsimd_avx512vnni_avx512vbmi2_register.hpp @@ -9,10 +9,10 @@ * The full license is in the file LICENSE, distributed with this software. 
  ****************************************************************************/
 
-#ifndef XSIMD_AVX512VNNI_AVX512VBMI_REGISTER_HPP
-#define XSIMD_AVX512VNNI_AVX512VBMI_REGISTER_HPP
+#ifndef XSIMD_AVX512VNNI_AVX512VBMI2_REGISTER_HPP
+#define XSIMD_AVX512VNNI_AVX512VBMI2_REGISTER_HPP
 
-#include "./xsimd_avx512vbmi_register.hpp"
+#include "./xsimd_avx512vbmi2_register.hpp"
 
 namespace xsimd
 {
@@ -25,29 +25,28 @@ namespace xsimd
      * AVX512VNNI instructions
      */
     template <>
-    struct avx512vnni<avx512vbmi> : avx512vbmi
+    struct avx512vnni<avx512vbmi2> : avx512vbmi2
     {
-        static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512VNNI_AVX512VBMI; }
+        static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512VNNI_AVX512VBMI2; }
         static constexpr bool available() noexcept { return true; }
-        static constexpr char const* name() noexcept { return "avx512vnni+avx512vbmi"; }
+        static constexpr char const* name() noexcept { return "avx512vnni+avx512vbmi2"; }
     };
 
-#if XSIMD_WITH_AVX512VNNI_AVX512VBMI
+#if XSIMD_WITH_AVX512VNNI_AVX512VBMI2
 
-#if !XSIMD_WITH_AVX512VBMI
-#error "architecture inconsistency: avx512vnni+avx512vbmi requires avx512vbmi"
+#if !XSIMD_WITH_AVX512VBMI2
+#error "architecture inconsistency: avx512vnni+avx512vbmi2 requires avx512vbmi2"
 #endif
 
     namespace types
     {
         template <class T>
-        struct get_bool_simd_register<T, avx512vnni<avx512vbmi>>
+        struct get_bool_simd_register<T, avx512vnni<avx512vbmi2>>
         {
             using type = simd_avx512_bool_register<T>;
         };
 
-        XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx512vnni<avx512vbmi>, avx512vbmi);
-
+        XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx512vnni<avx512vbmi2>, avx512vbmi2);
     }
 #endif
 }
diff --git a/test/test_shuffle.cpp b/test/test_shuffle.cpp
index df4bf783d..a2f66157d 100644
--- a/test/test_shuffle.cpp
+++ b/test/test_shuffle.cpp
@@ -347,7 +347,7 @@ struct compress_test
     }
 };
 
-TEST_CASE_TEMPLATE("[compress]", B, BATCH_FLOAT_TYPES, xsimd::batch<uint32_t>, xsimd::batch<int32_t>, xsimd::batch<uint64_t>, xsimd::batch<int64_t>)
+TEST_CASE_TEMPLATE("[compress]", B, BATCH_FLOAT_TYPES, xsimd::batch<uint32_t>, xsimd::batch<int32_t>, xsimd::batch<uint64_t>, xsimd::batch<int64_t>, xsimd::batch<uint16_t>, xsimd::batch<int16_t>, xsimd::batch<uint8_t>, xsimd::batch<int8_t>)
 {
     compress_test<B> Test;
     SUBCASE("empty")
@@ -443,7 +443,7 @@ struct expand_test
     }
 };
 
-TEST_CASE_TEMPLATE("[expand]", B, BATCH_FLOAT_TYPES, xsimd::batch<uint32_t>, xsimd::batch<int32_t>, xsimd::batch<uint64_t>, xsimd::batch<int64_t>)
+TEST_CASE_TEMPLATE("[expand]", B, BATCH_FLOAT_TYPES, xsimd::batch<uint32_t>, xsimd::batch<int32_t>, xsimd::batch<uint64_t>, xsimd::batch<int64_t>, xsimd::batch<uint16_t>, xsimd::batch<int16_t>, xsimd::batch<uint8_t>, xsimd::batch<int8_t>)
 {
     expand_test<B> Test;
     SUBCASE("empty")
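
For reference, a minimal sketch (not part of the patch) of how the 8-bit compress/expand kernels added here are reached through xsimd's public API; the input values, the even-lane mask and the printed lanes are illustrative assumptions. On an AVX512VBMI2 build the two calls lower to the _mm512_maskz_compress_epi8 / _mm512_maskz_expand_epi8 kernels above; on other targets xsimd's existing generic fallback is used.

#include <cstddef>
#include <cstdint>
#include <cstdio>

#include "xsimd/xsimd.hpp"

int main()
{
    using batch = xsimd::batch<uint8_t>;
    using batch_bool = xsimd::batch_bool<uint8_t>;

    // One batch holding 0, 1, 2, ... and a mask selecting the even lanes.
    uint8_t in[batch::size];
    for (std::size_t i = 0; i < batch::size; ++i)
        in[i] = static_cast<uint8_t>(i);
    batch x = batch::load_unaligned(in);
    batch_bool even = (x & batch(1)) == batch(0);

    // compress packs the selected lanes to the front of the result,
    // expand scatters them back into the masked positions.
    batch packed = xsimd::compress(x, even);
    batch spread = xsimd::expand(packed, even);

    uint8_t out[batch::size];
    packed.store_unaligned(out);
    std::printf("%u %u %u\n", unsigned(out[0]), unsigned(out[1]), unsigned(out[2])); // 0 2 4
    spread.store_unaligned(out);
    std::printf("%u %u %u\n", unsigned(out[0]), unsigned(out[1]), unsigned(out[2])); // 0 0 2
    return 0;
}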