From b900bf4fd9df1018387f44e68d9a0a6808dda381 Mon Sep 17 00:00:00 2001
From: Markus Wick
Date: Mon, 21 Jul 2025 17:24:10 +0200
Subject: [PATCH] Fix AVX512DQ.

This file was completely disabled and never included, and so not tested
and broken.

This patch fixes:
* Missing include in the _isa.hpp
* Typo in the include guard
* Variable name within fast_cast
* Requirements to avx512dq
---
 include/xsimd/arch/xsimd_avx512dq.hpp | 14 +++++++-------
 include/xsimd/arch/xsimd_isa.hpp      |  4 ++++
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/include/xsimd/arch/xsimd_avx512dq.hpp b/include/xsimd/arch/xsimd_avx512dq.hpp
index 4788d19e9..c5ca19729 100644
--- a/include/xsimd/arch/xsimd_avx512dq.hpp
+++ b/include/xsimd/arch/xsimd_avx512dq.hpp
@@ -9,8 +9,8 @@
  * The full license is in the file LICENSE, distributed with this software. *
  ****************************************************************************/
 
-#ifndef XSIMD_AVX512_DQHPP
-#define XSIMD_AVX512_D_HPP
+#ifndef XSIMD_AVX512DQ_HPP
+#define XSIMD_AVX512DQ_HPP
 
 #include "../types/xsimd_avx512dq_register.hpp"
 
@@ -47,12 +47,12 @@ namespace xsimd
 
         // bitwise_not
         template
-        XSIMD_INLINE batch bitwise_not(batch const& self, requires_arch) noexcept
+        XSIMD_INLINE batch bitwise_not(batch const& self, requires_arch) noexcept
         {
             return _mm512_xor_ps(self, _mm512_castsi512_ps(_mm512_set1_epi32(-1)));
         }
         template
-        XSIMD_INLINE batch bitwise_not(batch const& self, requires_arch) noexcept
+        XSIMD_INLINE batch bitwise_not(batch const& self, requires_arch) noexcept
         {
             return _mm512_xor_pd(self, _mm512_castsi512_pd(_mm512_set1_epi32(-1)));
         }
@@ -96,7 +96,7 @@ namespace xsimd
 // tmp1 = [a0..8, b0..8]
 // tmp2 = [a8..f, b8..f]
 #define XSIMD_AVX512_HADDP_STEP1(I, a, b)                                \
-    batch res##I;                                                        \
+    batch res##I;                                                        \
     {                                                                    \
         auto tmp1 = _mm512_shuffle_f32x4(a, b, _MM_SHUFFLE(1, 0, 1, 0)); \
         auto tmp2 = _mm512_shuffle_f32x4(a, b, _MM_SHUFFLE(3, 2, 3, 2)); \
@@ -180,7 +180,7 @@ namespace xsimd
 
         // reduce_add
         template
-        XSIMD_INLINE float reduce_add(batch const& rhs, requires_arch) noexcept
+        XSIMD_INLINE float reduce_add(batch const& rhs, requires_arch) noexcept
         {
             __m256 tmp1 = _mm512_extractf32x8_ps(rhs, 1);
             __m256 tmp2 = _mm512_extractf32x8_ps(rhs, 0);
@@ -192,7 +192,7 @@ namespace xsimd
         namespace detail
         {
             template
-            XSIMD_INLINE batch fast_cast(batch const& x, batch const&, requires_arch) noexcept
+            XSIMD_INLINE batch fast_cast(batch const& self, batch const&, requires_arch) noexcept
             {
                 return _mm512_cvtepi64_pd(self);
             }
diff --git a/include/xsimd/arch/xsimd_isa.hpp b/include/xsimd/arch/xsimd_isa.hpp
index f88d94f93..398d22511 100644
--- a/include/xsimd/arch/xsimd_isa.hpp
+++ b/include/xsimd/arch/xsimd_isa.hpp
@@ -72,6 +72,10 @@
 #include "./xsimd_avx512f.hpp"
 #endif
 
+#if XSIMD_WITH_AVX512DQ
+#include "./xsimd_avx512dq.hpp"
+#endif
+
 #if XSIMD_WITH_AVX512BW
 #include "./xsimd_avx512bw.hpp"
 #endif