@@ -11,61 +11,90 @@ limitations under the License. */
1111
1212#pragma once
1313
14- #include < iostream>
1514#include " DisableCopy.h"
1615
1716namespace paddle {
1817
18+ // clang-format off
19+ enum simd_t { // Bit-flag values: SIMD_NONE is the empty mask, every other enumerator is one distinct bit.
20+ SIMD_NONE = 0 , ///< No SIMD flag set
21+ SIMD_SSE = 1 << 0 , ///< SSE
22+ SIMD_SSE2 = 1 << 1 , ///< SSE 2
23+ SIMD_SSE3 = 1 << 2 , ///< SSE 3
24+ SIMD_SSSE3 = 1 << 3 , ///< SSSE 3
25+ SIMD_SSE41 = 1 << 4 , ///< SSE 4.1
26+ SIMD_SSE42 = 1 << 5 , ///< SSE 4.2
27+ SIMD_FMA3 = 1 << 6 , ///< FMA 3
28+ SIMD_FMA4 = 1 << 7 , ///< FMA 4
29+ SIMD_AVX = 1 << 8 , ///< AVX
30+ SIMD_AVX2 = 1 << 9 , ///< AVX 2
31+ SIMD_AVX512 = 1 << 10 , ///< AVX 512
32+ };
33+ // clang-format on
34+
1935class SIMDFlags final { // Holds an int bitmask of simd_t values and answers "are these flags set?" queries.
2036public:
2137 DISABLE_COPY (SIMDFlags); // Macro from DisableCopy.h; presumably removes copy operations - TODO confirm.
2238
2339 SIMDFlags (); // Defined outside this header; presumably fills simd_flags_ - TODO confirm.
2440
25- static SIMDFlags* instance ();
41+ static SIMDFlags const * instance (); // Returns a pointer to a const SIMDFlags; definition not shown here (presumably one shared instance - TODO confirm).
2642
27- inline bool isSSE () const { return simd_flags_ & SIMD_SSE; }
28- inline bool isSSE2 () const { return simd_flags_ & SIMD_SSE2; }
29- inline bool isSSE3 () const { return simd_flags_ & SIMD_SSE3; }
30- inline bool isSSSE3 () const { return simd_flags_ & SIMD_SSSE3; }
31- inline bool isSSE41 () const { return simd_flags_ & SIMD_SSE41; }
32- inline bool isSSE42 () const { return simd_flags_ & SIMD_SSE42; }
33- inline bool isFMA3 () const { return simd_flags_ & SIMD_FMA3; }
34- inline bool isFMA4 () const { return simd_flags_ & SIMD_FMA4; }
35- inline bool isAVX () const { return simd_flags_ & SIMD_AVX; }
36- inline bool isAVX2 () const { return simd_flags_ & SIMD_AVX2; }
37- inline bool isAVX512 () const { return simd_flags_ & SIMD_AVX512; }
43+ inline bool check (int flags) const { // True iff every bit set in `flags` is also set in simd_flags_; check(0) is therefore always true.
44+ return !((simd_flags_ & flags) ^ flags); // The AND keeps the requested bits that are present; XOR with `flags` is zero only when none is missing.
45+ }
3846
3947private:
40- enum simd_t {
41- SIMD_NONE = 0 , // /< None
42- SIMD_SSE = 1 << 0 , // /< SSE
43- SIMD_SSE2 = 1 << 1 , // /< SSE 2
44- SIMD_SSE3 = 1 << 2 , // /< SSE 3
45- SIMD_SSSE3 = 1 << 3 , // /< SSSE 3
46- SIMD_SSE41 = 1 << 4 , // /< SSE 4.1
47- SIMD_SSE42 = 1 << 5 , // /< SSE 4.2
48- SIMD_FMA3 = 1 << 6 , // /< FMA 3
49- SIMD_FMA4 = 1 << 7 , // /< FMA 4
50- SIMD_AVX = 1 << 8 , // /< AVX
51- SIMD_AVX2 = 1 << 9 , // /< AVX 2
52- SIMD_AVX512 = 1 << 10 , // /< AVX 512
53- };
54-
55- // / simd flags
5648 int simd_flags_ = SIMD_NONE; ///< Bitmask of simd_t values; defaults to SIMD_NONE (no flag set).
5749};
5850
59- #define HAS_SSE SIMDFlags::instance ()->isSSE()
60- #define HAS_SSE2 SIMDFlags::instance ()->isSSE2()
61- #define HAS_SSE3 SIMDFlags::instance ()->isSSE3()
62- #define HAS_SSSE3 SIMDFlags::instance ()->isSSSE3()
63- #define HAS_SSE41 SIMDFlags::instance ()->isSSE41()
64- #define HAS_SSE42 SIMDFlags::instance ()->isSSE42()
65- #define HAS_FMA3 SIMDFlags::instance ()->isFMA3()
66- #define HAS_FMA4 SIMDFlags::instance ()->isFMA4()
67- #define HAS_AVX SIMDFlags::instance ()->isAVX()
68- #define HAS_AVX2 SIMDFlags::instance ()->isAVX2()
69- #define HAS_AVX512 SIMDFlags::instance ()->isAVX512()
51+ /**
52+ * @brief Check at runtime whether every SIMD flag in a mask is set.
53+ *
54+ * The mask may OR several simd_t values together; all of them must be set. For example:
55+ * @code{.cpp}
56+ *
57+ * if (HAS_SIMD(SIMD_AVX2 | SIMD_FMA4)) {
58+ * avx2_fma4_stub();
59+ * } else if (HAS_SIMD(SIMD_AVX)) {
60+ * avx_stub();
61+ * }
62+ *
63+ * @endcode
64+ * NOTE(review): the parameter name `__flags` contains a double underscore, which C++ reserves for the implementation; the expansion also names SIMDFlags unqualified, so it resolves only where that name is visible. */
65+ #define HAS_SIMD (__flags ) SIMDFlags::instance()->check (__flags)
66+
67+ /**
68+ * @brief Per-flag shorthands; each one expands to HAS_SIMD with a single simd_t value.
69+ *
70+ * 1. Require several flags at once by combining shorthands with &&:
71+ *
72+ * @code{.cpp}
73+ * if (HAS_AVX && HAS_AVX2) {
74+ * avx2_stub();
75+ * }
76+ * @endcode
77+ *
78+ * 2. Accept any one of several flags by combining shorthands with ||:
79+ *
80+ * @code{.cpp}
81+ * if (HAS_SSE41 || HAS_SSE42) {
82+ * sse4_stub();
83+ * }
84+ * @endcode
85+ */
86+ // clang-format off
87+ #define HAS_SSE HAS_SIMD (SIMD_SSE)
88+ #define HAS_SSE2 HAS_SIMD (SIMD_SSE2)
89+ #define HAS_SSE3 HAS_SIMD (SIMD_SSE3)
90+ #define HAS_SSSE3 HAS_SIMD (SIMD_SSSE3)
91+ #define HAS_SSE41 HAS_SIMD (SIMD_SSE41)
92+ #define HAS_SSE42 HAS_SIMD (SIMD_SSE42)
93+ #define HAS_FMA3 HAS_SIMD (SIMD_FMA3)
94+ #define HAS_FMA4 HAS_SIMD (SIMD_FMA4)
95+ #define HAS_AVX HAS_SIMD (SIMD_AVX)
96+ #define HAS_AVX2 HAS_SIMD (SIMD_AVX2)
97+ #define HAS_AVX512 HAS_SIMD (SIMD_AVX512)
98+ // clang-format on
7099
71100} // namespace paddle
0 commit comments