Skip to content

Commit f540a60

Browse files
committed
AVX512-VNNI implementation of brightness filter.
1 parent 75d2b02 commit f540a60

File tree

4 files changed

+151
-1
lines changed

4 files changed

+151
-1
lines changed

SerialPrograms/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -769,6 +769,7 @@ file(GLOB MAIN_SOURCES
769769
Source/Kernels/ImageFilters/RGB32_Brightness/Kernels_ImageFilter_RGB32_Brightness.cpp
770770
Source/Kernels/ImageFilters/RGB32_Brightness/Kernels_ImageFilter_RGB32_Brightness.h
771771
Source/Kernels/ImageFilters/RGB32_Brightness/Kernels_ImageFilter_RGB32_Brightness_Default.cpp
772+
Source/Kernels/ImageFilters/RGB32_Brightness/Kernels_ImageFilter_RGB32_Brightness_x64_AVX512-VNNI.cpp
772773
Source/Kernels/ImageFilters/RGB32_EuclideanDistance/Kernels_ImageFilter_RGB32_Euclidean.cpp
773774
Source/Kernels/ImageFilters/RGB32_EuclideanDistance/Kernels_ImageFilter_RGB32_Euclidean.h
774775
Source/Kernels/ImageFilters/RGB32_EuclideanDistance/Kernels_ImageFilter_RGB32_Euclidean_ARM64_NEON.cpp
@@ -2483,6 +2484,7 @@ SET_SOURCE_FILES_PROPERTIES(
24832484
endif()
24842485
if (ARCH_FLAGS_19_IceLake)
24852486
SET_SOURCE_FILES_PROPERTIES(
2487+
Source/Kernels/ImageFilters/RGB32_Brightness/Kernels_ImageFilter_RGB32_Brightness_x64_AVX512-VNNI.cpp
24862488
Source/Kernels/Waterfill/Kernels_Waterfill_Core_64x32_x64_AVX512-GF.cpp
24872489
Source/Kernels/Waterfill/Kernels_Waterfill_Core_64x64_x64_AVX512-GF.cpp
24882490
PROPERTIES COMPILE_FLAGS ${ARCH_FLAGS_19_IceLake}

SerialPrograms/SerialPrograms.pro

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,7 @@ SOURCES += \
376376
Source/Kernels/ImageFilters/Kernels_ImageFilter_Green_Default.cpp \
377377
Source/Kernels/ImageFilters/RGB32_Brightness/Kernels_ImageFilter_RGB32_Brightness.cpp \
378378
Source/Kernels/ImageFilters/RGB32_Brightness/Kernels_ImageFilter_RGB32_Brightness_Default.cpp \
379+
Source/Kernels/ImageFilters/RGB32_Brightness/Kernels_ImageFilter_RGB32_Brightness_x64_AVX512-VNNI.cpp \
379380
Source/Kernels/ImageFilters/RGB32_EuclideanDistance/Kernels_ImageFilter_RGB32_Euclidean.cpp \
380381
Source/Kernels/ImageFilters/RGB32_EuclideanDistance/Kernels_ImageFilter_RGB32_Euclidean_ARM64_NEON.cpp \
381382
Source/Kernels/ImageFilters/RGB32_EuclideanDistance/Kernels_ImageFilter_RGB32_Euclidean_Default.cpp \

SerialPrograms/Source/Kernels/ImageFilters/RGB32_Brightness/Kernels_ImageFilter_RGB32_Brightness.cpp

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,12 @@ size_t filter_rgb32_brightness(
3939

4040

4141

42+
size_t to_blackwhite_rgb32_brightness_x64_AVX512VNNI(
43+
const uint32_t* in, size_t in_bytes_per_row, size_t width, size_t height,
44+
uint32_t* out, size_t out_bytes_per_row,
45+
bool in_range_black,
46+
uint32_t min_brightness, uint32_t max_brightness
47+
);
4248
size_t to_blackwhite_rgb32_brightness_Default(
4349
const uint32_t* in, size_t in_bytes_per_row, size_t width, size_t height,
4450
uint32_t* out, size_t out_bytes_per_row,
@@ -54,7 +60,16 @@ size_t to_blackwhite_rgb32_brightness(
5460
if (width * height > 0xffffffff){
5561
throw InternalProgramError(nullptr, PA_CURRENT_FUNCTION, "Image is too large. more than 2^32 pixels.");
5662
}
57-
63+
#ifdef PA_AutoDispatch_x64_19_IceLake
64+
if (CPU_CAPABILITY_CURRENT.OK_19_IceLake){
65+
return to_blackwhite_rgb32_brightness_x64_AVX512VNNI(
66+
in, in_bytes_per_row, width, height,
67+
out, out_bytes_per_row,
68+
in_range_black,
69+
min_brightness, max_brightness
70+
);
71+
}
72+
#endif
5873
return to_blackwhite_rgb32_brightness_Default(
5974
in, in_bytes_per_row, width, height,
6075
out, out_bytes_per_row,
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
/* Image Filters RGB32 Brightness
2+
*
3+
* From: https://github.com/PokemonAutomation/
4+
*
5+
*/
6+
7+
#ifdef PA_AutoDispatch_x64_19_IceLake
8+
9+
#include <immintrin.h>
10+
#include "Kernels/ImageFilters/Kernels_ImageFilter_Basic_Routines.h"
11+
#include "Kernels_ImageFilter_RGB32_Brightness.h"
12+
13+
//#include <iostream>
14+
//using std::cout;
15+
//using std::endl;
16+
17+
namespace PokemonAutomation{
18+
namespace Kernels{
19+
20+
21+
namespace{
22+
23+
struct PartialWordMask{
24+
__mmask16 m;
25+
26+
PA_FORCE_INLINE PartialWordMask(size_t left)
27+
: m(((__mmask16)1 << left) - 1)
28+
{}
29+
};
30+
31+
}
32+
33+
34+
35+
36+
class ToBlackWhite_RgbBrightness_x64_AVX512{
37+
public:
38+
static const size_t VECTOR_SIZE = 16;
39+
using Mask = PartialWordMask;
40+
41+
public:
42+
ToBlackWhite_RgbBrightness_x64_AVX512(
43+
bool in_range_black,
44+
uint32_t min_brightness, uint32_t max_brightness
45+
)
46+
: m_in_range_black(in_range_black ? 0xffff : 0)
47+
, m_min_brightness(_mm512_set1_epi32(min_brightness))
48+
, m_threshold(_mm512_set1_epi32(max_brightness - min_brightness))
49+
, m_count(_mm512_setzero_si512())
50+
{}
51+
52+
PA_FORCE_INLINE size_t count() const{
53+
return _mm512_reduce_add_epi32(m_count);
54+
}
55+
56+
PA_FORCE_INLINE void process_full(uint32_t* out, const uint32_t* in){
57+
__m512i pixel = _mm512_loadu_si512((const __m512i*)in);
58+
__mmask16 in_range_pixels = process_word(pixel);
59+
m_count = _mm512_mask_sub_epi32(m_count, in_range_pixels, m_count, _mm512_set1_epi32(-1));
60+
_mm512_storeu_si512((__m512i*)out, pixel);
61+
}
62+
PA_FORCE_INLINE void process_partial(uint32_t* out, const uint32_t* in, const Mask& mask){
63+
__m512i pixel = _mm512_maskz_loadu_epi32(mask.m, in);
64+
__mmask16 in_range_pixels = process_word(pixel);
65+
in_range_pixels &= mask.m;
66+
m_count = _mm512_mask_sub_epi32(m_count, in_range_pixels, m_count, _mm512_set1_epi32(-1));
67+
_mm512_mask_storeu_epi32(out, mask.m, pixel);
68+
}
69+
70+
private:
71+
PA_FORCE_INLINE __mmask16 process_word(__m512i& pixel){
72+
// cout << "asdf" << endl;
73+
// __m512i r = _mm512_and_si512(_mm512_srli_epi32(pixel, 16), _mm512_set1_epi32(0x000000ff));
74+
// __m512i g = _mm512_and_si512(_mm512_srli_epi32(pixel, 8), _mm512_set1_epi32(0x000000ff));
75+
// __m512i b = _mm512_and_si512(pixel, _mm512_set1_epi32(0x000000ff));
76+
77+
// Remove the alpha channel.
78+
pixel = _mm512_and_si512(pixel, _mm512_set1_epi32(0x00ffffff));
79+
80+
// Horizontally sum up 4 x 8-bit integers in each 32-bit pixel.
81+
pixel = _mm512_dpbusd_epi32(_mm512_setzero_si512(), pixel, _mm512_set1_epi8(1));
82+
83+
// Find the ones that are in range.
84+
pixel = _mm512_sub_epi32(pixel, m_min_brightness);
85+
__mmask16 cmp16 = _mm512_cmple_epu32_mask(pixel, m_threshold);
86+
87+
// Set to black or white.
88+
pixel = _mm512_mask_blend_epi32(cmp16 ^ m_in_range_black, _mm512_set1_epi32(0xff000000), _mm512_set1_epi32(-1));
89+
90+
return cmp16;
91+
}
92+
93+
private:
94+
const __mmask16 m_in_range_black;
95+
const __m512i m_min_brightness;
96+
const __m512i m_threshold;
97+
__m512i m_count;
98+
};
99+
size_t to_blackwhite_rgb32_brightness_x64_AVX512VNNI(
100+
const uint32_t* in, size_t in_bytes_per_row, size_t width, size_t height,
101+
uint32_t* out, size_t out_bytes_per_row,
102+
bool in_range_black,
103+
uint32_t min_brightness, uint32_t max_brightness
104+
){
105+
ToBlackWhite_RgbBrightness_x64_AVX512 filter(in_range_black, min_brightness, max_brightness);
106+
filter_per_pixel(in, in_bytes_per_row, width, height, filter, out, out_bytes_per_row);
107+
return filter.count();
108+
}
109+
110+
111+
112+
113+
114+
115+
116+
117+
118+
119+
120+
121+
122+
123+
124+
125+
126+
127+
128+
129+
130+
}
131+
}
132+
#endif

0 commit comments

Comments
 (0)