Skip to content

Commit 6279806

Browse files
committed
Refactor out ToBlackWhite routines.
1 parent 7907968 commit 6279806

19 files changed

+842
-139
lines changed

Common/Cpp/PixelRGB32.h

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/* Pixel: RGB32
2+
*
3+
* From: https://github.com/PokemonAutomation/
4+
*
5+
* A 32-bit pixel that can be accessed either as one packed uint32_t or as separate blue/green/red/alpha bytes.
6+
*
7+
*/
8+
9+
#ifndef PokemonAutomation_Pixel_RGB32_H
10+
#define PokemonAutomation_Pixel_RGB32_H
11+
12+
#include <stdint.h>
13+
14+
namespace PokemonAutomation{
15+
16+
17+
18+
19+
20+
// A 32-bit pixel viewed either as one packed word (`u32`) or as four 8-bit
// channels (`parts`). The channels are declared in the order blue, green, red,
// alpha, so on a little-endian target `u32` reads as 0xAARRGGBB.
union PixelRGB32{
    struct{
        uint8_t blue;
        uint8_t green;
        uint8_t red;
        uint8_t alpha;
    } parts;
    uint32_t u32;

    // Wrap an already-packed 32-bit pixel value.
    PixelRGB32(uint32_t value)
        : u32(value)
    {}

    // Build a fully opaque pixel (alpha = 0xff) from its three color channels.
    PixelRGB32(uint8_t red, uint8_t green, uint8_t blue)
        : parts{blue, green, red, 0xff}
    {}

    // Build a pixel from all four channels. Note that alpha is the FIRST argument.
    PixelRGB32(uint8_t alpha, uint8_t red, uint8_t green, uint8_t blue)
        : parts{blue, green, red, alpha}
    {}
};
45+
46+
47+
48+
49+
50+
}
51+
#endif

SerialPrograms/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -762,6 +762,7 @@ file(GLOB MAIN_SOURCES
762762
Source/Kernels/ImageFilters/Kernels_ImageFilter_Basic.h
763763
Source/Kernels/ImageFilters/Kernels_ImageFilter_Basic_Default.cpp
764764
Source/Kernels/ImageFilters/Kernels_ImageFilter_Basic_Routines.h
765+
Source/Kernels/ImageFilters/Kernels_ImageFilter_Basic_Routines_ARM64_NEON.h
765766
Source/Kernels/ImageFilters/Kernels_ImageFilter_Basic_Routines_Default.h
766767
Source/Kernels/ImageFilters/Kernels_ImageFilter_Basic_Routines_x64_AVX2.h
767768
Source/Kernels/ImageFilters/Kernels_ImageFilter_Basic_Routines_x64_AVX512.h

SerialPrograms/SerialPrograms.pro

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1587,6 +1587,7 @@ HEADERS += \
15871587
Source/Kernels/BinaryMatrix/Kernels_SparseBinaryMatrixCore.h \
15881588
Source/Kernels/BinaryMatrix/Kernels_SparseBinaryMatrixCore.tpp \
15891589
Source/Kernels/ImageFilters/Kernels_ImageFilter_Basic.h \
1590+
Source/Kernels/ImageFilters/Kernels_ImageFilter_Basic_Routines_ARM64_NEON.h \
15901591
Source/Kernels/ImageFilters/Kernels_ImageFilter_Basic_Routines_Default.h \
15911592
Source/Kernels/ImageFilters/Kernels_ImageFilter_Basic_Routines_x64_AVX2.h \
15921593
Source/Kernels/ImageFilters/Kernels_ImageFilter_Basic_Routines_x64_AVX512.h \
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
/* Image Filters Basic Routines
2+
*
3+
* From: https://github.com/PokemonAutomation/
4+
*
5+
*/
6+
7+
#include <stdint.h>
8+
#include "Common/Compiler.h"
9+
#include "Kernels/PartialWordAccess/Kernels_PartialWordAccess_arm64_NEON.h"
10+
#include "Kernels/Kernels_arm64_NEON.h"
11+
12+
namespace PokemonAutomation{
13+
namespace Kernels{
14+
15+
16+
17+
template <typename PixelTester>
18+
class ToBlackWhite_Rgb32_ARM64_NEON{
19+
public:
20+
static const size_t VECTOR_SIZE = 4;
21+
using Mask = size_t;
22+
23+
public:
24+
ToBlackWhite_Rgb32_ARM64_NEON(
25+
const PixelTester& tester,
26+
bool in_range_black
27+
)
28+
: m_tester(tester)
29+
, m_in_range_color_u32(vdupq_n_u32(in_range_black ? 0xFF000000 : 0xFFFFFFFF))
30+
, m_out_of_range_color_u32(vdupq_n_u32(in_range_black ? 0xFFFFFFFF : 0xFF000000))
31+
, m_count_u32(vdupq_n_u32(0))
32+
{}
33+
34+
PA_FORCE_INLINE size_t count() const{
35+
uint64x2_t sum_u64 = vpaddlq_u32(m_count_u32);
36+
return sum_u64[0] + sum_u64[1];
37+
}
38+
39+
// Given 4 pixels from in[4], apply color range comparison and count the pixels that are in range.
40+
// The counts are stored in m_count_u32.
41+
// If a per-pixel mask, cmp_mask_u32 is not nullptr, it only counts the pixels covered by the mask.
42+
// It also changes pixels into black or white depending on whether they are in range.
43+
// The resulting pixels are saved in out[4]
44+
PA_FORCE_INLINE void process_full(uint32_t* out, const uint32_t* in){
45+
uint32x4_t pixel = vld1q_u32(in);
46+
uint32x4_t cmp_u32 = process_word(pixel);
47+
if (cmp_mask_u32) {
48+
cmp_u32 = vandq_u32(cmp_u32, *cmp_mask_u32);
49+
}
50+
// Increase count for each pixel in range. Each uint32 lane is counted separately.
51+
// We achieve +=1 by substracting 0xFFFFFFFF
52+
m_count_u32 = vsubq_u32(m_count_u32, cmp_u32);
53+
// select replacement color or in_u8 based on cmp_u32:
54+
uint32x4_t out_u32;
55+
// vbslq_u32(a, b, c) for 1 bits in a, choose b; for 0 bits in a, choose c
56+
out_u32 = vbslq_u32(cmp_u32, m_in_range_color_u32, m_out_of_range_color_u32);
57+
58+
vst1q_u32(out, out_u32);
59+
}
60+
61+
// Same as `process_full()` but only process `left` (< 4) pixels
62+
PA_FORCE_INLINE void process_partial(uint32_t* out, const uint32_t* in, size_t left){
63+
uint32x4_t cmp_mask_u32 = vreinterpretq_u32_u8(PartialWordAccess_arm64_NEON::create_front_mask(left * 4));
64+
uint32_t buffer_in[4], buffer_out[4];
65+
memcpy(buffer_in, in, sizeof(uint32_t) * left);
66+
process_full(buffer_out, buffer_in, &cmp_mask_u32);
67+
memcpy(out, buffer_out, sizeof(uint32_t) * left);
68+
}
69+
70+
private:
71+
const PixelTester m_tester;
72+
uint32x4_t m_in_range_color_u32;
73+
uint32x4_t m_out_of_range_color_u32;
74+
uint32x4_t m_count_u32;
75+
};
76+
77+
78+
79+
80+
81+
}
82+
}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
/* Image Filters Basic Routines
2+
*
3+
* From: https://github.com/PokemonAutomation/
4+
*
5+
*/
6+
7+
#include <stdint.h>
8+
#include "Common/Compiler.h"
9+
10+
namespace PokemonAutomation{
11+
namespace Kernels{
12+
13+
14+
15+
// Scalar black/white converter: handles exactly one RGB32 pixel per call and
// keeps a running count of the pixels the tester accepted.
//
// PixelTester must provide `test_word(uint32_t)` whose result converts to bool.
template <typename PixelTester>
class ToBlackWhite_Rgb32_Default{
public:
    static const size_t VECTOR_SIZE = 1;
    using Mask = size_t;

public:
    // in_range_black: when true, accepted pixels are written as black
    // (0xff000000) and rejected ones as white (0xffffffff); when false the
    // two colors are swapped.
    ToBlackWhite_Rgb32_Default(const PixelTester& tester, bool in_range_black)
        : m_tester(tester)
        , m_in_range_black(in_range_black)
        , m_count(0)
    {}

    // Number of pixels accepted by the tester so far.
    PA_FORCE_INLINE size_t count() const{
        return m_count;
    }

    // Test in[0], update the counter, and write the black/white result to out[0].
    PA_FORCE_INLINE void process_full(uint32_t* out, const uint32_t* in){
        const bool in_range = m_tester.test_word(*in);
        if (in_range){
            ++m_count;
        }
        // White is chosen exactly when "in range" and "in range means black" disagree.
        const bool make_white = (in_range != m_in_range_black);
        *out = make_white ? 0xffffffff : 0xff000000;
    }

    // `left` is ignored: this simply forwards to process_full() for one pixel.
    PA_FORCE_INLINE void process_partial(uint32_t* out, const uint32_t* in, size_t left){
        (void)left;
        process_full(out, in);
    }

private:
    const PixelTester m_tester;
    const bool m_in_range_black;
    size_t m_count;
};
51+
52+
53+
54+
55+
}
56+
}
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
/* Image Filters Basic Routines
2+
*
3+
* From: https://github.com/PokemonAutomation/
4+
*
5+
*/
6+
7+
#include <stdint.h>
8+
#include <immintrin.h>
9+
#include "Common/Compiler.h"
10+
#include "Kernels/Kernels_x64_AVX2.h"
11+
#include "Kernels/PartialWordAccess/Kernels_PartialWordAccess_x64_AVX2.h"
12+
13+
namespace PokemonAutomation{
14+
namespace Kernels{
15+
16+
17+
18+
19+
template <typename PixelTester>
20+
class ToBlackWhite_Rgb32_x64_AVX2{
21+
public:
22+
static const size_t VECTOR_SIZE = 8;
23+
using Mask = PartialWordAccess32_x64_AVX2;
24+
25+
public:
26+
ToBlackWhite_Rgb32_x64_AVX2(
27+
const PixelTester& tester,
28+
bool in_range_black
29+
)
30+
: m_tester(tester)
31+
, m_in_range_black(_mm256_set1_epi32(in_range_black ? -1 : 0))
32+
, m_count(_mm256_setzero_si256())
33+
{}
34+
35+
PA_FORCE_INLINE size_t count() const{
36+
return reduce_add32_x64_AVX2(m_count);
37+
}
38+
39+
PA_FORCE_INLINE void process_full(uint32_t* out, const uint32_t* in){
40+
__m256i pixel = _mm256_loadu_si256((const __m256i*)in);
41+
__m256i in_range_pixels = process_word(pixel);
42+
m_count = _mm256_sub_epi32(m_count, in_range_pixels);
43+
_mm256_storeu_si256((__m256i*)out, pixel);
44+
}
45+
PA_FORCE_INLINE void process_partial(uint32_t* out, const uint32_t* in, const Mask& mask){
46+
__m256i pixel = mask.load_i32(in);
47+
__m256i in_range_pixels = process_word(pixel);
48+
in_range_pixels = _mm256_and_si256(in_range_pixels, mask.mask());
49+
m_count = _mm256_sub_epi32(m_count, in_range_pixels);
50+
mask.store(out, pixel);
51+
}
52+
53+
private:
54+
// Process the pixel in-place.
55+
// Return a mask indicating which lanes are in range.
56+
PA_FORCE_INLINE __m256i process_word(__m256i& pixel) const{
57+
__m256i mask = m_tester.test_word(pixel);
58+
pixel = _mm256_or_si256(
59+
_mm256_xor_si256(mask, m_in_range_black),
60+
_mm256_set1_epi32(0xff000000)
61+
);
62+
return mask;
63+
}
64+
65+
private:
66+
const PixelTester m_tester;
67+
const __m256i m_in_range_black;
68+
__m256i m_count;
69+
};
70+
71+
72+
73+
74+
}
75+
}
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
/* Image Filters Basic Routines
2+
*
3+
* From: https://github.com/PokemonAutomation/
4+
*
5+
*/
6+
7+
#include <stdint.h>
8+
#include <immintrin.h>
9+
#include "Common/Compiler.h"
10+
11+
namespace PokemonAutomation{
12+
namespace Kernels{
13+
14+
15+
16+
// 16-bit lane mask with the lowest `left` bits set, used to select the leading
// lanes of a 16-lane vector.
struct PartialWordMask{
    __mmask16 m;

    // left: number of low bits to set. 0 gives an empty mask, 16 gives 0xffff.
    PA_FORCE_INLINE PartialWordMask(size_t left)
        : m(static_cast<__mmask16>((static_cast<__mmask16>(1) << left) - 1))
    {}
};
23+
24+
25+
26+
27+
template <typename PixelTester>
28+
class ToBlackWhite_Rgb32_x64_AVX512{
29+
public:
30+
static const size_t VECTOR_SIZE = 16;
31+
using Mask = PartialWordMask;
32+
33+
public:
34+
ToBlackWhite_Rgb32_x64_AVX512(
35+
const PixelTester& tester,
36+
bool in_range_black
37+
)
38+
: m_tester(tester)
39+
, m_in_range_color(_mm512_set1_epi32(in_range_black ? 0xff000000 : 0xffffffff))
40+
, m_out_range_color(_mm512_set1_epi32(in_range_black ? 0xffffffff : 0xff000000))
41+
, m_count(_mm512_setzero_si512())
42+
{}
43+
44+
PA_FORCE_INLINE size_t count() const{
45+
return _mm512_reduce_add_epi32(m_count);
46+
}
47+
48+
PA_FORCE_INLINE void process_full(uint32_t* out, const uint32_t* in){
49+
__m512i pixel = _mm512_loadu_si512((const __m512i*)in);
50+
__mmask16 in_range_pixels = process_word(pixel);
51+
m_count = _mm512_mask_sub_epi32(m_count, in_range_pixels, m_count, _mm512_set1_epi32(-1));
52+
_mm512_storeu_si512((__m512i*)out, pixel);
53+
}
54+
PA_FORCE_INLINE void process_partial(uint32_t* out, const uint32_t* in, const Mask& mask){
55+
__m512i pixel = _mm512_maskz_loadu_epi32(mask.m, in);
56+
__mmask16 in_range_pixels = process_word(pixel);
57+
in_range_pixels &= mask.m;
58+
m_count = _mm512_mask_sub_epi32(m_count, in_range_pixels, m_count, _mm512_set1_epi32(-1));
59+
_mm512_mask_storeu_epi32(out, mask.m, pixel);
60+
}
61+
62+
private:
63+
// Process the pixel in-place.
64+
// Return a mask indicating which lanes are in range.
65+
PA_FORCE_INLINE __mmask16 process_word(__m512i& pixel) const{
66+
__mmask16 mask = m_tester.test_word(pixel);
67+
pixel = _mm512_mask_blend_epi32(
68+
mask,
69+
m_out_range_color,
70+
m_in_range_color
71+
);
72+
return mask;
73+
}
74+
75+
private:
76+
const PixelTester m_tester;
77+
const __m512i m_in_range_color;
78+
const __m512i m_out_range_color;
79+
__m512i m_count;
80+
};
81+
82+
83+
84+
85+
86+
}
87+
}

0 commit comments

Comments
 (0)