Skip to content

Commit 163aa8f

Browse files
author
Gin
committed
fix neon bug
1 parent 67a0212 commit 163aa8f

File tree

3 files changed

+13
-10
lines changed

3 files changed

+13
-10
lines changed

SerialPrograms/Source/Kernels/BinaryImageFilters/Kernels_BinaryImage_BasicFilters_Core_64x8_arm64_NEON.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include "Kernels_BinaryImage_BasicFilters_Routines.h"
1111
#include "Kernels_BinaryImage_BasicFilters_arm64_NEON.h"
1212

13+
1314
namespace PokemonAutomation{
1415
namespace Kernels{
1516

@@ -24,6 +25,8 @@ void filter_by_mask_64x8_arm64_NEON(
2425
}
2526

2627

28+
29+
2730
void compress_rgb32_to_binary_range_64x8_arm64_NEON(
2831
const uint32_t* image, size_t bytes_per_row,
2932
PackedBinaryMatrix_IB& matrix0, uint32_t mins0, uint32_t maxs0

SerialPrograms/Source/Kernels/BinaryImageFilters/Kernels_BinaryImage_BasicFilters_arm64_NEON.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,9 @@
99

1010
#include "Kernels/PartialWordAccess/Kernels_PartialWordAccess_arm64_NEON.h"
1111

12-
#include <iostream>
13-
using std::cout;
14-
using std::endl;
12+
// #include <iostream>
13+
// using std::cout;
14+
// using std::endl;
1515

1616
namespace PokemonAutomation{
1717
namespace Kernels{
@@ -149,8 +149,6 @@ class FilterByMask_arm64_NEON{
149149
const uint32x4_t m_zeros;
150150
};
151151

152-
153-
154152
// Compress the given pixel buffer (up to 64 pixels long) into a bitmap and store it in one uint64_t.
155153
class Compressor_RgbRange_arm64_NEON{
156154
public:
@@ -178,13 +176,14 @@ class Compressor_RgbRange_arm64_NEON{
178176
bits |= convert16(pixels + c) << c;
179177
c += 16;
180178
}
181-
for(; c < count; c += 4){
179+
180+
count %= 16;
181+
for(size_t i = 0; i < count / 4; i++, c+=4){
182182
const uint8x16_t pixel = vld1q_u8((const uint8_t*)(pixels + c));
183183
bits |= convert4(pixel) << c;
184184
}
185185
count %= 4;
186186
if (count){
187-
c -= 4;
188187
PartialWordAccess_arm64_NEON loader(count * sizeof(uint32_t));
189188
const uint8x16_t pixel = loader.load(pixels + c);
190189
const uint64_t mask = ((uint64_t)1 << count) - 1;
@@ -273,6 +272,7 @@ class Compressor_RgbEuclidean_arm64_NEON{
273272
for(size_t i = 0; i < count / 4; i++, c+=4){
274273
bits |= convert4(pixels + c) << c;
275274
}
275+
count %= 4;
276276
if (count){
277277
PartialWordAccess_arm64_NEON loader(count * sizeof(uint32_t));
278278
const uint8x16_t pixel = loader.load(pixels + c);

SerialPrograms/Source/Kernels/Waterfill/Kernels_Waterfill_Session.tpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,9 @@
1616
#include "Kernels/BinaryMatrix/Kernels_SparseBinaryMatrixCore.h"
1717
#include "Kernels_Waterfill_Session.h"
1818

19-
//#include <iostream>
20-
//using std::cout;
21-
//using std::endl;
19+
// #include <iostream>
20+
// using std::cout;
21+
// using std::endl;
2222

2323
namespace PokemonAutomation{
2424
namespace Kernels{

0 commit comments

Comments
 (0)