Skip to content

Commit 604b8cc

Browse files
author
Gin
committed
fixing ocr number multifilter
1 parent 7c56d5a commit 604b8cc

File tree

4 files changed

+81
-63
lines changed

4 files changed

+81
-63
lines changed

SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.cpp

Lines changed: 48 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -101,43 +101,21 @@ int read_number(Logger& logger, const ImageViewRGB32& image, Language language){
101101
return number;
102102
}
103103

104-
int read_number_waterfill(
105-
Logger& logger, const ImageViewRGB32& image,
106-
uint32_t rgb32_min, uint32_t rgb32_max,
107-
bool text_inside_range,
108-
int8_t line_index
109-
){
110-
std::string ocr_text = read_number_waterfill_no_normalization(
111-
logger,
112-
image,
113-
rgb32_min, rgb32_max,
114-
text_inside_range
115-
);
116-
117-
std::string normalized = run_number_normalization(ocr_text);
118-
119-
std::string line_index_str = "";
120-
if (line_index != -1){
121-
line_index_str = "Line " + std::to_string(line_index) + ": ";
122-
}
123-
if (normalized.empty()){
124-
logger.log(line_index_str + "OCR Text: \"" + ocr_text + "\" -> \"" + normalized + "\" -> Unable to read.", COLOR_RED);
125-
return -1;
126-
}
127-
128-
int number = std::atoi(normalized.c_str());
129-
logger.log(line_index_str + "OCR Text: \"" + ocr_text + "\" -> \"" + normalized + "\" -> " + std::to_string(number));
130-
131-
return number;
132-
}
133-
134104

105+
// Run OCR on each individual character in the string of numbers.
106+
// Return empty string if OCR fails.
107+
//
108+
// text_inside_range: binary filter is applied to the image so that any pixels within the color range will be turned black, and everything else will be white
109+
// width_max: return empty string if any character's width is greater than width_max (likely means that two characters are touching, and so are treated as one large character)
110+
// min_digit_area: if a character has area (aka pixel count) smaller than this value (likely noise or punctuation), skip this character
111+
// check_empty_string: if set to true, return empty string (and stop evaluation) if any character returns an empty string from OCR
135112
std::string read_number_waterfill_no_normalization(
136113
Logger& logger, const ImageViewRGB32& image,
137114
uint32_t rgb32_min, uint32_t rgb32_max,
138-
bool text_inside_range,
139-
size_t width_max,
140-
bool check_empty_string
115+
bool text_inside_range = true,
116+
size_t width_max = (size_t)-1,
117+
size_t min_digit_area = 20,
118+
bool check_empty_string = false
141119
){
142120
using namespace Kernels::Waterfill;
143121

@@ -159,7 +137,7 @@ std::string read_number_waterfill_no_normalization(
159137
std::map<size_t, WaterfillObject> map;
160138
{
161139
std::unique_ptr<WaterfillSession> session = make_WaterfillSession(matrix);
162-
auto iter = session->make_iterator(20);
140+
auto iter = session->make_iterator(min_digit_area);
163141
WaterfillObject object;
164142
while (map.size() < 16 && iter->find_next(object, true)){
165143
if (object.width() > width_max){
@@ -205,12 +183,45 @@ bool is_digits(const std::string &str)
205183
return std::all_of(str.begin(), str.end(), ::isdigit);
206184
}
207185

186+
187+
int read_number_waterfill(
188+
Logger& logger, const ImageViewRGB32& image,
189+
uint32_t rgb32_min, uint32_t rgb32_max,
190+
bool text_inside_range,
191+
int8_t line_index
192+
){
193+
std::string ocr_text = read_number_waterfill_no_normalization(
194+
logger,
195+
image,
196+
rgb32_min, rgb32_max,
197+
text_inside_range
198+
);
199+
200+
std::string normalized = run_number_normalization(ocr_text);
201+
202+
std::string line_index_str = "";
203+
if (line_index != -1){
204+
line_index_str = "Line " + std::to_string(line_index) + ": ";
205+
}
206+
if (normalized.empty()){
207+
logger.log(line_index_str + "OCR Text: \"" + ocr_text + "\" -> \"" + normalized + "\" -> Unable to read.", COLOR_RED);
208+
return -1;
209+
}
210+
211+
int number = std::atoi(normalized.c_str());
212+
logger.log(line_index_str + "OCR Text: \"" + ocr_text + "\" -> \"" + normalized + "\" -> " + std::to_string(number));
213+
214+
return number;
215+
}
216+
217+
208218
int read_number_waterfill_multifilter(
209219
Logger& logger, const ImageViewRGB32& image,
210220
std::vector<std::pair<uint32_t, uint32_t>> filters,
211-
size_t width_max,
212221
bool text_inside_range,
213222
bool prioritize_numeric_only_results,
223+
size_t width_max,
224+
size_t min_digit_area,
214225
int8_t line_index
215226
){
216227
std::string line_index_str = "";
@@ -226,13 +237,14 @@ int read_number_waterfill_multifilter(
226237

227238
uint32_t rgb32_min = filter.first;
228239
uint32_t rgb32_max = filter.second;
229-
bool check_empty_string = false;
240+
bool check_empty_string = true;
230241
std::string ocr_text = read_number_waterfill_no_normalization(
231242
logger,
232243
image,
233244
rgb32_min, rgb32_max,
234245
text_inside_range,
235246
width_max,
247+
min_digit_area,
236248
check_empty_string
237249
);
238250

SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.h

Lines changed: 6 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -36,35 +36,25 @@ int read_number_waterfill(
3636
int8_t line_index = -1
3737
);
3838

39-
// run OCR on each individual character in the string of numbers.
40-
// return empty string if OCR fails
41-
//
42-
// text_inside_range: binary filter is applied to the image so that any pixels within the color range will be turned black, and everything else will be white
43-
// width_max: return empty string if any character's width is greater than width_max (likely means that two characters are touching, and so are treated as one large character)
44-
// check_empty_string: if set to true, return empty string (and stop evaluation) if any character returns an empty string from OCR
45-
std::string read_number_waterfill_no_normalization(
46-
Logger& logger, const ImageViewRGB32& image,
47-
uint32_t rgb32_min, uint32_t rgb32_max,
48-
bool text_inside_range = true,
49-
size_t width_max = (size_t)-1,
50-
bool check_empty_string = false
51-
);
5239

5340
// Try OCR with all the given color filters. OCR is still run on each individual character.
5441
// Return the best majority candidate. Return -1 if the number could not be read.
42+
// width_max: if a character width is greater than this value (likely means two characters are touching, will cause bad read), skip this filter.
43+
// min_digit_area: if a character has area (aka pixel count) smaller than this value (likely noise or punctuation), skip this character
5544
//
5645
// prioritize_numeric_only_results:
5746
// - if true: if OCR reads only numeric characters, the candidate gets 2 votes. If OCR reads non-numeric characters, the candidate gets only 1 vote.
5847
// - if false: all reads only get 1 vote
5948
//
60-
// line_index: specifies the current number's row. for logging purposes, when multithreaded.
49+
// log_line_index: adds an index prefix to the logging lines for logging purposes when calling this function in parallel.
6150
int read_number_waterfill_multifilter(
6251
Logger& logger, const ImageViewRGB32& image,
6352
std::vector<std::pair<uint32_t, uint32_t>> filters,
64-
size_t width_max = (size_t)-1,
6553
bool text_inside_range = true,
6654
bool prioritize_numeric_only_results = true,
67-
int8_t line_index = -1
55+
size_t width_max = (size_t)-1,
56+
size_t min_digit_area = 20,
57+
int8_t log_line_index = -1
6858
);
6959

7060

SerialPrograms/Source/PokemonLZA/Inference/Boxes/PokemonLZA_BoxInfoDetector.cpp

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -205,19 +205,32 @@ void BoxDexNumberDetector::make_overlays(VideoOverlaySet& items) const{
205205
bool BoxDexNumberDetector::detect(const ImageViewRGB32& screen){
206206
const size_t max_dex_number = std::max(LUMIOSE_DEX_SLUGS().size(), HYPERSPACE_DEX_SLUGS().size());
207207

208-
const ImageViewRGB32 dex_image_crop = extract_box_reference(screen, m_dex_number_box);
209208
// const bool in_range_black = false;
210209
// const ImageRGB32 black_white_dex_image_crop = to_blackwhite_rgb32_range(dex_image_crop, in_range_black, 0xff808080, 0xffffffff);
211210
// black_white_dex_image_crop.save("blackwhite_number.png");
212211
// const int dex_number = OCR::read_number(m_logger, black_white_dex_image_crop);
213212
// const int dex_number = OCR::read_number_waterfill(m_logger, dex_image_crop, 0xff808080, 0xffffffff, false);
214-
const int dex_number = OCR::read_number_waterfill_multifilter(m_logger, dex_image_crop, {
215-
{0x0, 0xff202020},
216-
{0x0, 0xff404040},
217-
{0x0, 0xff606060},
218-
{0x0, 0xff808080},
219-
{0x0, 0xffA0A0A0},
220-
});
213+
214+
const int dex_number = [&](){
215+
const ImageViewRGB32 dex_image_crop = extract_box_reference(screen, m_dex_number_box);
216+
const bool text_inside_range = true;
217+
const bool prioritize_numeric_only_results = true;
218+
const size_t width_max = SIZE_MAX;
219+
// To accommodate the dex number "No. xxx" for all languages, we have to make the dex number crop cover the "dot" character
220+
// for some languages. We have to use `min_digit_area` to filter out the dot when doing OCR.
221+
// The min digit area is computed so that any dot whose area is smaller than (dex_image_crop.height()/5)^2 is filtered out during OCR.
222+
const size_t min_digit_area = dex_image_crop.height()*dex_image_crop.height() / 25;
223+
return OCR::read_number_waterfill_multifilter(m_logger, dex_image_crop,
224+
{
225+
{0x0, 0xff202020},
226+
{0x0, 0xff404040},
227+
{0x0, 0xff606060},
228+
{0x0, 0xff808080},
229+
{0x0, 0xffA0A0A0},
230+
},
231+
text_inside_range, prioritize_numeric_only_results, width_max, min_digit_area
232+
);
233+
}();
221234
if (dex_number <= 0 || dex_number > static_cast<int>(max_dex_number)) {
222235
m_dex_number = 0;
223236
m_dex_number_when_error = dex_number;

SerialPrograms/Source/PokemonSV/Inference/ItemPrinter/PokemonSV_ItemPrinterMaterialDetector.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -136,13 +136,16 @@ int16_t ItemPrinterMaterialDetector::read_number(
136136
}
137137
}();
138138

139-
size_t max_width = (size_t)((double)24 * screen.width() / 1080);
140-
139+
const bool text_inside_range = true;
140+
const bool prioritize_numeric_only_results = true;
141+
const size_t max_width = (size_t)((double)24 * screen.width() / 1080);
142+
const size_t min_digit_area = 20;
141143
int16_t number = (int16_t)OCR::read_number_waterfill_multifilter(
142144
logger,
143145
cropped, filters,
146+
text_inside_range, prioritize_numeric_only_results,
144147
max_width,
145-
true, true,
148+
min_digit_area,
146149
row_index
147150
);
148151

0 commit comments

Comments
 (0)