From a1763495d3821ce7103bf6cc7d994d1ebd86af0f Mon Sep 17 00:00:00 2001 From: jw098 Date: Sat, 29 Mar 2025 00:23:09 -0700 Subject: [PATCH 1/8] read_number_waterfill() with multiple filters --- .../CommonTools/OCR/OCR_NumberReader.cpp | 82 ++++++++++++++++++- .../Source/CommonTools/OCR/OCR_NumberReader.h | 10 +++ 2 files changed, 89 insertions(+), 3 deletions(-) diff --git a/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.cpp b/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.cpp index ab5ac2b3fc..2250055451 100644 --- a/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.cpp +++ b/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.cpp @@ -17,9 +17,9 @@ #include "OCR_RawOCR.h" #include "OCR_NumberReader.h" -// #include -// using std::cout; -// using std::endl; +#include +using std::cout; +using std::endl; namespace PokemonAutomation{ namespace OCR{ @@ -152,6 +152,82 @@ int read_number_waterfill( return number; } +int read_number_waterfill( + Logger& logger, const ImageViewRGB32& image, + std::vector> filters, + uint32_t width_max, + bool text_inside_range +){ + using namespace Kernels::Waterfill; + + + + for (std::pair filter : filters){ + + uint32_t rgb32_min = filter.first; + uint32_t rgb32_max = filter.second; + + // Direct OCR is unreliable. Instead, we will waterfill each character + // to isolate them, then OCR them individually. + + ImageRGB32 filtered = to_blackwhite_rgb32_range(image, rgb32_min, rgb32_max, text_inside_range); + + // static int c = 0; + // static int i = 0; + // filtered.save("test-" + std::to_string(c++) + ".png"); + + PackedBinaryMatrix matrix = compress_rgb32_to_binary_range(filtered, 0xff000000, 0xff7f7f7f); + + std::map map; + { + std::unique_ptr session = make_WaterfillSession(matrix); + auto iter = session->make_iterator(20); + WaterfillObject object; + bool exceed_width_max = false; + while (map.size() < 16 && iter->find_next(object, true)){ + if (object.width() > width_max){ + exceed_width_max = true; + break; + } + map.emplace(object.min_x, std::move(object)); + } + if (exceed_width_max){ + // try the next color filter + continue; + } + } + + std::string ocr_text; + for (const auto& item : map){ + const WaterfillObject& object = item.second; + ImageRGB32 cropped = extract_box_reference(filtered, object).copy(); + PackedBinaryMatrix tmp(object.packed_matrix()); + filter_by_mask(tmp, cropped, Color(0xffffffff), true); + ImageRGB32 padded = pad_image(cropped, cropped.width(), 0xffffffff); + std::string ocr = OCR::ocr_read(Language::English, padded); + // padded.save("test-cropped" + std::to_string(c) + "-" + std::to_string(i++) + ".png"); + // std::cout << ocr << std::endl; + if (!ocr.empty()){ + ocr_text += ocr[0]; + } + } + + std::string normalized = run_number_normalization(ocr_text); + + if (normalized.empty()){ + logger.log("OCR Text: \"" + ocr_text + "\" -> \"" + normalized + "\" -> Unable to read.", COLOR_RED); + return -1; + } + + int number = std::atoi(normalized.c_str()); + logger.log("OCR Text: \"" + ocr_text + "\" -> \"" + normalized + "\" -> " + std::to_string(number)); + + return number; + } + + return -1; +} + diff --git a/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.h b/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.h index 99093a0863..1580dd5db8 100644 --- a/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.h +++ b/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.h @@ -8,6 +8,7 @@ #define PokemonAutomation_CommonTools_OCR_NumberReader_H #include +#include #include "CommonFramework/Language.h" namespace PokemonAutomation{ @@ -32,6 +33,15 @@ int read_number_waterfill( bool text_inside_range = true ); +// applies color filters on the text, until each individual waterfilled character/number is less than width_max. then apply OCR. +// this solves the problem where characters are too close to each other and touch, causing more than one character to be OCR'ed +// the filters should be arranged in order of preference. (probably should be arranged broadest to narrowest) +int read_number_waterfill( + Logger& logger, const ImageViewRGB32& image, + std::vector> filters, + uint32_t width_max, + bool text_inside_range = true + ); } From fa2b35947607be59375e50f988b479d375f1ed61 Mon Sep 17 00:00:00 2001 From: jw098 Date: Sat, 29 Mar 2025 01:13:47 -0700 Subject: [PATCH 2/8] read_number_waterfill: try different filter if OCR results in empty string --- .../Source/CommonTools/OCR/OCR_NumberReader.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.cpp b/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.cpp index 2250055451..74c8c77be0 100644 --- a/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.cpp +++ b/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.cpp @@ -39,6 +39,7 @@ std::string run_number_normalization(const std::string& input){ {'9', '9'}, // Common misreads. + {'|', '1'}, {']', '1'}, {'l', '1'}, {'i', '1'}, @@ -173,7 +174,7 @@ int read_number_waterfill( ImageRGB32 filtered = to_blackwhite_rgb32_range(image, rgb32_min, rgb32_max, text_inside_range); // static int c = 0; - // static int i = 0; + // int i = 0; // filtered.save("test-" + std::to_string(c++) + ".png"); PackedBinaryMatrix matrix = compress_rgb32_to_binary_range(filtered, 0xff000000, 0xff7f7f7f); @@ -198,6 +199,7 @@ int read_number_waterfill( } std::string ocr_text; + bool empty_char = false; for (const auto& item : map){ const WaterfillObject& object = item.second; ImageRGB32 cropped = extract_box_reference(filtered, object).copy(); @@ -206,11 +208,18 @@ int read_number_waterfill( ImageRGB32 padded = pad_image(cropped, cropped.width(), 0xffffffff); std::string ocr = OCR::ocr_read(Language::English, padded); // padded.save("test-cropped" + std::to_string(c) + "-" + std::to_string(i++) + ".png"); - // std::cout << ocr << std::endl; + // std::cout << ocr[0] << std::endl; if (!ocr.empty()){ ocr_text += ocr[0]; + }else{ + empty_char = true; + break; } } + if (empty_char){ + // try the next color filter + continue; + } std::string normalized = run_number_normalization(ocr_text); From 3f33ff4b2d23312822953fefeb6195d3c7954626 Mon Sep 17 00:00:00 2001 From: jw098 Date: Sat, 29 Mar 2025 01:16:49 -0700 Subject: [PATCH 3/8] update item printer material detector to use updated read_number_waterfill() with multiple filters --- .../DevPrograms/TestProgramSwitch.cpp | 19 ++++++++++++ .../PokemonSV_ItemPrinterMaterialDetector.cpp | 29 +++++++++++++++++-- .../PokemonSV_ItemPrinterMaterialDetector.h | 10 +++---- 3 files changed, 50 insertions(+), 8 deletions(-) diff --git a/SerialPrograms/Source/NintendoSwitch/DevPrograms/TestProgramSwitch.cpp b/SerialPrograms/Source/NintendoSwitch/DevPrograms/TestProgramSwitch.cpp index a80f3cdb49..b8a28b3506 100644 --- a/SerialPrograms/Source/NintendoSwitch/DevPrograms/TestProgramSwitch.cpp +++ b/SerialPrograms/Source/NintendoSwitch/DevPrograms/TestProgramSwitch.cpp @@ -313,12 +313,31 @@ void TestProgram::program(MultiSwitchProgramEnvironment& env, CancellableScope& // std::terminate(); +#if 1 + ImageRGB32 image("720p1.png"); + // auto image = feed.snapshot(); + + ItemPrinterMaterialDetector detector(COLOR_RED, Language::English); + + std::vector boxes = { + {0.485,0.176758,0.037,0.05}, {0.485,0.250977,0.037,0.05}, {0.485,0.325196,0.037,0.05}, {0.485,0.399415,0.037,0.05}, {0.485,0.473634,0.037,0.05}, {0.485,0.547853,0.037,0.05}, {0.485,0.622072,0.037,0.05}, {0.485,0.696291,0.037,0.05}, {0.485,0.77051,0.037,0.05}, {0.485,0.844729,0.037,0.05}, + // {0.39,0.176758,0.025,0.05}, {0.39,0.250977,0.025,0.05}, {0.39,0.325196,0.025,0.05}, {0.39,0.399415,0.025,0.05}, {0.39,0.473634,0.025,0.05}, {0.39,0.547853,0.025,0.05}, {0.39,0.622072,0.025,0.05}, {0.39,0.696291,0.025,0.05}, {0.39,0.77051,0.025,0.05}, {0.39,0.844729,0.025,0.05}, + }; + for (ImageFloatBox box : boxes){ + detector.read_number(console.logger(), env.inference_dispatcher(), image, box); + } + +#endif + +#if 0 + ImageRGB32 image("20250323-011605651979.png"); DialogBoxDetector detector; detector.make_overlays(overlays); cout << detector.detect(image) << endl; +#endif #if 0 diff --git a/SerialPrograms/Source/PokemonSV/Inference/ItemPrinter/PokemonSV_ItemPrinterMaterialDetector.cpp b/SerialPrograms/Source/PokemonSV/Inference/ItemPrinter/PokemonSV_ItemPrinterMaterialDetector.cpp index 0edd1160fd..aa963a5de0 100644 --- a/SerialPrograms/Source/PokemonSV/Inference/ItemPrinter/PokemonSV_ItemPrinterMaterialDetector.cpp +++ b/SerialPrograms/Source/PokemonSV/Inference/ItemPrinter/PokemonSV_ItemPrinterMaterialDetector.cpp @@ -18,7 +18,7 @@ #include "NintendoSwitch/Commands/NintendoSwitch_Commands_PushButtons.h" #include "PokemonSV_ItemPrinterMaterialDetector.h" -//#include +// #include namespace PokemonAutomation{ namespace NintendoSwitch{ @@ -69,7 +69,9 @@ std::array ItemPrinterMaterialDetector::Material_Boxes(ImageF for (size_t i = 0; i < 10; i++){ double y = initial_y + i*y_spacing; material_boxes[i] = ImageFloatBox(x, y, width, height); + // std::cout << "{" << x << "," << y << "," << width << "," << height << "}, "; } + // std::cout << std::endl; return material_boxes; } @@ -98,9 +100,30 @@ int16_t ItemPrinterMaterialDetector::read_number( int16_t number; if (is_dark_text_light_background){ - number = (int16_t)OCR::read_number_waterfill(logger, cropped, 0xff000000, 0xff808080); + const std::vector> filters = { + {0xff000000, 0xff909090}, + {0xff000000, 0xff808080}, + {0xff000000, 0xff707070}, + {0xff000000, 0xff606060}, + {0xff000000, 0xff505050}, + {0xff000000, 0xff404040}, + {0xff000000, 0xff303030}, + {0xff000000, 0xff202020}, + {0xff000000, 0xff101010}, + }; + number = (int16_t)OCR::read_number_waterfill(logger, cropped, filters, 24); }else{ - number = (int16_t)OCR::read_number_waterfill(logger, cropped, 0xff808080, 0xffffffff); + const std::vector> filters = { + // {0xff808080, 0xffffffff}, + {0xff909090, 0xffffffff}, + {0xffa0a0a0, 0xffffffff}, + {0xffb0b0b0, 0xffffffff}, + {0xffc0c0c0, 0xffffffff}, + {0xffd0d0d0, 0xffffffff}, + {0xffe0e0e0, 0xffffffff}, + {0xfff0f0f0, 0xffffffff}, + }; + number = (int16_t)OCR::read_number_waterfill(logger, cropped, filters, 24); } if (number < 1 || number > 999){ diff --git a/SerialPrograms/Source/PokemonSV/Inference/ItemPrinter/PokemonSV_ItemPrinterMaterialDetector.h b/SerialPrograms/Source/PokemonSV/Inference/ItemPrinter/PokemonSV_ItemPrinterMaterialDetector.h index d96de3b170..f99c4ea356 100644 --- a/SerialPrograms/Source/PokemonSV/Inference/ItemPrinter/PokemonSV_ItemPrinterMaterialDetector.h +++ b/SerialPrograms/Source/PokemonSV/Inference/ItemPrinter/PokemonSV_ItemPrinterMaterialDetector.h @@ -74,8 +74,6 @@ class ItemPrinterMaterialDetector{ int8_t row_index ) const; - -private: int16_t read_number( Logger& logger, AsyncDispatcher& dispatcher, const ImageViewRGB32& screen, const ImageFloatBox& box @@ -85,9 +83,11 @@ class ItemPrinterMaterialDetector{ private: Color m_color; Language m_language; - std::array m_box_mat_value; - std::array m_box_mat_quantity; - std::array m_box_mat_name; + std::array m_box_mat_value; // {0.39,0.176758,0.025,0.05}, {0.39,0.250977,0.025,0.05}, {0.39,0.325196,0.025,0.05}, {0.39,0.399415,0.025,0.05}, {0.39,0.473634,0.025,0.05}, {0.39,0.547853,0.025,0.05}, {0.39,0.622072,0.025,0.05}, {0.39,0.696291,0.025,0.05}, {0.39,0.77051,0.025,0.05}, {0.39,0.844729,0.025,0.05}, + std::array m_box_mat_quantity; // {0.485,0.176758,0.037,0.05}, {0.485,0.250977,0.037,0.05}, {0.485,0.325196,0.037,0.05}, {0.485,0.399415,0.037,0.05}, {0.485,0.473634,0.037,0.05}, {0.485,0.547853,0.037,0.05}, {0.485,0.622072,0.037,0.05}, {0.485,0.696291,0.037,0.05}, {0.485,0.77051,0.037,0.05}, {0.485,0.844729,0.037,0.05}, + std::array m_box_mat_name; // {0.09,0.176758,0.275,0.05}, {0.09,0.250977,0.275,0.05}, {0.09,0.325196,0.275,0.05}, {0.09,0.399415,0.275,0.05}, {0.09,0.473634,0.275,0.05}, {0.09,0.547853,0.275,0.05}, {0.09,0.622072,0.275,0.05}, {0.09,0.696291,0.275,0.05}, {0.09,0.77051,0.275,0.05}, {0.09,0.844729,0.275,0.05}, + + }; From 3972b8c28f6163b66b773296c70d7223bd363309 Mon Sep 17 00:00:00 2001 From: jw098 Date: Sat, 29 Mar 2025 01:47:10 -0700 Subject: [PATCH 4/8] refactor read_number_waterfill() --- .../CommonTools/OCR/OCR_NumberReader.cpp | 50 ++----------------- .../Source/CommonTools/OCR/OCR_NumberReader.h | 1 + 2 files changed, 5 insertions(+), 46 deletions(-) diff --git a/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.cpp b/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.cpp index 74c8c77be0..8ae9fe0204 100644 --- a/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.cpp +++ b/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.cpp @@ -105,52 +105,10 @@ int read_number_waterfill( uint32_t rgb32_min, uint32_t rgb32_max, bool text_inside_range ){ - using namespace Kernels::Waterfill; - - // Direct OCR is unreliable. Instead, we will waterfill each character - // to isolate them, then OCR them individually. - - ImageRGB32 filtered = to_blackwhite_rgb32_range(image, rgb32_min, rgb32_max, text_inside_range); - -// static int c = 0; -// filtered.save("test-" + std::to_string(c++) + ".png"); - - PackedBinaryMatrix matrix = compress_rgb32_to_binary_range(filtered, 0xff000000, 0xff7f7f7f); - - std::map map; - { - std::unique_ptr session = make_WaterfillSession(matrix); - auto iter = session->make_iterator(20); - WaterfillObject object; - while (map.size() < 16 && iter->find_next(object, true)){ - map.emplace(object.min_x, std::move(object)); - } - } - - std::string ocr_text; - for (const auto& item : map){ - const WaterfillObject& object = item.second; - ImageRGB32 cropped = extract_box_reference(filtered, object).copy(); - PackedBinaryMatrix tmp(object.packed_matrix()); - filter_by_mask(tmp, cropped, Color(0xffffffff), true); - ImageRGB32 padded = pad_image(cropped, cropped.width(), 0xffffffff); - std::string ocr = OCR::ocr_read(Language::English, padded); - if (!ocr.empty()){ - ocr_text += ocr[0]; - } - } - - std::string normalized = run_number_normalization(ocr_text); - - if (normalized.empty()){ - logger.log("OCR Text: \"" + ocr_text + "\" -> \"" + normalized + "\" -> Unable to read.", COLOR_RED); - return -1; - } - - int number = std::atoi(normalized.c_str()); - logger.log("OCR Text: \"" + ocr_text + "\" -> \"" + normalized + "\" -> " + std::to_string(number)); - - return number; + const std::vector> filters = { + {rgb32_min, rgb32_max} + }; + return read_number_waterfill(logger, image, filters, UINT32_MAX, text_inside_range); } int read_number_waterfill( diff --git a/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.h b/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.h index 1580dd5db8..2667fa17b5 100644 --- a/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.h +++ b/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.h @@ -35,6 +35,7 @@ int read_number_waterfill( // applies color filters on the text, until each individual waterfilled character/number is less than width_max. then apply OCR. // this solves the problem where characters are too close to each other and touch, causing more than one character to be OCR'ed +// also, if running OCR on a character results in an empty string, try the next color filter // the filters should be arranged in order of preference. (probably should be arranged broadest to narrowest) int read_number_waterfill( Logger& logger, const ImageViewRGB32& image, From db560e7409234ecf35d5b6437f13b38583c91687 Mon Sep 17 00:00:00 2001 From: jw098 Date: Sat, 29 Mar 2025 12:29:42 -0700 Subject: [PATCH 5/8] update read_number_waterfill() --- .../CommonTools/OCR/OCR_NumberReader.cpp | 151 +++++++++--------- .../Source/CommonTools/OCR/OCR_NumberReader.h | 4 +- .../PokemonSV_ItemPrinterMaterialDetector.cpp | 6 +- 3 files changed, 85 insertions(+), 76 deletions(-) diff --git a/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.cpp b/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.cpp index 8ae9fe0204..d3511b75c2 100644 --- a/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.cpp +++ b/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.cpp @@ -102,97 +102,104 @@ int read_number(Logger& logger, const ImageViewRGB32& image, Language language){ int read_number_waterfill( Logger& logger, const ImageViewRGB32& image, - uint32_t rgb32_min, uint32_t rgb32_max, - bool text_inside_range -){ - const std::vector> filters = { - {rgb32_min, rgb32_max} - }; - return read_number_waterfill(logger, image, filters, UINT32_MAX, text_inside_range); -} - -int read_number_waterfill( - Logger& logger, const ImageViewRGB32& image, - std::vector> filters, + uint32_t rgb32_min, uint32_t rgb32_max, + bool text_inside_range, uint32_t width_max, - bool text_inside_range + bool check_empty_string ){ using namespace Kernels::Waterfill; + // Direct OCR is unreliable. Instead, we will waterfill each character + // to isolate them, then OCR them individually. + ImageRGB32 filtered = to_blackwhite_rgb32_range(image, rgb32_min, rgb32_max, text_inside_range); - for (std::pair filter : filters){ +// static int c = 0; +// int i = 0; +// filtered.save("test-" + std::to_string(c++) + ".png"); - uint32_t rgb32_min = filter.first; - uint32_t rgb32_max = filter.second; - - // Direct OCR is unreliable. Instead, we will waterfill each character - // to isolate them, then OCR them individually. - - ImageRGB32 filtered = to_blackwhite_rgb32_range(image, rgb32_min, rgb32_max, text_inside_range); - - // static int c = 0; - // int i = 0; - // filtered.save("test-" + std::to_string(c++) + ".png"); - - PackedBinaryMatrix matrix = compress_rgb32_to_binary_range(filtered, 0xff000000, 0xff7f7f7f); - - std::map map; - { - std::unique_ptr session = make_WaterfillSession(matrix); - auto iter = session->make_iterator(20); - WaterfillObject object; - bool exceed_width_max = false; - while (map.size() < 16 && iter->find_next(object, true)){ - if (object.width() > width_max){ - exceed_width_max = true; - break; - } - map.emplace(object.min_x, std::move(object)); - } - if (exceed_width_max){ - // try the next color filter - continue; + PackedBinaryMatrix matrix = compress_rgb32_to_binary_range(filtered, 0xff000000, 0xff7f7f7f); + + std::map map; + { + std::unique_ptr session = make_WaterfillSession(matrix); + auto iter = session->make_iterator(20); + WaterfillObject object; + while (map.size() < 16 && iter->find_next(object, true)){ + if (object.width() > width_max){ + logger.log("Skipped this filter: character exceeded max width."); + return -1; } + map.emplace(object.min_x, std::move(object)); } + } - std::string ocr_text; - bool empty_char = false; - for (const auto& item : map){ - const WaterfillObject& object = item.second; - ImageRGB32 cropped = extract_box_reference(filtered, object).copy(); - PackedBinaryMatrix tmp(object.packed_matrix()); - filter_by_mask(tmp, cropped, Color(0xffffffff), true); - ImageRGB32 padded = pad_image(cropped, cropped.width(), 0xffffffff); - std::string ocr = OCR::ocr_read(Language::English, padded); - // padded.save("test-cropped" + std::to_string(c) + "-" + std::to_string(i++) + ".png"); - // std::cout << ocr[0] << std::endl; - if (!ocr.empty()){ - ocr_text += ocr[0]; - }else{ - empty_char = true; - break; + std::string ocr_text; + for (const auto& item : map){ + const WaterfillObject& object = item.second; + ImageRGB32 cropped = extract_box_reference(filtered, object).copy(); + PackedBinaryMatrix tmp(object.packed_matrix()); + filter_by_mask(tmp, cropped, Color(0xffffffff), true); + ImageRGB32 padded = pad_image(cropped, cropped.width(), 0xffffffff); + std::string ocr = OCR::ocr_read(Language::English, padded); + // padded.save("test-cropped" + std::to_string(c) + "-" + std::to_string(i++) + ".png"); + // std::cout << ocr[0] << std::endl; + if (!ocr.empty()){ + ocr_text += ocr[0]; + }else{ + if (check_empty_string){ + logger.log("Skipped this filter: empty string."); + return -1; } } - if (empty_char){ - // try the next color filter - continue; - } + } + + std::string normalized = run_number_normalization(ocr_text); + + if (normalized.empty()){ + logger.log("OCR Text: \"" + ocr_text + "\" -> \"" + normalized + "\" -> Unable to read.", COLOR_RED); + return -1; + } + + int number = std::atoi(normalized.c_str()); + logger.log("OCR Text: \"" + ocr_text + "\" -> \"" + normalized + "\" -> " + std::to_string(number)); + + return number; +} + +int read_number_waterfill( + Logger& logger, const ImageViewRGB32& image, + std::vector> filters, + uint32_t width_max, + bool text_inside_range +){ + - std::string normalized = run_number_normalization(ocr_text); + std::map candidates; + for (std::pair filter : filters){ - if (normalized.empty()){ - logger.log("OCR Text: \"" + ocr_text + "\" -> \"" + normalized + "\" -> Unable to read.", COLOR_RED); - return -1; + uint32_t rgb32_min = filter.first; + uint32_t rgb32_max = filter.second; + int candidate = read_number_waterfill(logger, image, rgb32_min, rgb32_max, text_inside_range, width_max, true); + if (candidate != -1){ + candidates[candidate]++; } + } - int number = std::atoi(normalized.c_str()); - logger.log("OCR Text: \"" + ocr_text + "\" -> \"" + normalized + "\" -> " + std::to_string(number)); + if (candidates.empty()){ + logger.log("Unable to read number."); + return -1; + } - return number; + std::pair best; + for (const auto& item : candidates){ + if (item.second > best.second){ + best = item; + } } - return -1; + logger.log("Best candidate: --------------------------> " + std::to_string(best.first)); + return best.first; } diff --git a/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.h b/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.h index 2667fa17b5..5bc0e3faad 100644 --- a/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.h +++ b/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.h @@ -30,7 +30,9 @@ int read_number(Logger& logger, const ImageViewRGB32& image, Language language = int read_number_waterfill( Logger& logger, const ImageViewRGB32& image, uint32_t rgb32_min, uint32_t rgb32_max, - bool text_inside_range = true + bool text_inside_range = true, + uint32_t width_max = UINT32_MAX, + bool check_empty_string = false ); // applies color filters on the text, until each individual waterfilled character/number is less than width_max. then apply OCR. diff --git a/SerialPrograms/Source/PokemonSV/Inference/ItemPrinter/PokemonSV_ItemPrinterMaterialDetector.cpp b/SerialPrograms/Source/PokemonSV/Inference/ItemPrinter/PokemonSV_ItemPrinterMaterialDetector.cpp index aa963a5de0..c8f690dd7a 100644 --- a/SerialPrograms/Source/PokemonSV/Inference/ItemPrinter/PokemonSV_ItemPrinterMaterialDetector.cpp +++ b/SerialPrograms/Source/PokemonSV/Inference/ItemPrinter/PokemonSV_ItemPrinterMaterialDetector.cpp @@ -119,9 +119,9 @@ int16_t ItemPrinterMaterialDetector::read_number( {0xffa0a0a0, 0xffffffff}, {0xffb0b0b0, 0xffffffff}, {0xffc0c0c0, 0xffffffff}, - {0xffd0d0d0, 0xffffffff}, - {0xffe0e0e0, 0xffffffff}, - {0xfff0f0f0, 0xffffffff}, + // {0xffd0d0d0, 0xffffffff}, + // {0xffe0e0e0, 0xffffffff}, + // {0xfff0f0f0, 0xffffffff}, }; number = (int16_t)OCR::read_number_waterfill(logger, cropped, filters, 24); } From 7537c66a80a045d1423b167facb2e3b086b47a32 Mon Sep 17 00:00:00 2001 From: jw098 Date: Sat, 29 Mar 2025 13:17:37 -0700 Subject: [PATCH 6/8] update item printer material detector color filters for OCR --- .../CommonTools/OCR/OCR_NumberReader.cpp | 5 ++-- .../DevPrograms/TestProgramSwitch.cpp | 10 ++++--- .../DevPrograms/TestProgramSwitch.h | 2 ++ .../PokemonSV_ItemPrinterMaterialDetector.cpp | 26 ++++++++++++------- 4 files changed, 27 insertions(+), 16 deletions(-) diff --git a/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.cpp b/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.cpp index d3511b75c2..7fae456198 100644 --- a/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.cpp +++ b/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.cpp @@ -48,6 +48,7 @@ std::string run_number_normalization(const std::string& input){ {'S', '5'}, {'s', '5'}, {'/', '7'}, + {'g', '9'}, // Japanese OCR likes to do this. {U'🄋', '0'}, @@ -115,8 +116,8 @@ int read_number_waterfill( ImageRGB32 filtered = to_blackwhite_rgb32_range(image, rgb32_min, rgb32_max, text_inside_range); // static int c = 0; +// filtered.save("zztest-" + std::to_string(c++) + ".png"); // int i = 0; -// filtered.save("test-" + std::to_string(c++) + ".png"); PackedBinaryMatrix matrix = compress_rgb32_to_binary_range(filtered, 0xff000000, 0xff7f7f7f); @@ -142,7 +143,7 @@ int read_number_waterfill( filter_by_mask(tmp, cropped, Color(0xffffffff), true); ImageRGB32 padded = pad_image(cropped, cropped.width(), 0xffffffff); std::string ocr = OCR::ocr_read(Language::English, padded); - // padded.save("test-cropped" + std::to_string(c) + "-" + std::to_string(i++) + ".png"); + // padded.save("zztest-cropped" + std::to_string(c) + "-" + std::to_string(i++) + ".png"); // std::cout << ocr[0] << std::endl; if (!ocr.empty()){ ocr_text += ocr[0]; diff --git a/SerialPrograms/Source/NintendoSwitch/DevPrograms/TestProgramSwitch.cpp b/SerialPrograms/Source/NintendoSwitch/DevPrograms/TestProgramSwitch.cpp index b8a28b3506..40ed48e189 100644 --- a/SerialPrograms/Source/NintendoSwitch/DevPrograms/TestProgramSwitch.cpp +++ b/SerialPrograms/Source/NintendoSwitch/DevPrograms/TestProgramSwitch.cpp @@ -219,6 +219,7 @@ TestProgram::TestProgram() LockMode::LOCK_WHILE_RUNNING, false ) + , IMAGE_PATH(false, "Path to image for testing", LockMode::UNLOCK_WHILE_RUNNING, "default.png", "default.png") , STATIC_TEXT("Test text...") , SELECT("String Select", test_database(), LockMode::LOCK_WHILE_RUNNING, 0) // , PLAYER_LIST("Test Table", LockMode::UNLOCK_WHILE_RUNNING, "Notes") @@ -258,6 +259,7 @@ TestProgram::TestProgram() PA_ADD_OPTION(BUTTON0); PA_ADD_OPTION(BUTTON1); PA_ADD_OPTION(LANGUAGE); + PA_ADD_OPTION(IMAGE_PATH); PA_ADD_OPTION(STATIC_TEXT); PA_ADD_OPTION(SELECT); // PA_ADD_OPTION(PLAYER_LIST); @@ -314,14 +316,14 @@ void TestProgram::program(MultiSwitchProgramEnvironment& env, CancellableScope& // std::terminate(); #if 1 - ImageRGB32 image("720p1.png"); - // auto image = feed.snapshot(); + // ImageRGB32 image(IMAGE_PATH); + auto image = feed.snapshot(); ItemPrinterMaterialDetector detector(COLOR_RED, Language::English); std::vector boxes = { - {0.485,0.176758,0.037,0.05}, {0.485,0.250977,0.037,0.05}, {0.485,0.325196,0.037,0.05}, {0.485,0.399415,0.037,0.05}, {0.485,0.473634,0.037,0.05}, {0.485,0.547853,0.037,0.05}, {0.485,0.622072,0.037,0.05}, {0.485,0.696291,0.037,0.05}, {0.485,0.77051,0.037,0.05}, {0.485,0.844729,0.037,0.05}, - // {0.39,0.176758,0.025,0.05}, {0.39,0.250977,0.025,0.05}, {0.39,0.325196,0.025,0.05}, {0.39,0.399415,0.025,0.05}, {0.39,0.473634,0.025,0.05}, {0.39,0.547853,0.025,0.05}, {0.39,0.622072,0.025,0.05}, {0.39,0.696291,0.025,0.05}, {0.39,0.77051,0.025,0.05}, {0.39,0.844729,0.025,0.05}, + // {0.485,0.176758,0.037,0.05}, {0.485,0.250977,0.037,0.05}, {0.485,0.325196,0.037,0.05}, {0.485,0.399415,0.037,0.05}, {0.485,0.473634,0.037,0.05}, {0.485,0.547853,0.037,0.05}, {0.485,0.622072,0.037,0.05}, {0.485,0.696291,0.037,0.05}, {0.485,0.77051,0.037,0.05}, {0.485,0.844729,0.037,0.05}, + {0.39,0.176758,0.025,0.05}, {0.39,0.250977,0.025,0.05}, {0.39,0.325196,0.025,0.05}, {0.39,0.399415,0.025,0.05}, {0.39,0.473634,0.025,0.05}, {0.39,0.547853,0.025,0.05}, {0.39,0.622072,0.025,0.05}, {0.39,0.696291,0.025,0.05}, {0.39,0.77051,0.025,0.05}, {0.39,0.844729,0.025,0.05}, }; for (ImageFloatBox box : boxes){ detector.read_number(console.logger(), env.inference_dispatcher(), image, box); diff --git a/SerialPrograms/Source/NintendoSwitch/DevPrograms/TestProgramSwitch.h b/SerialPrograms/Source/NintendoSwitch/DevPrograms/TestProgramSwitch.h index b5f31c5d8c..16a4546305 100644 --- a/SerialPrograms/Source/NintendoSwitch/DevPrograms/TestProgramSwitch.h +++ b/SerialPrograms/Source/NintendoSwitch/DevPrograms/TestProgramSwitch.h @@ -68,6 +68,8 @@ class TestProgram : public MultiSwitchProgramInstance, public ButtonListener{ OCR::LanguageOCROption LANGUAGE; + StringOption IMAGE_PATH; + StaticTextOption STATIC_TEXT; StringSelectOption SELECT; diff --git a/SerialPrograms/Source/PokemonSV/Inference/ItemPrinter/PokemonSV_ItemPrinterMaterialDetector.cpp b/SerialPrograms/Source/PokemonSV/Inference/ItemPrinter/PokemonSV_ItemPrinterMaterialDetector.cpp index c8f690dd7a..c291d5e9eb 100644 --- a/SerialPrograms/Source/PokemonSV/Inference/ItemPrinter/PokemonSV_ItemPrinterMaterialDetector.cpp +++ b/SerialPrograms/Source/PokemonSV/Inference/ItemPrinter/PokemonSV_ItemPrinterMaterialDetector.cpp @@ -101,24 +101,30 @@ int16_t ItemPrinterMaterialDetector::read_number( int16_t number; if (is_dark_text_light_background){ const std::vector> filters = { + // {0xff000000, 0xffb0b0b0}, + {0xff000000, 0xffa0a0a0}, + {0xff000000, 0xff959595}, {0xff000000, 0xff909090}, + {0xff000000, 0xff858585}, {0xff000000, 0xff808080}, - {0xff000000, 0xff707070}, - {0xff000000, 0xff606060}, - {0xff000000, 0xff505050}, - {0xff000000, 0xff404040}, - {0xff000000, 0xff303030}, - {0xff000000, 0xff202020}, - {0xff000000, 0xff101010}, + // {0xff000000, 0xff707070}, + // {0xff000000, 0xff606060}, + // {0xff000000, 0xff505050}, + // {0xff000000, 0xff404040}, + // {0xff000000, 0xff303030}, + // {0xff000000, 0xff202020}, + // {0xff000000, 0xff101010}, }; number = (int16_t)OCR::read_number_waterfill(logger, cropped, filters, 24); }else{ const std::vector> filters = { - // {0xff808080, 0xffffffff}, + {0xff808080, 0xffffffff}, + {0xff858585, 0xffffffff}, {0xff909090, 0xffffffff}, + {0xff959595, 0xffffffff}, {0xffa0a0a0, 0xffffffff}, - {0xffb0b0b0, 0xffffffff}, - {0xffc0c0c0, 0xffffffff}, + // {0xffb0b0b0, 0xffffffff}, + // {0xffc0c0c0, 0xffffffff}, // {0xffd0d0d0, 0xffffffff}, // {0xffe0e0e0, 0xffffffff}, // {0xfff0f0f0, 0xffffffff}, From 50e040a6d6a658db34bfa8f7df0302cdd83918ee Mon Sep 17 00:00:00 2001 From: jw098 Date: Sat, 29 Mar 2025 13:18:44 -0700 Subject: [PATCH 7/8] minor change --- SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.cpp b/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.cpp index 7fae456198..e393c12bf5 100644 --- a/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.cpp +++ b/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.cpp @@ -188,7 +188,7 @@ int read_number_waterfill( } if (candidates.empty()){ - logger.log("Unable to read number."); + logger.log("No valid OCR candidates. Unable to read number."); return -1; } From 0bbeeeeac6649968dfe4ec4a8be64b3f022fe206 Mon Sep 17 00:00:00 2001 From: jw098 Date: Sat, 29 Mar 2025 13:28:20 -0700 Subject: [PATCH 8/8] update comments --- .../Source/CommonTools/OCR/OCR_NumberReader.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.h b/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.h index 5bc0e3faad..93b1bda8ab 100644 --- a/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.h +++ b/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.h @@ -27,6 +27,10 @@ int read_number(Logger& logger, const ImageViewRGB32& image, Language language = // This version attempts to improve reliability by first isolating each number // via waterfill. Then it OCRs each number by itself and recombines them at the // end. This requires specifying the color range for the text. +// +// text_inside_range: binary filter is applied to the image so that any pixels within the color range will be turned black, and everything else will be white +// width_max: return -1 if any character's width is greater than width_max (likely means that two characters are touching, and so are treated as one large character) +// check_empty_string: if set to true, return -1 (and stop evaluation) if any character returns an empty string from OCR int read_number_waterfill( Logger& logger, const ImageViewRGB32& image, uint32_t rgb32_min, uint32_t rgb32_max, @@ -35,10 +39,8 @@ int read_number_waterfill( bool check_empty_string = false ); -// applies color filters on the text, until each individual waterfilled character/number is less than width_max. then apply OCR. -// this solves the problem where characters are too close to each other and touch, causing more than one character to be OCR'ed -// also, if running OCR on a character results in an empty string, try the next color filter -// the filters should be arranged in order of preference. (probably should be arranged broadest to narrowest) +// Try OCR with all the given color filters. +// Return the best majority candidate int read_number_waterfill( Logger& logger, const ImageViewRGB32& image, std::vector> filters,