diff --git a/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.cpp b/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.cpp index ab5ac2b3fc..e393c12bf5 100644 --- a/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.cpp +++ b/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.cpp @@ -17,9 +17,9 @@ #include "OCR_RawOCR.h" #include "OCR_NumberReader.h" -// #include -// using std::cout; -// using std::endl; +#include +using std::cout; +using std::endl; namespace PokemonAutomation{ namespace OCR{ @@ -39,6 +39,7 @@ std::string run_number_normalization(const std::string& input){ {'9', '9'}, // Common misreads. + {'|', '1'}, {']', '1'}, {'l', '1'}, {'i', '1'}, @@ -47,6 +48,7 @@ std::string run_number_normalization(const std::string& input){ {'S', '5'}, {'s', '5'}, {'/', '7'}, + {'g', '9'}, // Japanese OCR likes to do this. {U'🄋', '0'}, @@ -101,8 +103,10 @@ int read_number(Logger& logger, const ImageViewRGB32& image, Language language){ int read_number_waterfill( Logger& logger, const ImageViewRGB32& image, - uint32_t rgb32_min, uint32_t rgb32_max, - bool text_inside_range + uint32_t rgb32_min, uint32_t rgb32_max, + bool text_inside_range, + uint32_t width_max, + bool check_empty_string ){ using namespace Kernels::Waterfill; @@ -112,7 +116,8 @@ int read_number_waterfill( ImageRGB32 filtered = to_blackwhite_rgb32_range(image, rgb32_min, rgb32_max, text_inside_range); // static int c = 0; -// filtered.save("test-" + std::to_string(c++) + ".png"); +// filtered.save("zztest-" + std::to_string(c++) + ".png"); +// int i = 0; PackedBinaryMatrix matrix = compress_rgb32_to_binary_range(filtered, 0xff000000, 0xff7f7f7f); @@ -122,6 +127,10 @@ int read_number_waterfill( auto iter = session->make_iterator(20); WaterfillObject object; while (map.size() < 16 && iter->find_next(object, true)){ + if (object.width() > width_max){ + logger.log("Skipped this filter: character exceeded max width."); + return -1; + } map.emplace(object.min_x, std::move(object)); } } @@ -129,13 +138,20 @@ int read_number_waterfill( std::string ocr_text; for (const auto& item : map){ const WaterfillObject& object = item.second; - ImageRGB32 cropped = extract_box_reference(filtered, object).copy(); + ImageRGB32 cropped = extract_box_reference(filtered, object).copy(); PackedBinaryMatrix tmp(object.packed_matrix()); filter_by_mask(tmp, cropped, Color(0xffffffff), true); ImageRGB32 padded = pad_image(cropped, cropped.width(), 0xffffffff); std::string ocr = OCR::ocr_read(Language::English, padded); + // padded.save("zztest-cropped" + std::to_string(c) + "-" + std::to_string(i++) + ".png"); + // std::cout << ocr[0] << std::endl; if (!ocr.empty()){ ocr_text += ocr[0]; + }else{ + if (check_empty_string){ + logger.log("Skipped this filter: empty string."); + return -1; + } } } @@ -152,6 +168,41 @@ int read_number_waterfill( return number; } +int read_number_waterfill( + Logger& logger, const ImageViewRGB32& image, + std::vector> filters, + uint32_t width_max, + bool text_inside_range +){ + + + std::map candidates; + for (std::pair filter : filters){ + + uint32_t rgb32_min = filter.first; + uint32_t rgb32_max = filter.second; + int candidate = read_number_waterfill(logger, image, rgb32_min, rgb32_max, text_inside_range, width_max, true); + if (candidate != -1){ + candidates[candidate]++; + } + } + + if (candidates.empty()){ + logger.log("No valid OCR candidates. Unable to read number."); + return -1; + } + + std::pair best; + for (const auto& item : candidates){ + if (item.second > best.second){ + best = item; + } + } + + logger.log("Best candidate: --------------------------> " + std::to_string(best.first)); + return best.first; +} + diff --git a/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.h b/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.h index 99093a0863..93b1bda8ab 100644 --- a/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.h +++ b/SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.h @@ -8,6 +8,7 @@ #define PokemonAutomation_CommonTools_OCR_NumberReader_H #include +#include #include "CommonFramework/Language.h" namespace PokemonAutomation{ @@ -26,12 +27,26 @@ int read_number(Logger& logger, const ImageViewRGB32& image, Language language = // This version attempts to improve reliability by first isolating each number // via waterfill. Then it OCRs each number by itself and recombines them at the // end. This requires specifying the color range for the text. +// +// text_inside_range: binary filter is applied to the image so that any pixels within the color range will be turned black, and everything else will be white +// width_max: return -1 if any character's width is greater than width_max (likely means that two characters are touching, and so are treated as one large character) +// check_empty_string: if set to true, return -1 (and stop evaluation) if any character returns an empty string from OCR int read_number_waterfill( Logger& logger, const ImageViewRGB32& image, uint32_t rgb32_min, uint32_t rgb32_max, - bool text_inside_range = true + bool text_inside_range = true, + uint32_t width_max = UINT32_MAX, + bool check_empty_string = false ); +// Try OCR with all the given color filters. +// Return the best majority candidate +int read_number_waterfill( + Logger& logger, const ImageViewRGB32& image, + std::vector> filters, + uint32_t width_max, + bool text_inside_range = true + ); } diff --git a/SerialPrograms/Source/NintendoSwitch/DevPrograms/TestProgramSwitch.cpp b/SerialPrograms/Source/NintendoSwitch/DevPrograms/TestProgramSwitch.cpp index a80f3cdb49..40ed48e189 100644 --- a/SerialPrograms/Source/NintendoSwitch/DevPrograms/TestProgramSwitch.cpp +++ b/SerialPrograms/Source/NintendoSwitch/DevPrograms/TestProgramSwitch.cpp @@ -219,6 +219,7 @@ TestProgram::TestProgram() LockMode::LOCK_WHILE_RUNNING, false ) + , IMAGE_PATH(false, "Path to image for testing", LockMode::UNLOCK_WHILE_RUNNING, "default.png", "default.png") , STATIC_TEXT("Test text...") , SELECT("String Select", test_database(), LockMode::LOCK_WHILE_RUNNING, 0) // , PLAYER_LIST("Test Table", LockMode::UNLOCK_WHILE_RUNNING, "Notes") @@ -258,6 +259,7 @@ TestProgram::TestProgram() PA_ADD_OPTION(BUTTON0); PA_ADD_OPTION(BUTTON1); PA_ADD_OPTION(LANGUAGE); + PA_ADD_OPTION(IMAGE_PATH); PA_ADD_OPTION(STATIC_TEXT); PA_ADD_OPTION(SELECT); // PA_ADD_OPTION(PLAYER_LIST); @@ -313,12 +315,31 @@ void TestProgram::program(MultiSwitchProgramEnvironment& env, CancellableScope& // std::terminate(); +#if 1 + // ImageRGB32 image(IMAGE_PATH); + auto image = feed.snapshot(); + + ItemPrinterMaterialDetector detector(COLOR_RED, Language::English); + + std::vector boxes = { + // {0.485,0.176758,0.037,0.05}, {0.485,0.250977,0.037,0.05}, {0.485,0.325196,0.037,0.05}, {0.485,0.399415,0.037,0.05}, {0.485,0.473634,0.037,0.05}, {0.485,0.547853,0.037,0.05}, {0.485,0.622072,0.037,0.05}, {0.485,0.696291,0.037,0.05}, {0.485,0.77051,0.037,0.05}, {0.485,0.844729,0.037,0.05}, + {0.39,0.176758,0.025,0.05}, {0.39,0.250977,0.025,0.05}, {0.39,0.325196,0.025,0.05}, {0.39,0.399415,0.025,0.05}, {0.39,0.473634,0.025,0.05}, {0.39,0.547853,0.025,0.05}, {0.39,0.622072,0.025,0.05}, {0.39,0.696291,0.025,0.05}, {0.39,0.77051,0.025,0.05}, {0.39,0.844729,0.025,0.05}, + }; + for (ImageFloatBox box : boxes){ + detector.read_number(console.logger(), env.inference_dispatcher(), image, box); + } + +#endif + +#if 0 + ImageRGB32 image("20250323-011605651979.png"); DialogBoxDetector detector; detector.make_overlays(overlays); cout << detector.detect(image) << endl; +#endif #if 0 diff --git a/SerialPrograms/Source/NintendoSwitch/DevPrograms/TestProgramSwitch.h b/SerialPrograms/Source/NintendoSwitch/DevPrograms/TestProgramSwitch.h index b5f31c5d8c..16a4546305 100644 --- a/SerialPrograms/Source/NintendoSwitch/DevPrograms/TestProgramSwitch.h +++ b/SerialPrograms/Source/NintendoSwitch/DevPrograms/TestProgramSwitch.h @@ -68,6 +68,8 @@ class TestProgram : public MultiSwitchProgramInstance, public ButtonListener{ OCR::LanguageOCROption LANGUAGE; + StringOption IMAGE_PATH; + StaticTextOption STATIC_TEXT; StringSelectOption SELECT; diff --git a/SerialPrograms/Source/PokemonSV/Inference/ItemPrinter/PokemonSV_ItemPrinterMaterialDetector.cpp b/SerialPrograms/Source/PokemonSV/Inference/ItemPrinter/PokemonSV_ItemPrinterMaterialDetector.cpp index 0edd1160fd..c291d5e9eb 100644 --- a/SerialPrograms/Source/PokemonSV/Inference/ItemPrinter/PokemonSV_ItemPrinterMaterialDetector.cpp +++ b/SerialPrograms/Source/PokemonSV/Inference/ItemPrinter/PokemonSV_ItemPrinterMaterialDetector.cpp @@ -18,7 +18,7 @@ #include "NintendoSwitch/Commands/NintendoSwitch_Commands_PushButtons.h" #include "PokemonSV_ItemPrinterMaterialDetector.h" -//#include +// #include namespace PokemonAutomation{ namespace NintendoSwitch{ @@ -69,7 +69,9 @@ std::array ItemPrinterMaterialDetector::Material_Boxes(ImageF for (size_t i = 0; i < 10; i++){ double y = initial_y + i*y_spacing; material_boxes[i] = ImageFloatBox(x, y, width, height); + // std::cout << "{" << x << "," << y << "," << width << "," << height << "}, "; } + // std::cout << std::endl; return material_boxes; } @@ -98,9 +100,36 @@ int16_t ItemPrinterMaterialDetector::read_number( int16_t number; if (is_dark_text_light_background){ - number = (int16_t)OCR::read_number_waterfill(logger, cropped, 0xff000000, 0xff808080); + const std::vector> filters = { + // {0xff000000, 0xffb0b0b0}, + {0xff000000, 0xffa0a0a0}, + {0xff000000, 0xff959595}, + {0xff000000, 0xff909090}, + {0xff000000, 0xff858585}, + {0xff000000, 0xff808080}, + // {0xff000000, 0xff707070}, + // {0xff000000, 0xff606060}, + // {0xff000000, 0xff505050}, + // {0xff000000, 0xff404040}, + // {0xff000000, 0xff303030}, + // {0xff000000, 0xff202020}, + // {0xff000000, 0xff101010}, + }; + number = (int16_t)OCR::read_number_waterfill(logger, cropped, filters, 24); }else{ - number = (int16_t)OCR::read_number_waterfill(logger, cropped, 0xff808080, 0xffffffff); + const std::vector> filters = { + {0xff808080, 0xffffffff}, + {0xff858585, 0xffffffff}, + {0xff909090, 0xffffffff}, + {0xff959595, 0xffffffff}, + {0xffa0a0a0, 0xffffffff}, + // {0xffb0b0b0, 0xffffffff}, + // {0xffc0c0c0, 0xffffffff}, + // {0xffd0d0d0, 0xffffffff}, + // {0xffe0e0e0, 0xffffffff}, + // {0xfff0f0f0, 0xffffffff}, + }; + number = (int16_t)OCR::read_number_waterfill(logger, cropped, filters, 24); } if (number < 1 || number > 999){ diff --git a/SerialPrograms/Source/PokemonSV/Inference/ItemPrinter/PokemonSV_ItemPrinterMaterialDetector.h b/SerialPrograms/Source/PokemonSV/Inference/ItemPrinter/PokemonSV_ItemPrinterMaterialDetector.h index d96de3b170..f99c4ea356 100644 --- a/SerialPrograms/Source/PokemonSV/Inference/ItemPrinter/PokemonSV_ItemPrinterMaterialDetector.h +++ b/SerialPrograms/Source/PokemonSV/Inference/ItemPrinter/PokemonSV_ItemPrinterMaterialDetector.h @@ -74,8 +74,6 @@ class ItemPrinterMaterialDetector{ int8_t row_index ) const; - -private: int16_t read_number( Logger& logger, AsyncDispatcher& dispatcher, const ImageViewRGB32& screen, const ImageFloatBox& box @@ -85,9 +83,11 @@ class ItemPrinterMaterialDetector{ private: Color m_color; Language m_language; - std::array m_box_mat_value; - std::array m_box_mat_quantity; - std::array m_box_mat_name; + std::array m_box_mat_value; // {0.39,0.176758,0.025,0.05}, {0.39,0.250977,0.025,0.05}, {0.39,0.325196,0.025,0.05}, {0.39,0.399415,0.025,0.05}, {0.39,0.473634,0.025,0.05}, {0.39,0.547853,0.025,0.05}, {0.39,0.622072,0.025,0.05}, {0.39,0.696291,0.025,0.05}, {0.39,0.77051,0.025,0.05}, {0.39,0.844729,0.025,0.05}, + std::array m_box_mat_quantity; // {0.485,0.176758,0.037,0.05}, {0.485,0.250977,0.037,0.05}, {0.485,0.325196,0.037,0.05}, {0.485,0.399415,0.037,0.05}, {0.485,0.473634,0.037,0.05}, {0.485,0.547853,0.037,0.05}, {0.485,0.622072,0.037,0.05}, {0.485,0.696291,0.037,0.05}, {0.485,0.77051,0.037,0.05}, {0.485,0.844729,0.037,0.05}, + std::array m_box_mat_name; // {0.09,0.176758,0.275,0.05}, {0.09,0.250977,0.275,0.05}, {0.09,0.325196,0.275,0.05}, {0.09,0.399415,0.275,0.05}, {0.09,0.473634,0.275,0.05}, {0.09,0.547853,0.275,0.05}, {0.09,0.622072,0.275,0.05}, {0.09,0.696291,0.275,0.05}, {0.09,0.77051,0.275,0.05}, {0.09,0.844729,0.275,0.05}, + + };