Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 58 additions & 7 deletions SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@
#include "OCR_RawOCR.h"
#include "OCR_NumberReader.h"

// #include <iostream>
// using std::cout;
// using std::endl;
#include <iostream>
using std::cout;
using std::endl;

namespace PokemonAutomation{
namespace OCR{
Expand All @@ -39,6 +39,7 @@ std::string run_number_normalization(const std::string& input){
{'9', '9'},

// Common misreads.
{'|', '1'},
{']', '1'},
{'l', '1'},
{'i', '1'},
Expand All @@ -47,6 +48,7 @@ std::string run_number_normalization(const std::string& input){
{'S', '5'},
{'s', '5'},
{'/', '7'},
{'g', '9'},

// Japanese OCR likes to do this.
{U'🄋', '0'},
Expand Down Expand Up @@ -101,8 +103,10 @@ int read_number(Logger& logger, const ImageViewRGB32& image, Language language){

int read_number_waterfill(
Logger& logger, const ImageViewRGB32& image,
uint32_t rgb32_min, uint32_t rgb32_max,
bool text_inside_range
uint32_t rgb32_min, uint32_t rgb32_max,
bool text_inside_range,
uint32_t width_max,
bool check_empty_string
){
using namespace Kernels::Waterfill;

Expand All @@ -112,7 +116,8 @@ int read_number_waterfill(
ImageRGB32 filtered = to_blackwhite_rgb32_range(image, rgb32_min, rgb32_max, text_inside_range);

// static int c = 0;
// filtered.save("test-" + std::to_string(c++) + ".png");
// filtered.save("zztest-" + std::to_string(c++) + ".png");
// int i = 0;

PackedBinaryMatrix matrix = compress_rgb32_to_binary_range(filtered, 0xff000000, 0xff7f7f7f);

Expand All @@ -122,20 +127,31 @@ int read_number_waterfill(
auto iter = session->make_iterator(20);
WaterfillObject object;
while (map.size() < 16 && iter->find_next(object, true)){
if (object.width() > width_max){
logger.log("Skipped this filter: character exceeded max width.");
return -1;
}
map.emplace(object.min_x, std::move(object));
}
}

std::string ocr_text;
for (const auto& item : map){
const WaterfillObject& object = item.second;
ImageRGB32 cropped = extract_box_reference(filtered, object).copy();
ImageRGB32 cropped = extract_box_reference(filtered, object).copy();
PackedBinaryMatrix tmp(object.packed_matrix());
filter_by_mask(tmp, cropped, Color(0xffffffff), true);
ImageRGB32 padded = pad_image(cropped, cropped.width(), 0xffffffff);
std::string ocr = OCR::ocr_read(Language::English, padded);
// padded.save("zztest-cropped" + std::to_string(c) + "-" + std::to_string(i++) + ".png");
// std::cout << ocr[0] << std::endl;
if (!ocr.empty()){
ocr_text += ocr[0];
}else{
if (check_empty_string){
logger.log("Skipped this filter: empty string.");
return -1;
}
}
}

Expand All @@ -152,6 +168,41 @@ int read_number_waterfill(
return number;
}

// Read a number by running the single-filter waterfill OCR once per color
// filter and returning the value that the most filters agree on.
//
// filters: list of {rgb32_min, rgb32_max} color ranges to try.
// width_max: forwarded to the single-filter overload, which rejects the filter
//   (returns -1) if any waterfill object is wider than this.
// text_inside_range: forwarded unchanged to the single-filter overload.
//
// Each filter is evaluated with check_empty_string = true. Filters that
// return -1 do not vote. Returns -1 if no filter produced a value (including
// when "filters" is empty). If several values tie for the most votes, the
// numerically smallest one is returned.
int read_number_waterfill(
    Logger& logger, const ImageViewRGB32& image,
    std::vector<std::pair<uint32_t, uint32_t>> filters,
    uint32_t width_max,
    bool text_inside_range
){
    // Vote tally: OCR'ed number -> how many filters produced it.
    // The count is a size_t so that a long filter list cannot wrap the tally
    // back to zero (an 8-bit counter would wrap at 256 agreeing filters).
    std::map<int, size_t> candidates;
    for (const std::pair<uint32_t, uint32_t>& filter : filters){
        int candidate = read_number_waterfill(
            logger, image,
            filter.first, filter.second,
            text_inside_range, width_max, true
        );
        if (candidate != -1){
            candidates[candidate]++;
        }
    }

    if (candidates.empty()){
        logger.log("No valid OCR candidates. Unable to read number.");
        return -1;
    }

    // Start from the first (smallest) candidate and only replace it on a
    // strictly higher vote count. std::map iterates in ascending key order,
    // so ties resolve to the smallest number.
    auto best = candidates.begin();
    for (auto iter = candidates.begin(); iter != candidates.end(); ++iter){
        if (iter->second > best->second){
            best = iter;
        }
    }

    logger.log("Best candidate: --------------------------> " + std::to_string(best->first));
    return best->first;
}




Expand Down
17 changes: 16 additions & 1 deletion SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#define PokemonAutomation_CommonTools_OCR_NumberReader_H

#include <stdint.h>
#include <vector>
#include "CommonFramework/Language.h"

namespace PokemonAutomation{
Expand All @@ -26,12 +27,26 @@ int read_number(Logger& logger, const ImageViewRGB32& image, Language language =
// This version attempts to improve reliability by first isolating each number
// via waterfill. Then it OCRs each number by itself and recombines them at the
// end. This requires specifying the color range for the text.
//
// text_inside_range: binary filter is applied to the image so that any pixels within the color range will be turned black, and everything else will be white
// width_max: return -1 if any character's width is greater than width_max (likely means that two characters are touching, and so are treated as one large character)
// check_empty_string: if set to true, return -1 (and stop evaluation) if any character returns an empty string from OCR
int read_number_waterfill(
Logger& logger, const ImageViewRGB32& image,
uint32_t rgb32_min, uint32_t rgb32_max,
bool text_inside_range = true
bool text_inside_range = true,
uint32_t width_max = UINT32_MAX,
bool check_empty_string = false
);

// Try OCR with each of the given color filters and return the value that the
// most filters agree on.
//
// filters: list of {rgb32_min, rgb32_max} color ranges. Each range is passed to
//   the single-filter read_number_waterfill() above with check_empty_string = true.
// width_max: forwarded to the single-filter version for every filter.
// text_inside_range: forwarded to the single-filter version for every filter.
//
// Filters for which the single-filter version returns -1 are ignored.
// Returns -1 if every filter returns -1 (or if no filters are given).
// If several values tie for the most votes, the numerically smallest one is
// returned.
int read_number_waterfill(
Logger& logger, const ImageViewRGB32& image,
std::vector<std::pair<uint32_t, uint32_t>> filters,
uint32_t width_max,
bool text_inside_range = true
);


}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ TestProgram::TestProgram()
LockMode::LOCK_WHILE_RUNNING,
false
)
, IMAGE_PATH(false, "Path to image for testing", LockMode::UNLOCK_WHILE_RUNNING, "default.png", "default.png")
, STATIC_TEXT("Test text...")
, SELECT("String Select", test_database(), LockMode::LOCK_WHILE_RUNNING, 0)
// , PLAYER_LIST("Test Table", LockMode::UNLOCK_WHILE_RUNNING, "Notes")
Expand Down Expand Up @@ -258,6 +259,7 @@ TestProgram::TestProgram()
PA_ADD_OPTION(BUTTON0);
PA_ADD_OPTION(BUTTON1);
PA_ADD_OPTION(LANGUAGE);
PA_ADD_OPTION(IMAGE_PATH);
PA_ADD_OPTION(STATIC_TEXT);
PA_ADD_OPTION(SELECT);
// PA_ADD_OPTION(PLAYER_LIST);
Expand Down Expand Up @@ -313,12 +315,31 @@ void TestProgram::program(MultiSwitchProgramEnvironment& env, CancellableScope&

// std::terminate();

#if 1
// ImageRGB32 image(IMAGE_PATH);
auto image = feed.snapshot();

ItemPrinterMaterialDetector detector(COLOR_RED, Language::English);

std::vector<ImageFloatBox> boxes = {
// {0.485,0.176758,0.037,0.05}, {0.485,0.250977,0.037,0.05}, {0.485,0.325196,0.037,0.05}, {0.485,0.399415,0.037,0.05}, {0.485,0.473634,0.037,0.05}, {0.485,0.547853,0.037,0.05}, {0.485,0.622072,0.037,0.05}, {0.485,0.696291,0.037,0.05}, {0.485,0.77051,0.037,0.05}, {0.485,0.844729,0.037,0.05},
{0.39,0.176758,0.025,0.05}, {0.39,0.250977,0.025,0.05}, {0.39,0.325196,0.025,0.05}, {0.39,0.399415,0.025,0.05}, {0.39,0.473634,0.025,0.05}, {0.39,0.547853,0.025,0.05}, {0.39,0.622072,0.025,0.05}, {0.39,0.696291,0.025,0.05}, {0.39,0.77051,0.025,0.05}, {0.39,0.844729,0.025,0.05},
};
for (ImageFloatBox box : boxes){
detector.read_number(console.logger(), env.inference_dispatcher(), image, box);
}

#endif

#if 0

ImageRGB32 image("20250323-011605651979.png");

DialogBoxDetector detector;
detector.make_overlays(overlays);
cout << detector.detect(image) << endl;

#endif


#if 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ class TestProgram : public MultiSwitchProgramInstance, public ButtonListener{

OCR::LanguageOCROption LANGUAGE;

StringOption IMAGE_PATH;

StaticTextOption STATIC_TEXT;

StringSelectOption SELECT;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#include "NintendoSwitch/Commands/NintendoSwitch_Commands_PushButtons.h"
#include "PokemonSV_ItemPrinterMaterialDetector.h"

//#include <iostream>
// #include <iostream>

namespace PokemonAutomation{
namespace NintendoSwitch{
Expand Down Expand Up @@ -69,7 +69,9 @@ std::array<ImageFloatBox, 10> ItemPrinterMaterialDetector::Material_Boxes(ImageF
for (size_t i = 0; i < 10; i++){
double y = initial_y + i*y_spacing;
material_boxes[i] = ImageFloatBox(x, y, width, height);
// std::cout << "{" << x << "," << y << "," << width << "," << height << "}, ";
}
// std::cout << std::endl;
return material_boxes;
}

Expand Down Expand Up @@ -98,9 +100,36 @@ int16_t ItemPrinterMaterialDetector::read_number(

int16_t number;
if (is_dark_text_light_background){
number = (int16_t)OCR::read_number_waterfill(logger, cropped, 0xff000000, 0xff808080);
const std::vector<std::pair<uint32_t, uint32_t>> filters = {
// {0xff000000, 0xffb0b0b0},
{0xff000000, 0xffa0a0a0},
{0xff000000, 0xff959595},
{0xff000000, 0xff909090},
{0xff000000, 0xff858585},
{0xff000000, 0xff808080},
// {0xff000000, 0xff707070},
// {0xff000000, 0xff606060},
// {0xff000000, 0xff505050},
// {0xff000000, 0xff404040},
// {0xff000000, 0xff303030},
// {0xff000000, 0xff202020},
// {0xff000000, 0xff101010},
};
number = (int16_t)OCR::read_number_waterfill(logger, cropped, filters, 24);
}else{
number = (int16_t)OCR::read_number_waterfill(logger, cropped, 0xff808080, 0xffffffff);
const std::vector<std::pair<uint32_t, uint32_t>> filters = {
{0xff808080, 0xffffffff},
{0xff858585, 0xffffffff},
{0xff909090, 0xffffffff},
{0xff959595, 0xffffffff},
{0xffa0a0a0, 0xffffffff},
// {0xffb0b0b0, 0xffffffff},
// {0xffc0c0c0, 0xffffffff},
// {0xffd0d0d0, 0xffffffff},
// {0xffe0e0e0, 0xffffffff},
// {0xfff0f0f0, 0xffffffff},
};
number = (int16_t)OCR::read_number_waterfill(logger, cropped, filters, 24);
}

if (number < 1 || number > 999){
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,6 @@ class ItemPrinterMaterialDetector{
int8_t row_index
) const;


private:
int16_t read_number(
Logger& logger, AsyncDispatcher& dispatcher,
const ImageViewRGB32& screen, const ImageFloatBox& box
Expand All @@ -85,9 +83,11 @@ class ItemPrinterMaterialDetector{
private:
Color m_color;
Language m_language;
std::array<ImageFloatBox, 10> m_box_mat_value;
std::array<ImageFloatBox, 10> m_box_mat_quantity;
std::array<ImageFloatBox, 10> m_box_mat_name;
std::array<ImageFloatBox, 10> m_box_mat_value; // {0.39,0.176758,0.025,0.05}, {0.39,0.250977,0.025,0.05}, {0.39,0.325196,0.025,0.05}, {0.39,0.399415,0.025,0.05}, {0.39,0.473634,0.025,0.05}, {0.39,0.547853,0.025,0.05}, {0.39,0.622072,0.025,0.05}, {0.39,0.696291,0.025,0.05}, {0.39,0.77051,0.025,0.05}, {0.39,0.844729,0.025,0.05},
std::array<ImageFloatBox, 10> m_box_mat_quantity; // {0.485,0.176758,0.037,0.05}, {0.485,0.250977,0.037,0.05}, {0.485,0.325196,0.037,0.05}, {0.485,0.399415,0.037,0.05}, {0.485,0.473634,0.037,0.05}, {0.485,0.547853,0.037,0.05}, {0.485,0.622072,0.037,0.05}, {0.485,0.696291,0.037,0.05}, {0.485,0.77051,0.037,0.05}, {0.485,0.844729,0.037,0.05},
std::array<ImageFloatBox, 10> m_box_mat_name; // {0.09,0.176758,0.275,0.05}, {0.09,0.250977,0.275,0.05}, {0.09,0.325196,0.275,0.05}, {0.09,0.399415,0.275,0.05}, {0.09,0.473634,0.275,0.05}, {0.09,0.547853,0.275,0.05}, {0.09,0.622072,0.275,0.05}, {0.09,0.696291,0.275,0.05}, {0.09,0.77051,0.275,0.05}, {0.09,0.844729,0.275,0.05},


};


Expand Down