Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 80 additions & 37 deletions SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@
#include "OCR_RawOCR.h"
#include "OCR_NumberReader.h"

// #include <iostream>
// using std::cout;
// using std::endl;
#include <iostream>
using std::cout;
using std::endl;

namespace PokemonAutomation{
namespace OCR{
Expand All @@ -39,6 +39,7 @@ std::string run_number_normalization(const std::string& input){
{'9', '9'},

// Common misreads.
{'|', '1'},
{']', '1'},
{'l', '1'},
{'i', '1'},
Expand Down Expand Up @@ -104,52 +105,94 @@ int read_number_waterfill(
uint32_t rgb32_min, uint32_t rgb32_max,
bool text_inside_range
){
using namespace Kernels::Waterfill;
const std::vector<std::pair<uint32_t, uint32_t>> filters = {
{rgb32_min, rgb32_max}
};
return read_number_waterfill(logger, image, filters, UINT32_MAX, text_inside_range);
}

// Direct OCR is unreliable. Instead, we will waterfill each character
// to isolate them, then OCR them individually.
int read_number_waterfill(
Logger& logger, const ImageViewRGB32& image,
std::vector<std::pair<uint32_t, uint32_t>> filters,
uint32_t width_max,
bool text_inside_range
){
using namespace Kernels::Waterfill;

ImageRGB32 filtered = to_blackwhite_rgb32_range(image, rgb32_min, rgb32_max, text_inside_range);

// static int c = 0;
// filtered.save("test-" + std::to_string(c++) + ".png");

PackedBinaryMatrix matrix = compress_rgb32_to_binary_range(filtered, 0xff000000, 0xff7f7f7f);
for (std::pair<uint32_t, uint32_t> filter : filters){

uint32_t rgb32_min = filter.first;
uint32_t rgb32_max = filter.second;

// Direct OCR is unreliable. Instead, we will waterfill each character
// to isolate them, then OCR them individually.

ImageRGB32 filtered = to_blackwhite_rgb32_range(image, rgb32_min, rgb32_max, text_inside_range);

// static int c = 0;
// int i = 0;
// filtered.save("test-" + std::to_string(c++) + ".png");

PackedBinaryMatrix matrix = compress_rgb32_to_binary_range(filtered, 0xff000000, 0xff7f7f7f);

std::map<size_t, WaterfillObject> map;
{
std::unique_ptr<WaterfillSession> session = make_WaterfillSession(matrix);
auto iter = session->make_iterator(20);
WaterfillObject object;
bool exceed_width_max = false;
while (map.size() < 16 && iter->find_next(object, true)){
if (object.width() > width_max){
exceed_width_max = true;
break;
}
map.emplace(object.min_x, std::move(object));
}
if (exceed_width_max){
// try the next color filter
continue;
}
}

std::map<size_t, WaterfillObject> map;
{
std::unique_ptr<WaterfillSession> session = make_WaterfillSession(matrix);
auto iter = session->make_iterator(20);
WaterfillObject object;
while (map.size() < 16 && iter->find_next(object, true)){
map.emplace(object.min_x, std::move(object));
std::string ocr_text;
bool empty_char = false;
for (const auto& item : map){
const WaterfillObject& object = item.second;
ImageRGB32 cropped = extract_box_reference(filtered, object).copy();
PackedBinaryMatrix tmp(object.packed_matrix());
filter_by_mask(tmp, cropped, Color(0xffffffff), true);
ImageRGB32 padded = pad_image(cropped, cropped.width(), 0xffffffff);
std::string ocr = OCR::ocr_read(Language::English, padded);
// padded.save("test-cropped" + std::to_string(c) + "-" + std::to_string(i++) + ".png");
// std::cout << ocr[0] << std::endl;
if (!ocr.empty()){
ocr_text += ocr[0];
}else{
empty_char = true;
break;
}
}
}
if (empty_char){
// try the next color filter
continue;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rather than breaking out on the first match, you might want to run all of them anyway, then do a majority vote on the results (after throwing out obviously incorrect reads like empty strings or letters). TeraCardReader does this with the star count:

const std::vector<double> DISTANCES{70, 80, 90, 100, 110, 120, 130};

The reason is that the borderline cases may return neither an empty string nor a valid result. You'll want the result from when the "signal is strongest".

}

std::string normalized = run_number_normalization(ocr_text);

std::string ocr_text;
for (const auto& item : map){
const WaterfillObject& object = item.second;
ImageRGB32 cropped = extract_box_reference(filtered, object).copy();
PackedBinaryMatrix tmp(object.packed_matrix());
filter_by_mask(tmp, cropped, Color(0xffffffff), true);
ImageRGB32 padded = pad_image(cropped, cropped.width(), 0xffffffff);
std::string ocr = OCR::ocr_read(Language::English, padded);
if (!ocr.empty()){
ocr_text += ocr[0];
if (normalized.empty()){
logger.log("OCR Text: \"" + ocr_text + "\" -> \"" + normalized + "\" -> Unable to read.", COLOR_RED);
return -1;
}
}

std::string normalized = run_number_normalization(ocr_text);
int number = std::atoi(normalized.c_str());
logger.log("OCR Text: \"" + ocr_text + "\" -> \"" + normalized + "\" -> " + std::to_string(number));

if (normalized.empty()){
logger.log("OCR Text: \"" + ocr_text + "\" -> \"" + normalized + "\" -> Unable to read.", COLOR_RED);
return -1;
return number;
}

int number = std::atoi(normalized.c_str());
logger.log("OCR Text: \"" + ocr_text + "\" -> \"" + normalized + "\" -> " + std::to_string(number));

return number;
return -1;
}


Expand Down
11 changes: 11 additions & 0 deletions SerialPrograms/Source/CommonTools/OCR/OCR_NumberReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#define PokemonAutomation_CommonTools_OCR_NumberReader_H

#include <stdint.h>
#include <vector>
#include "CommonFramework/Language.h"

namespace PokemonAutomation{
Expand All @@ -32,6 +33,16 @@ int read_number_waterfill(
bool text_inside_range = true
);

// Reads a number by trying each color filter in `filters` in order. The first filter that is not skipped determines the result.
// Each entry of `filters` is a {rgb32_min, rgb32_max} pair.
// A filter is skipped when any waterfilled object is wider than `width_max`. This solves the problem where characters are
// too close to each other and touch, causing more than one character to be OCR'ed as a single object.
// A filter is also skipped when running OCR on an individual character results in an empty string.
// The filters should be arranged in order of preference (probably broadest to narrowest).
// Returns -1 if every filter is skipped, or if the OCR'ed text normalizes to an empty string.
int read_number_waterfill(
Logger& logger, const ImageViewRGB32& image,
std::vector<std::pair<uint32_t, uint32_t>> filters,
uint32_t width_max,
bool text_inside_range = true
);


}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -313,12 +313,31 @@ void TestProgram::program(MultiSwitchProgramEnvironment& env, CancellableScope&

// std::terminate();

#if 1
ImageRGB32 image("720p1.png");
// auto image = feed.snapshot();

ItemPrinterMaterialDetector detector(COLOR_RED, Language::English);

std::vector<ImageFloatBox> boxes = {
{0.485,0.176758,0.037,0.05}, {0.485,0.250977,0.037,0.05}, {0.485,0.325196,0.037,0.05}, {0.485,0.399415,0.037,0.05}, {0.485,0.473634,0.037,0.05}, {0.485,0.547853,0.037,0.05}, {0.485,0.622072,0.037,0.05}, {0.485,0.696291,0.037,0.05}, {0.485,0.77051,0.037,0.05}, {0.485,0.844729,0.037,0.05},
// {0.39,0.176758,0.025,0.05}, {0.39,0.250977,0.025,0.05}, {0.39,0.325196,0.025,0.05}, {0.39,0.399415,0.025,0.05}, {0.39,0.473634,0.025,0.05}, {0.39,0.547853,0.025,0.05}, {0.39,0.622072,0.025,0.05}, {0.39,0.696291,0.025,0.05}, {0.39,0.77051,0.025,0.05}, {0.39,0.844729,0.025,0.05},
};
for (ImageFloatBox box : boxes){
detector.read_number(console.logger(), env.inference_dispatcher(), image, box);
}

#endif

#if 0

ImageRGB32 image("20250323-011605651979.png");

DialogBoxDetector detector;
detector.make_overlays(overlays);
cout << detector.detect(image) << endl;

#endif


#if 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#include "NintendoSwitch/Commands/NintendoSwitch_Commands_PushButtons.h"
#include "PokemonSV_ItemPrinterMaterialDetector.h"

//#include <iostream>
// #include <iostream>

namespace PokemonAutomation{
namespace NintendoSwitch{
Expand Down Expand Up @@ -69,7 +69,9 @@ std::array<ImageFloatBox, 10> ItemPrinterMaterialDetector::Material_Boxes(ImageF
for (size_t i = 0; i < 10; i++){
double y = initial_y + i*y_spacing;
material_boxes[i] = ImageFloatBox(x, y, width, height);
// std::cout << "{" << x << "," << y << "," << width << "," << height << "}, ";
}
// std::cout << std::endl;
return material_boxes;
}

Expand Down Expand Up @@ -98,9 +100,30 @@ int16_t ItemPrinterMaterialDetector::read_number(

int16_t number;
if (is_dark_text_light_background){
number = (int16_t)OCR::read_number_waterfill(logger, cropped, 0xff000000, 0xff808080);
const std::vector<std::pair<uint32_t, uint32_t>> filters = {
{0xff000000, 0xff909090},
{0xff000000, 0xff808080},
{0xff000000, 0xff707070},
{0xff000000, 0xff606060},
{0xff000000, 0xff505050},
{0xff000000, 0xff404040},
{0xff000000, 0xff303030},
{0xff000000, 0xff202020},
{0xff000000, 0xff101010},
};
number = (int16_t)OCR::read_number_waterfill(logger, cropped, filters, 24);
}else{
number = (int16_t)OCR::read_number_waterfill(logger, cropped, 0xff808080, 0xffffffff);
const std::vector<std::pair<uint32_t, uint32_t>> filters = {
// {0xff808080, 0xffffffff},
{0xff909090, 0xffffffff},
{0xffa0a0a0, 0xffffffff},
{0xffb0b0b0, 0xffffffff},
{0xffc0c0c0, 0xffffffff},
{0xffd0d0d0, 0xffffffff},
{0xffe0e0e0, 0xffffffff},
{0xfff0f0f0, 0xffffffff},
};
number = (int16_t)OCR::read_number_waterfill(logger, cropped, filters, 24);
}

if (number < 1 || number > 999){
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,6 @@ class ItemPrinterMaterialDetector{
int8_t row_index
) const;


private:
int16_t read_number(
Logger& logger, AsyncDispatcher& dispatcher,
const ImageViewRGB32& screen, const ImageFloatBox& box
Expand All @@ -85,9 +83,11 @@ class ItemPrinterMaterialDetector{
private:
Color m_color;
Language m_language;
std::array<ImageFloatBox, 10> m_box_mat_value;
std::array<ImageFloatBox, 10> m_box_mat_quantity;
std::array<ImageFloatBox, 10> m_box_mat_name;
std::array<ImageFloatBox, 10> m_box_mat_value; // {0.39,0.176758,0.025,0.05}, {0.39,0.250977,0.025,0.05}, {0.39,0.325196,0.025,0.05}, {0.39,0.399415,0.025,0.05}, {0.39,0.473634,0.025,0.05}, {0.39,0.547853,0.025,0.05}, {0.39,0.622072,0.025,0.05}, {0.39,0.696291,0.025,0.05}, {0.39,0.77051,0.025,0.05}, {0.39,0.844729,0.025,0.05},
std::array<ImageFloatBox, 10> m_box_mat_quantity; // {0.485,0.176758,0.037,0.05}, {0.485,0.250977,0.037,0.05}, {0.485,0.325196,0.037,0.05}, {0.485,0.399415,0.037,0.05}, {0.485,0.473634,0.037,0.05}, {0.485,0.547853,0.037,0.05}, {0.485,0.622072,0.037,0.05}, {0.485,0.696291,0.037,0.05}, {0.485,0.77051,0.037,0.05}, {0.485,0.844729,0.037,0.05},
std::array<ImageFloatBox, 10> m_box_mat_name; // {0.09,0.176758,0.275,0.05}, {0.09,0.250977,0.275,0.05}, {0.09,0.325196,0.275,0.05}, {0.09,0.399415,0.275,0.05}, {0.09,0.473634,0.275,0.05}, {0.09,0.547853,0.275,0.05}, {0.09,0.622072,0.275,0.05}, {0.09,0.696291,0.275,0.05}, {0.09,0.77051,0.275,0.05}, {0.09,0.844729,0.275,0.05},


};


Expand Down