Skip to content

Commit 90598b3

Browse files
author
Gin
committed
add image folder info
1 parent d0e8ba8 commit 90598b3

10 files changed

+256
-111
lines changed

SerialPrograms/CMakeLists.txt

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -888,14 +888,15 @@ file(GLOB MAIN_SOURCES
888888
Source/Kernels/Waterfill/Kernels_Waterfill_Session.h
889889
Source/Kernels/Waterfill/Kernels_Waterfill_Session.tpp
890890
Source/Kernels/Waterfill/Kernels_Waterfill_Types.h
891-
Source/ML/DataLabeling/SegmentAnythingModel.cpp
892-
Source/ML/DataLabeling/SegmentAnythingModel.h
891+
Source/ML/DataLabeling/ML_AnnotationIO.cpp
892+
Source/ML/DataLabeling/ML_AnnotationIO.h
893+
Source/ML/DataLabeling/ML_SegmentAnythingModel.cpp
894+
Source/ML/DataLabeling/ML_SegmentAnythingModel.h
895+
Source/ML/DataLabeling/ML_SegmentAnythingModelConstants.h
893896
Source/ML/ML_Panels.cpp
894897
Source/ML/ML_Panels.h
895898
Source/ML/Programs/ML_LabelImages.cpp
896899
Source/ML/Programs/ML_LabelImages.h
897-
Source/ML/UI/ML_ImageAnnotationSourceSelectorWidget.cpp
898-
Source/ML/UI/ML_ImageAnnotationSourceSelectorWidget.h
899900
Source/ML/UI/ML_ImageAnnotationCommandRow.cpp
900901
Source/ML/UI/ML_ImageAnnotationCommandRow.h
901902
Source/ML/UI/ML_ImageAnnotationDisplayOption.cpp
@@ -904,6 +905,8 @@ file(GLOB MAIN_SOURCES
904905
Source/ML/UI/ML_ImageAnnotationDisplaySession.h
905906
Source/ML/UI/ML_ImageAnnotationDisplayWidget.cpp
906907
Source/ML/UI/ML_ImageAnnotationDisplayWidget.h
908+
Source/ML/UI/ML_ImageAnnotationSourceSelectorWidget.cpp
909+
Source/ML/UI/ML_ImageAnnotationSourceSelectorWidget.h
907910
Source/NintendoSwitch/Commands/NintendoSwitch_Commands_PushButtons.cpp
908911
Source/NintendoSwitch/Commands/NintendoSwitch_Commands_PushButtons.h
909912
Source/NintendoSwitch/Commands/NintendoSwitch_Commands_Routines.cpp
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
/* ML Annotation IO
2+
*
3+
* From: https://github.com/PokemonAutomation/
4+
*
5+
* Functions for IO of annotation related files
6+
*/
7+
8+
#include <fstream>
9+
#include <iostream>
10+
#include <QDirIterator>
11+
#include <QDir>
12+
13+
#include "ML_AnnotationIO.h"
14+
#include "ML_SegmentAnythingModelConstants.h"
15+
16+
namespace PokemonAutomation{
17+
namespace ML{
18+
19+
// save the image embedding as a file with path <image_filepath>.embedding
20+
void save_image_embedding_to_disk(const std::string& image_filepath, const std::vector<float>& embedding){
21+
const std::string embedding_path = image_filepath + ".embedding";
22+
std::ofstream fout(embedding_path, std::ios::binary);
23+
// write embedding shape
24+
fout.write(reinterpret_cast<const char*>(&SAM_EMBEDDER_OUTPUT_N_CHANNELS), sizeof(SAM_EMBEDDER_OUTPUT_N_CHANNELS));
25+
fout.write(reinterpret_cast<const char*>(&SAM_EMBEDDER_OUTPUT_IMAGE_SIZE), sizeof(SAM_EMBEDDER_OUTPUT_IMAGE_SIZE));
26+
fout.write(reinterpret_cast<const char*>(&SAM_EMBEDDER_OUTPUT_IMAGE_SIZE), sizeof(SAM_EMBEDDER_OUTPUT_IMAGE_SIZE));
27+
fout.write(reinterpret_cast<const char*>(embedding.data()), sizeof(float) * embedding.size());
28+
fout.close();
29+
std::cout << "Saved image embedding as " << embedding_path << std::endl;
30+
}
31+
32+
33+
// Load the pre-computed embedding stored at <image_filepath>.embedding into `image_embedding`.
//
// Expected file layout: three ints (number of channels, height, width) followed by
// channels * height * width floats, all in the machine's native binary representation.
//
// Returns false if the embedding file cannot be opened, true once the embedding is fully loaded.
// Throws std::runtime_error if the header is missing or holds non-positive dimensions, if the
// declared shape is too large to store, or if the file ends before all values are read.
bool load_image_embedding(const std::string& image_filepath, std::vector<float>& image_embedding){
    const std::string embedding_path = image_filepath + ".embedding";
    std::ifstream fin(embedding_path, std::ios::binary);
    if (!fin.is_open()){
        std::cout << "No embedding for image " << image_filepath << std::endl;
        return false;
    }

    int embedding_n_channels = 0, embedding_height = 0, embedding_width = 0;
    fin.read(reinterpret_cast<char*>(&embedding_n_channels), sizeof(int));
    fin.read(reinterpret_cast<char*>(&embedding_height), sizeof(int));
    fin.read(reinterpret_cast<char*>(&embedding_width), sizeof(int));

    std::cout << "Image embedding shape [" << embedding_n_channels << ", " << embedding_height
        << ", " << embedding_width << "]" << std::endl;
    // A failed stream means the header itself was cut short, so the dimensions cannot be trusted.
    if (!fin || embedding_n_channels <= 0 || embedding_height <= 0 || embedding_width <= 0){
        std::string err_msg = "Image embedding wrong dimension from " + embedding_path;
        std::cerr << err_msg << std::endl;
        throw std::runtime_error(err_msg);
    }

    // The dimensions come from the file, so multiply them as size_t and reject shapes whose
    // product would not fit in the vector, instead of overflowing an int.
    const size_t n_channels = static_cast<size_t>(embedding_n_channels);
    const size_t height = static_cast<size_t>(embedding_height);
    const size_t width = static_cast<size_t>(embedding_width);
    const size_t max_elements = image_embedding.max_size();
    if (height > max_elements / n_channels || width > max_elements / (n_channels * height)){
        std::string err_msg = "Image embedding too large from " + embedding_path;
        std::cerr << err_msg << std::endl;
        throw std::runtime_error(err_msg);
    }

    const size_t size = n_channels * height * width;
    image_embedding.resize(size);
    fin.read(reinterpret_cast<char*>(image_embedding.data()), sizeof(float) * size);
    if (!fin){
        // The file ended before all declared values were read: do not hand back partial data.
        image_embedding.clear();
        std::string err_msg = "Image embedding file truncated: " + embedding_path;
        std::cerr << err_msg << std::endl;
        throw std::runtime_error(err_msg);
    }
    std::cout << "Loaded image embedding from " << embedding_path << std::endl;
    return true;
}
60+
61+
62+
std::vector<std::string> find_images_in_folder(const std::string& folder_path, bool recursive){
63+
QDir image_dir(folder_path.c_str());
64+
if (!image_dir.exists()){
65+
std::cerr << "Error: input image folder path " << folder_path << " does not exist." << std::endl;
66+
return {};
67+
}
68+
69+
auto flag = recursive ? QDirIterator::Subdirectories : QDirIterator::NoIteratorFlags;
70+
QDirIterator image_file_iter(image_dir.absolutePath(), {"*.png", "*.jpg", "*.jpeg"}, QDir::Files, flag);
71+
std::vector<std::string> all_image_paths;
72+
while (image_file_iter.hasNext()){
73+
all_image_paths.emplace_back(image_file_iter.next().toStdString());
74+
}
75+
std::cout << "Found " << all_image_paths.size() << " images " << (recursive ? "recursively " : "") <<
76+
"in folder " << folder_path << std::endl;
77+
return all_image_paths;
78+
}
79+
80+
81+
}
82+
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/* ML Annotation IO
2+
*
3+
* From: https://github.com/PokemonAutomation/
4+
*
5+
* Functions for IO of annotation related files
6+
*/
7+
8+
#ifndef PokemonAutomation_ML_AnnotationIO_H
9+
#define PokemonAutomation_ML_AnnotationIO_H
10+
11+
#include <vector>
12+
#include <string>
13+
14+
namespace PokemonAutomation{
15+
namespace ML{
16+
17+
// Load pre-computed image embedding from disk
18+
// Return true if the embedding file exists and was loaded; return false if it cannot be opened.
19+
// The embedding is stored in a file in the same folder as the image, having the same name but with a suffix ".embedding".
20+
bool load_image_embedding(const std::string& image_filepath, std::vector<float>& image_embedding);
21+
22+
// Save the image embedding as a file with path <image_filepath>.embedding.
23+
void save_image_embedding_to_disk(const std::string& image_filepath, const std::vector<float>& embedding);
24+
25+
// Find image paths stored in a folder. The search can be recursive into child folders or not.
26+
std::vector<std::string> find_images_in_folder(const std::string& folder_path, bool recursive);
27+
28+
}
29+
}
30+
31+
#endif

SerialPrograms/Source/ML/DataLabeling/SegmentAnythingModel.cpp renamed to SerialPrograms/Source/ML/DataLabeling/ML_SegmentAnythingModel.cpp

Lines changed: 7 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -14,28 +14,13 @@
1414
#include <opencv2/imgcodecs.hpp>
1515
#include <opencv2/imgproc.hpp>
1616
#include "3rdParty/ONNX/OnnxToolsPA.h"
17-
#include "SegmentAnythingModel.h"
17+
#include "ML_SegmentAnythingModelConstants.h"
18+
#include "ML_SegmentAnythingModel.h"
19+
#include "ML_AnnotationIO.h"
1820

1921
namespace PokemonAutomation{
2022
namespace ML{
2123

22-
23-
24-
25-
const int SAM_EMBEDDER_INPUT_IMAGE_WIDTH = 1024;
26-
const int SAM_EMBEDDER_INPUT_IMAGE_HEIGHT = 576;
27-
const int SAM_EMBEDDER_OUTPUT_N_CHANNELS = 256;
28-
const int SAM_EMBEDDER_OUTPUT_IMAGE_SIZE = 64;
29-
30-
const int SAM_EMBEDDER_INPUT_SIZE = SAM_EMBEDDER_INPUT_IMAGE_HEIGHT * SAM_EMBEDDER_INPUT_IMAGE_WIDTH * 3;
31-
const int SAM_EMBEDDER_OUTPUT_SIZE = SAM_EMBEDDER_OUTPUT_N_CHANNELS * SAM_EMBEDDER_OUTPUT_IMAGE_SIZE * SAM_EMBEDDER_OUTPUT_IMAGE_SIZE;
32-
33-
const int SAM_N_INPUT_TENSORS = 6;
34-
const int SAM_N_OUTPUT_TENSORS = 3;
35-
const int SAM_LOW_RES_MASK_SIZE = 256;
36-
const float SAM_OUTPUT_MASK_THRESHOLD = 0.0;
37-
38-
3924
Ort::SessionOptions create_session_option(){
4025
return Ort::SessionOptions{};
4126

@@ -207,63 +192,13 @@ void SAMSession::run(
207192
}
208193

209194

210-
// save the image embedding as a file with path <image_filepath>.embedding
211-
void save_image_embedding_to_disk(const std::string& image_filepath, const std::vector<float>& embedding){
212-
const std::string embedding_path = image_filepath + ".embedding";
213-
std::ofstream fout(embedding_path, std::ios::binary);
214-
// write embedding shape
215-
fout.write(reinterpret_cast<const char*>(&SAM_EMBEDDER_OUTPUT_N_CHANNELS), sizeof(SAM_EMBEDDER_OUTPUT_N_CHANNELS));
216-
fout.write(reinterpret_cast<const char*>(&SAM_EMBEDDER_OUTPUT_IMAGE_SIZE), sizeof(SAM_EMBEDDER_OUTPUT_IMAGE_SIZE));
217-
fout.write(reinterpret_cast<const char*>(&SAM_EMBEDDER_OUTPUT_IMAGE_SIZE), sizeof(SAM_EMBEDDER_OUTPUT_IMAGE_SIZE));
218-
fout.write(reinterpret_cast<const char*>(embedding.data()), sizeof(float) * embedding.size());
219-
fout.close();
220-
std::cout << "Saved image embedding as " << embedding_path << std::endl;
221-
}
222-
223-
224-
bool load_image_embedding(const std::string& image_filepath, std::vector<float>& image_embedding){
225-
std::string emebdding_path = image_filepath + ".embedding";
226-
std::ifstream fin(emebdding_path, std::ios::binary);
227-
if (!fin.is_open()){
228-
std::cout << "No embedding for image " << image_filepath << std::endl;
229-
return false;
230-
}
231-
232-
int embedding_n_channels = 0, embedding_height = 0, emebedding_width = 0;
233-
fin.read(reinterpret_cast<char*>(&embedding_n_channels), sizeof(int));
234-
fin.read(reinterpret_cast<char*>(&embedding_height), sizeof(int));
235-
fin.read(reinterpret_cast<char*>(&emebedding_width), sizeof(int));
236-
237-
std::cout << "Image embedding shape [" << embedding_n_channels << ", " << embedding_height
238-
<< ", " << emebedding_width << "]" << std::endl;
239-
if (embedding_n_channels <= 0 || embedding_height <= 0 || emebedding_width <= 0){
240-
std::string err_msg = "Image embedding wrong dimension from " + emebdding_path;
241-
std::cerr << err_msg << std::endl;
242-
throw std::runtime_error(err_msg);
243-
}
244-
245-
const int size = embedding_n_channels * embedding_height * emebedding_width;
246-
image_embedding.resize(size);
247-
fin.read(reinterpret_cast<char*>(image_embedding.data()), sizeof(float) * size);
248-
std::cout << "Loaded image embedding from " << emebdding_path << std::endl;
249-
return true;
250-
}
251-
252-
253195
void compute_embeddings_for_folder(const std::string& embedding_model_path, const std::string& image_folder_path){
254-
QDir image_dir(image_folder_path.c_str());
255-
if (!image_dir.exists()){
256-
std::cerr << "Error: input image folder path " << image_folder_path << " does not exist." << std::endl;
196+
const bool recursive_search = true;
197+
std::vector<std::string> all_image_paths = find_images_in_folder(image_folder_path, recursive_search);
198+
if (all_image_paths.size() == 0){
257199
return;
258200
}
259-
260-
QDirIterator image_file_iter(image_dir.absolutePath(), {"*.png", "*.jpg", "*.jpeg"}, QDir::Files, QDirIterator::Subdirectories);
261-
std::vector<std::string> all_image_paths;
262-
while (image_file_iter.hasNext()){
263-
all_image_paths.emplace_back(image_file_iter.next().toStdString());
264-
}
265-
std::cout << "Found " << all_image_paths.size() << " images recursively in folder " << image_folder_path << std::endl;
266-
201+
267202
SAMEmbedderSession embedding_session(embedding_model_path);
268203
std::vector<float> output_image_embedding;
269204
for (size_t i = 0; i < all_image_paths.size(); i++){

SerialPrograms/Source/ML/DataLabeling/SegmentAnythingModel.h renamed to SerialPrograms/Source/ML/DataLabeling/ML_SegmentAnythingModel.h

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
* Run Segment Anything Model (SAM) to segment objects on images
66
*/
77

8-
#ifndef PokemonAutomation_ML_SEGMENTANYTHINGMODEL_H
9-
#define PokemonAutomation_ML_SEGMENTANYTHINGMODEL_H
8+
#ifndef PokemonAutomation_ML_SegmentAnythingModel_H
9+
#define PokemonAutomation_ML_SegmentAnythingModel_H
1010

1111

1212
#include <string>
@@ -21,14 +21,6 @@ namespace PokemonAutomation{
2121
namespace ML{
2222

2323

24-
// Load pre-computed image embedding from disk
25-
// Return true if there is the embedding file.
26-
// The embedding is stored in a file in the same folder as the image, having the same name but with a suffix ".embedding".
27-
bool load_image_embedding(const std::string& image_filepath, std::vector<float>& image_embedding);
28-
29-
// Save the image embedding as a file with path <image_filepath>.embedding.
30-
void save_image_embedding_to_disk(const std::string& image_filepath, const std::vector<float>& embedding);
31-
3224
// Compute embeddings for all images in a folder. Only support .png, .jpg and .jpeg filename extensions so far.
3325
// This can be very slow!
3426
void compute_embeddings_for_folder(const std::string& embedding_model_path, const std::string& image_folder_path);
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/* ML Segment Anything Model Constants
2+
*
3+
* From: https://github.com/PokemonAutomation/
4+
*
5+
* Constants for Segment Anything Model (SAM) to segment objects on images
6+
*/
7+
8+
#ifndef PokemonAutomation_ML_SegmentAnythingModelConstants_H
9+
#define PokemonAutomation_ML_SegmentAnythingModelConstants_H
10+
11+
12+
// Image dimensions fed to the SAM embedder (presumably in pixels -- confirm against the model).
constexpr int SAM_EMBEDDER_INPUT_IMAGE_WIDTH = 1024;
constexpr int SAM_EMBEDDER_INPUT_IMAGE_HEIGHT = 576;

// Shape of the embedder output: N_CHANNELS x IMAGE_SIZE x IMAGE_SIZE.
constexpr int SAM_EMBEDDER_OUTPUT_N_CHANNELS = 256;
constexpr int SAM_EMBEDDER_OUTPUT_IMAGE_SIZE = 64;

// Total element counts of the embedder input and output.
// The factor 3 is presumably the number of color channels -- confirm against the model.
constexpr int SAM_EMBEDDER_INPUT_SIZE = SAM_EMBEDDER_INPUT_IMAGE_HEIGHT * SAM_EMBEDDER_INPUT_IMAGE_WIDTH * 3;
constexpr int SAM_EMBEDDER_OUTPUT_SIZE = SAM_EMBEDDER_OUTPUT_N_CHANNELS * SAM_EMBEDDER_OUTPUT_IMAGE_SIZE * SAM_EMBEDDER_OUTPUT_IMAGE_SIZE;

// Tensor counts, low-resolution mask size and mask threshold of the SAM session
// (meanings inferred from the names -- confirm against the code that uses them).
constexpr int SAM_N_INPUT_TENSORS = 6;
constexpr int SAM_N_OUTPUT_TENSORS = 3;
constexpr int SAM_LOW_RES_MASK_SIZE = 256;
constexpr float SAM_OUTPUT_MASK_THRESHOLD = 0.0f;
24+
25+
26+
27+
#endif

SerialPrograms/Source/ML/Programs/ML_LabelImages.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,8 @@
3535
#include "ML_LabelImages.h"
3636
#include "Pokemon/Pokemon_Strings.h"
3737
#include "Common/Qt/Options/ConfigWidget.h"
38-
#include "ML/DataLabeling/SegmentAnythingModel.h"
38+
#include "ML/DataLabeling/ML_SegmentAnythingModel.h"
39+
#include "ML/DataLabeling/ML_AnnotationIO.h"
3940

4041

4142

@@ -244,7 +245,7 @@ void LabelImages::save_annotation_to_file() const{
244245
}
245246

246247
void LabelImages::clear_for_new_image(){
247-
source_image_height = source_image_height = 0;
248+
source_image_width = source_image_height = 0;
248249
m_image_embedding.clear();
249250
m_output_boolean_mask.clear();
250251
m_mask_image = ImageRGB32();

SerialPrograms/Source/ML/Programs/ML_LabelImages.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#ifndef PokemonAutomation_ML_LabelImages_H
88
#define PokemonAutomation_ML_LabelImages_H
99

10+
#include <QGraphicsScene>
1011
#include "Common/Cpp/Options/BatchOption.h"
1112
#include "Common/Cpp/Options/FloatingPointOption.h"
1213
#include "CommonFramework/Panels/PanelInstance.h"
@@ -18,8 +19,8 @@
1819
#include "NintendoSwitch/Framework/NintendoSwitch_SwitchSystemOption.h"
1920
#include "NintendoSwitch/Framework/NintendoSwitch_SwitchSystemSession.h"
2021
#include "CommonFramework/VideoPipeline/VideoOverlayScopes.h"
21-
#include <QGraphicsScene>
22-
#include "ML/DataLabeling/SegmentAnythingModel.h"
22+
23+
#include "ML/DataLabeling/ML_SegmentAnythingModel.h"
2324
#include "ML/UI/ML_ImageAnnotationDisplayOption.h"
2425
#include "ML/UI/ML_ImageAnnotationDisplaySession.h"
2526

0 commit comments

Comments
 (0)