77
88#include < fstream>
99#include < iostream>
10+ #include < map>
1011#include < QDirIterator>
1112#include < QDir>
13+ #include < QMessageBox>
1214
15+ #include " Common/Cpp/Json/JsonTools.h"
16+ #include " Common/Cpp/Json/JsonArray.h"
17+ #include " Common/Cpp/Json/JsonObject.h"
18+ #include " Common/Cpp/Json/JsonValue.h"
19+ #include " Common/Cpp/StringTools.h"
20+ #include " Common/Cpp/PrettyPrint.h"
1321#include " ML_AnnotationIO.h"
1422#include " ML_SegmentAnythingModelConstants.h"
23+ #include " ML_ObjectAnnotation.h"
24+
25+ namespace fs = std::filesystem;
26+ using std::cout, std::endl;
1527
1628namespace PokemonAutomation {
1729namespace ML {
@@ -77,6 +89,206 @@ std::vector<std::string> find_images_in_folder(const std::string& folder_path, b
7789 return all_image_paths;
7890}
7991
92+ void export_image_annotations_to_yolo_dataset (
93+ const std::string& image_folder_path,
94+ const std::string& annotation_folder_path,
95+ const std::string& yolo_dataset_path
96+ ){
97+ const bool recursive = true ;
98+ const std::vector<std::string>& image_paths = find_images_in_folder (image_folder_path, recursive);
99+ if (image_paths.size () == 0 ){
100+ QMessageBox box;
101+ box.critical (nullptr , " Empty Image Folder" ,
102+ QString::fromStdString (" No images found in " + image_folder_path + " ." ));
103+ return ;
104+ }
105+
106+ // TODO for simplicity we will parse this YAML file. In future we should use a proper YAML library
107+ std::ifstream fin (yolo_dataset_path.c_str ());
108+ if (!fin){
109+ QMessageBox box;
110+ box.critical (nullptr , " Cannot Open YOLO Dataset Config File" ,
111+ QString::fromStdString (" Cannot open " + yolo_dataset_path + " ." ));
112+ return ;
113+ }
114+
115+ std::vector<std::string> label_names;
116+ bool reading_labels = false ;
117+
118+ std::string line;
119+ int line_id = 0 ;
120+ while (std::getline (fin, line)){
121+ line_id++;
122+ // remove "#" comments
123+ size_t pound_idx = line.find_first_of (" #" );
124+ if (pound_idx != std::string::npos){
125+ line = line.substr (0 , pound_idx);
126+ }
127+ line = StringTools::strip (line);
128+ if (line.size () == 0 ){
129+ continue ;
130+ }
131+
132+ // cout << "Line: " << line << endl;
133+ if (line.starts_with (" names:" )){
134+ reading_labels = true ;
135+ line.clear ();
136+ continue ;
137+ }
138+
139+ if (reading_labels){
140+ // cout << "start reading labels" << endl;
141+ size_t colon_idx = line.find_first_of (" :" );
142+ if (colon_idx == std::string::npos){
143+ QMessageBox box;
144+ box.critical (nullptr , " Error Parsing Dataset YAML" ,
145+ QString::fromStdString (" YAML file " + yolo_dataset_path + " line " + std::to_string (line_id) + " contains no colon for labels." ));
146+ return ;
147+ }
148+ std::string number = StringTools::strip (line.substr (0 , colon_idx));
149+ std::string label = StringTools::strip (line.substr (colon_idx+1 ));
150+
151+ // cout << "found number " << number << " label " << label << endl;
152+ size_t num = StringTools::to_size_t (number);
153+ if (num != label_names.size ()){
154+ QMessageBox box;
155+ box.critical (nullptr , " Error Parsing Dataset YAML" ,
156+ QString::fromStdString (" YAML file " + yolo_dataset_path + " line " + std::to_string (line_id) + " has no label index." ));
157+ return ;
158+ }
159+ label_names.push_back (label);
160+ }
161+
162+ line.clear ();
163+ }
164+
165+ std::map<std::string, size_t > label_indices;
166+ cout << " Load dataset labels: " << endl;
167+ for (size_t i = 0 ; i < label_names.size (); i++){
168+ cout << " - " << label_names[i] << endl;
169+ label_indices[label_names[i]] = i;
170+ }
171+
172+
173+ // convert images and annotations into new subfolders in the folder of the dataset config
174+ auto converted_folder_name = " exported-" + now_to_filestring ();
175+
176+ const auto yolo_dataset_config_file = fs::path (yolo_dataset_path);
177+ const fs::path yolo_dataset_folder = yolo_dataset_config_file.parent_path ();
178+ const auto target_folder = yolo_dataset_folder / converted_folder_name;
179+ if (fs::exists (target_folder)){
180+ QMessageBox box;
181+ box.critical (nullptr , " Export Destination Folder Already Exists" ,
182+ QString::fromStdString (" Folder " + target_folder.string () + " already exists." ));
183+ return ;
184+ }
185+ const auto target_image_folder = target_folder / " images" ;
186+ const auto target_label_folder = target_folder / " labels" ;
187+ cout << " Export to image folder: " << target_image_folder << endl;
188+ cout << " Export to label folder: " << target_label_folder << endl;
189+
190+ fs::create_directories (target_image_folder);
191+ fs::create_directories (target_label_folder);
192+
193+ fs::path anno_folder (annotation_folder_path);
194+ for (size_t i = 0 ; i < image_paths.size (); i++){
195+ const auto & image_path = image_paths[i];
196+ const auto image_file = fs::path (image_path);
197+
198+ const std::string anno_filename = image_file.filename ().replace_extension (" .json" ).string ();
199+ fs::path anno_file = anno_folder / anno_filename;
200+ if (!fs::exists (anno_file)){
201+ QMessageBox box;
202+ box.critical (nullptr , " Cannot Find Annotation File" ,
203+ QString::fromStdString (" No annotation for " + image_path + " ." ));
204+ return ;
205+ }
206+
207+ const auto target_image_file = target_image_folder / image_file.filename ();
208+ try {
209+ fs::copy_file (image_file, target_image_file);
210+ } catch (fs::filesystem_error& e)
211+ {
212+ QMessageBox box;
213+ box.critical (nullptr , " Cannot Copy File" ,
214+ QString::fromStdString (
215+ " Cannot copy from " + image_file.string () + " to " + target_image_file.string () +
216+ " . Probably permission issue, source image is broken or target image path already exists due to image folder having same image filenames"
217+ ));
218+ return ;
219+ }
220+
221+ std::string json_content;
222+ const bool anno_loaded = file_to_string (anno_file.string (), json_content);
223+ if (!anno_loaded){
224+ QMessageBox box;
225+ box.warning (nullptr , " Unable to Load Annotation" ,
226+ QString::fromStdString (" Cannot open annotation file " + anno_file.string () + " . Probably wrong permission?" ));
227+ return ;
228+ }
229+
230+ const JsonValue loaded_json = parse_json (json_content);
231+ const JsonObject* json_obj = loaded_json.to_object ();
232+ if (!json_obj){
233+ QMessageBox box;
234+ box.warning (nullptr , " Wrong JSON content" ,
235+ QString::fromStdString (" Wong JSON content in annotation file " + anno_file.string () +
236+ " . Probably older annotataion? Try loading and saving this annotation file." ));
237+ return ;
238+ }
239+
240+ std::vector<std::string> label_file_lines;
241+ try {
242+ const int64_t image_width = json_obj->get_integer_throw (" IMAGE_WIDTH" );
243+ const int64_t image_height = json_obj->get_integer_throw (" IMAGE_HEIGHT" );
244+ const JsonArray& json_array = json_obj->get_array_throw (" ANNOTATION" );
245+ for (size_t i = 0 ; i < json_array.size (); i++){
246+ const ObjectAnnotation anno_obj = ObjectAnnotation::from_json ((json_array)[i]);
247+ const std::string& label = anno_obj.label ;
248+
249+ auto it = label_indices.find (label);
250+ if (it == label_indices.end ()){
251+ if (label.ends_with (" -male" )){
252+ it = label_indices.find (label.substr (0 , label.size ()-5 ));
253+ } else if (label.ends_with (" -female" )){
254+ it = label_indices.find (label.substr (0 , label.size ()-7 ));
255+ }
256+ }
257+ if (it == label_indices.end ()){
258+ continue ; // label not part of the YOLO dataset. Ignored.
259+ }
260+
261+ const size_t label_id = it->second ;
262+
263+ // TODO: once we implement the user checkbox on mask reliability, we should change this line
264+ const auto & box = anno_obj.mask_box ;
265+ const double center_x = (box.min_x + box.max_x ) / (2.0 * image_width);
266+ const double center_y = (box.min_y + box.max_y ) / (2.0 * image_height);
267+ const double width = box.width () / (double )image_width;
268+ const double height = box.height () / (double )image_height;
269+
270+ // each row in the YOLO dataclass label file is: class_index x_center y_center width height
271+ // https://docs.ultralytics.com/yolov5/tutorials/train_custom_data/#12-leverage-models-for-automated-labeling
272+ std::ostringstream os;
273+ os << label_id << " " << center_x << " " << center_y << " " << width << " " << height;
274+ label_file_lines.push_back (os.str ());
275+ }
276+ } catch (JsonParseException& ){
277+ QMessageBox box;
278+ box.warning (nullptr , " Wrong JSON content" ,
279+ QString::fromStdString (" Wong JSON content in annotation file " + anno_file.string () + " ." ));
280+ return ;
281+ }
282+
283+ const auto target_label_file = target_label_folder / image_file.filename ().replace_extension (" .txt" );
284+ std::ofstream fout (target_label_file.string ());
285+ for (const auto & line : label_file_lines){
286+ fout << line << " \n " ;
287+ }
288+ }
289+ cout << " Done exporting " << image_paths.size () << " annotations to YOLOv5 dataset" << endl;
290+ }
291+
80292
81293}
82294}
0 commit comments