From 5f3237e9946fc4749dc6b94b81c33474cf01f86f Mon Sep 17 00:00:00 2001 From: Bengt Ljungquist Date: Wed, 14 May 2025 12:50:29 -0400 Subject: [PATCH 1/3] Corrected recursive matching --- src/filepattern/cpp/internal/filepattern.cpp | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/src/filepattern/cpp/internal/filepattern.cpp b/src/filepattern/cpp/internal/filepattern.cpp index d299042..2255039 100644 --- a/src/filepattern/cpp/internal/filepattern.cpp +++ b/src/filepattern/cpp/internal/filepattern.cpp @@ -100,8 +100,6 @@ void FilePatternObject::matchFilesMultDir(){ std::smatch sm; std::string file, file_path; - bool is_pushed = false; - // Iterate over directories and subdirectories for (const auto& entry : this->recursive_iterator_) { @@ -123,18 +121,12 @@ void FilePatternObject::matchFilesMultDir(){ tup = getVariableMapMultDir(file_path, sm); } - if(std::get<0>(tup).size() > 0){ - this->valid_files_.push_back(tup); - is_pushed = true; - } else { - is_pushed = false; + // If the path vector in the tuple is not empty, it's a valid match + if(!std::get<1>(tup).empty()){ + this->valid_files_.push_back(tup); } } } - - if (!is_pushed && std::get<1>(tup).size() > 0) { - this->valid_files_.push_back(tup); - } } void FilePatternObject::matchFiles() { From 0ef61af3409be40f8a3449e281700c9d83bf18b8 Mon Sep 17 00:00:00 2001 From: Bengt Ljungquist Date: Wed, 14 May 2025 15:08:15 -0400 Subject: [PATCH 2/3] Added unit test, corrected formatting --- tests/test_filepattern.py | 87 ++++++++++++------------- tests/test_generate_filepattern_data.py | 55 +++++++++++++++- 2 files changed, 96 insertions(+), 46 deletions(-) diff --git a/tests/test_filepattern.py b/tests/test_filepattern.py index b9a8b1c..f21f53f 100644 --- a/tests/test_filepattern.py +++ b/tests/test_filepattern.py @@ -5,36 +5,30 @@ import test_generate_filepattern_data import test_filepattern_data as fp_data + class TestFilePatternFunctions(): - + def test_get_regex(self): - + # Test with a pattern that includes a single variable pattern = 'img_{row:c}{col:dd}f{f:dd}d{channel:d}.tif' - - regex_pattern = fp.get_regex(pattern) - + regex_pattern = fp.get_regex(pattern) assert regex_pattern == 'img_([a-zA-Z])([0-9][0-9])f([0-9][0-9])d([0-9]).tif' - - def test_get_variables(self): + def test_get_variables(self): + # Test with a pattern that includes a single variable pattern = 'img_r{r:ddd}_c{c:ddd}.tif' - variables = fp.get_variables(pattern) - assert (variables == ['r', 'c'] or variables == ['c', 'r']) + class TestArrayPattern(): test_generate_filepattern_data.generate_text_data() - root_directory = os.path.dirname(os.path.realpath(__file__)) - filepath = root_directory + '/test_data/data100.txt' - old_pattern = 'img_r{rrr}_c{ccc}.tif' - - patterns = ['img_r{r:ddd}_c{c:ddd}.tif', 'img_r{r:d+}_c{c:d+}.tif', old_pattern] - + patterns = ['img_r{r:ddd}_c{c:ddd}.tif', 'img_r{r:d+}_c{c:d+}.tif', + old_pattern] MAX_NUM = 9 with open(filepath, 'r') as file: @@ -273,18 +267,14 @@ def test_no_sorting(self): for i in range(len(data)): assert str(results[i][1][0]) == data[i] + class TestFilePattern(): root_directory = os.path.dirname(os.path.realpath(__file__)) - path = root_directory + '/test_data/data100' - sorted_path = root_directory + '/test_data/sorted_data' - old_pattern = 'img_r{rrr}_c{ccc}.tif' - patterns = ['img_r00{r:d}_c{c:ddd}.tif', 'img_r{r:d+}_c{c:d+}.tif', old_pattern] - MAX_NUM = 9 test_generate_filepattern_data.generate_data() @@ -293,6 +283,7 @@ class TestFilePattern(): test_generate_filepattern_data.generate_bracket_data() test_generate_filepattern_data.generate_channel_data_sc() test_generate_filepattern_data.generate_channel_data_spaces() + test_generate_filepattern_data.generate_recursive_no_capture_data() # Added new generator call def test_file_pattern(self): @@ -501,11 +492,11 @@ def test_group_by_all_pydantic(self): assert fp_data.test_fp[i][0]["r"] == result[i].r assert fp_data.test_fp[i][0]["c"] == result[i].c assert os.path.basename(fp_data.test_fp[i][1][0]) == os.path.basename(result[i].path[0]) - + def test_named_group_direcotry(self): - + path = self.root_directory + '/test_data/recursive_data' - + path += '/(?P[a-zA-Z]+)/img_r{r:ddd}_c{c:ddd}.tif' for pattern in self.patterns: @@ -532,13 +523,11 @@ def test_named_group_direcotry(self): basename = os.path.basename(mapping[1][0]) for filepath in mapping[1]: assert basename == os.path.basename(filepath) - + def test_recursive_directory_fp(self): - + path = self.root_directory + '/test_data/recursive_data' - filepattern = '/{directory:c+}/img_r{r:ddd}_c{c:ddd}.tif' - files = fp.FilePattern(path, filepattern, recursive=True) result = [] @@ -556,13 +545,11 @@ def test_recursive_directory_fp(self): assert fp_data.test_recursive_directory_fp[i][0]["c"] == result[i][0]["c"] assert fp_data.test_recursive_directory_fp[i][0]["directory"] == result[i][0]["directory"] assert str(os.path.basename(fp_data.test_recursive_directory_fp[i][1][0])) == os.path.basename(result[i][1][0]) - + def test_recursive_directory_regex_fp(self): - + # Test that recursive matching with a regex pattern path = self.root_directory + '/test_data/recursive_data' - filepattern = '/(?P[a-zA-Z]+)/img_r{r:ddd}_c{c:ddd}.tif' - files = fp.FilePattern(path, filepattern, recursive=True) result = [] @@ -580,13 +567,11 @@ def test_recursive_directory_regex_fp(self): assert fp_data.test_recursive_directory_fp[i][0]["c"] == result[i][0]["c"] assert fp_data.test_recursive_directory_fp[i][0]["directory"] == result[i][0]["directory"] assert str(os.path.basename(fp_data.test_recursive_directory_fp[i][1][0])) == os.path.basename(result[i][1][0]) - + def test_recursive_directory_regex_special_character_fp(self): - + # Test that recursive matching with a regex pattern path = self.root_directory + '/test_data/recursive_data_sc' - filepattern = '/(?P.*)/img_r{r:ddd}_c{c:ddd}.tif' - files = fp.FilePattern(path, filepattern, recursive=True) result = [] @@ -606,11 +591,9 @@ def test_recursive_directory_regex_special_character_fp(self): assert str(os.path.basename(fp_data.test_recursive_directory_fp[i][1][0])) == os.path.basename(result[i][1][0]) def test_recursive_directory_spaces_fp(self): - + path = self.root_directory + '/test_data/recursive_data_spaces/' - filepattern = 'img_r{r:ddd}_c{c:ddd}.tif' - files = fp.FilePattern(path, filepattern, recursive=True) result = [] @@ -631,11 +614,10 @@ def test_recursive_directory_spaces_fp(self): assert str(os.path.basename(fp_data.test_recursive_space[i][1][0])) == os.path.basename(result[i][1][0]) def test_recursive_multi_directory_regex_fp(self): - + # Test that recursive matching with a regex pattern + path = self.root_directory + '/test_data' - filepattern = '/.*/{directory:c+}/img_r{r:ddd}_c{c:ddd}.tif' - files = fp.FilePattern(path, filepattern, recursive=True) result = [] @@ -654,6 +636,22 @@ def test_recursive_multi_directory_regex_fp(self): assert fp_data.test_recursive_directory_fp[i][0]["directory"] == result[i][0]["directory"] assert str(os.path.basename(fp_data.test_recursive_directory_fp[i][1][0])) == os.path.basename(result[i][1][0]) + def test_recursive_no_capture_group_returns_all_files(self): + # Test that recursive matching with a non-capturing pattern + # returns all matching files across subdirectories. + path_to_test_dir = os.path.join(self.root_directory, 'test_data', 'recursive_no_capture_data') + pattern_no_capture = '.*.tmp' # Match all .tmp files + expected_file_count = 12 # 5 in subdir1 + 7 in subdir2 + + files_fp = fp.FilePattern(path_to_test_dir, pattern_no_capture, recursive=True) + + # Using len() as it seems to be the standard way to get count in existing tests + actual_file_count = len(files_fp) + + assert actual_file_count == expected_file_count, \ + f"Expected {expected_file_count} '.tmp' files, but found {actual_file_count} " \ + f"using recursive non-capturing pattern in {path_to_test_dir}." + def test_file_pattern_iter(self): for pattern in self.patterns: @@ -674,9 +672,8 @@ def test_file_pattern_iter(self): assert fp_data.test_fp[i][0]["c"] == result[i][0]["c"] assert os.path.basename(fp_data.test_fp[i][1][0]) == os.path.basename(result[i][1][0]) - # test that numeric only, double digit numbers are sorted properly def test_file_pattern_sorting(self): - + # test that numeric only, double digit numbers are sorted properly sorted_pattern = '{index:d+}.tif' files = fp.FilePattern(self.sorted_path, sorted_pattern) @@ -687,6 +684,7 @@ def test_file_pattern_sorting(self): assert sorted(indices) == indices def test_file_pattern_brackets(self): + # test that numeric only, double digit numbers are sorted properly bracket_path = self.root_directory + '/test_data/bracket_data/' @@ -703,7 +701,8 @@ def test_file_pattern_brackets(self): for i in range(len(result)): result[i][0]['c'] == i - os.path.basename(result[i][1][0]) == f'x(0-31)_y(01-48)_c{i}.ome.tif' + os.path.basename( + result[i][1][0]) == f'x(0-31)_y(01-48)_c{i}.ome.tif' # Todo: These tests need new data to be added after replacing the old version of filepattern. diff --git a/tests/test_generate_filepattern_data.py b/tests/test_generate_filepattern_data.py index 20e7b80..ae37ba7 100644 --- a/tests/test_generate_filepattern_data.py +++ b/tests/test_generate_filepattern_data.py @@ -2,10 +2,12 @@ import math import os + directory = 'test_data' root_directory = os.path.dirname(os.path.realpath(__file__)) path = os.path.join(root_directory, directory) + def generate_data(): MAX = 100 length = 0 @@ -100,6 +102,7 @@ def generate_channel_data(): print("Files generated.") + def generate_channel_data_sc(): MAX = 3 @@ -153,6 +156,7 @@ def generate_channel_data_sc(): print("Files generated.") + def generate_channel_data_spaces(): MAX = 3 @@ -206,6 +210,7 @@ def generate_channel_data_spaces(): print("Files generated.") + def generate_sorted_data(): MAX = 30 length = 0 @@ -235,6 +240,7 @@ def generate_sorted_data(): print(str(length) + " files generated.") + def generate_text_data(): output_file = path + '/data100.txt' print(output_file) @@ -244,6 +250,7 @@ def generate_text_data(): filename = f"img_r{r:03}_c{c:03}.tif" file.write(filename + "\n") + def generate_bracket_data(): directory = 'test_data' root_directory = os.path.dirname(os.path.realpath(__file__)) @@ -271,14 +278,58 @@ def generate_bracket_data(): f2.close() +def generate_recursive_no_capture_data(): + base_data_dir_name = 'recursive_no_capture_data' + subdir1_name = 'subdir1' + subdir2_name = 'subdir2' + num_files_subdir1 = 5 + num_files_subdir2 = 7 + file_extension = '.tmp' + + recursive_base_path = os.path.join(path, base_data_dir_name) + subdir1_path = os.path.join(recursive_base_path, subdir1_name) + subdir2_path = os.path.join(recursive_base_path, subdir2_name) + + try: + os.makedirs(recursive_base_path, exist_ok=True) + os.makedirs(subdir1_path, exist_ok=True) + os.makedirs(subdir2_path, exist_ok=True) + print(f"Created directory structure at {recursive_base_path}") + except OSError as e: + print(f"Error creating directories: {e}") + return # Stop if directories can't be created + + # Create files in subdir1 + for i in range(num_files_subdir1): + file_name = f"file{i+1}{file_extension}" + try: + with open(os.path.join(subdir1_path, file_name), 'w'): + pass # Create empty file + except IOError as e: + print(f"Error creating file {os.path.join(subdir1_path, file_name)}: {e}") + + # Create files in subdir2 + for i in range(num_files_subdir2): + file_name = f"item{chr(ord('A')+i)}{file_extension}" # e.g., itemA.tmp, itemB.tmp + try: + with open(os.path.join(subdir2_path, file_name), 'w'): + pass # Create empty file + except IOError as e: + print(f"Error creating file {os.path.join(subdir2_path, file_name)}: {e}") + + total_files = num_files_subdir1 + num_files_subdir2 + print(f"{total_files} files generated in {recursive_base_path}") + + if __name__ == '__main__': generate_data() generate_channel_data() generate_sorted_data() generate_text_data() generate_bracket_data() - generate_channel_data_sc - generate_channel_data_spaces + generate_channel_data_sc() # Corrected call + generate_channel_data_spaces() # Corrected call + generate_recursive_no_capture_data() # Added new generator call MAX = 3 From f7f4eaaa9a13232984440595d77713b41826efd0 Mon Sep 17 00:00:00 2001 From: Sameeul B Samee Date: Thu, 15 May 2025 09:47:25 -0400 Subject: [PATCH 3/3] minor updates --- tests/test_generate_filepattern_data.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_generate_filepattern_data.py b/tests/test_generate_filepattern_data.py index ae37ba7..9204830 100644 --- a/tests/test_generate_filepattern_data.py +++ b/tests/test_generate_filepattern_data.py @@ -327,9 +327,9 @@ def generate_recursive_no_capture_data(): generate_sorted_data() generate_text_data() generate_bracket_data() - generate_channel_data_sc() # Corrected call - generate_channel_data_spaces() # Corrected call - generate_recursive_no_capture_data() # Added new generator call + generate_channel_data_sc() + generate_channel_data_spaces() + generate_recursive_no_capture_data() MAX = 3