From 773dbdb314a6c755a7742151f921eef52bb6a1af Mon Sep 17 00:00:00 2001 From: RyanL2004 Date: Thu, 9 Jan 2025 03:45:41 +0000 Subject: [PATCH 1/3] Add test for *.txt include pattern filtering --- tests/test_query_ingestion.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/tests/test_query_ingestion.py b/tests/test_query_ingestion.py index a74e8265..616fd6b1 100644 --- a/tests/test_query_ingestion.py +++ b/tests/test_query_ingestion.py @@ -57,7 +57,26 @@ def test_read_file_content_with_non_notebook(tmp_path: Path): mock_process.assert_not_called() -# TODO: test with include patterns: ['*.txt'] +# Test that with ['*.txt'] as the include pattern, only .txt files are processed and .py files are excluded +def test_include_txt_pattern(temp_directory: Path, sample_query: dict[str, Any]) -> None: + sample_query["local_path"] = temp_directory + sample_query["include_patterns"] = ["*.txt"] + + result = _scan_directory(temp_directory, query=sample_query) + assert result is not None, "Result should not be None" + + files = _extract_files_content(query=sample_query, node=result, max_file_size=1_000_000) + file_paths = [f["path"] for f in files] + assert len(files) == 5, "Should have found exactly 5 .txt files" + assert all(path.endswith(".txt") for path in file_paths), "Should only include .txt files" + + expected_files = ["file1.txt", "subfile1.txt", "file_subdir.txt", "file_dir1.txt", "file_dir2.txt"] + for expected_file in expected_files: + assert any(expected_file in path for path in file_paths), f"Missing expected file: {expected_file}" + + assert not any(path.endswith(".py") for path in file_paths), "Should not include .py files" + + # TODO: test with wrong include patterns: ['*.qwerty'] From 2d1a4282b57da14fc890ad84d7dc5247069e7b35 Mon Sep 17 00:00:00 2001 From: RyanL2004 Date: Fri, 10 Jan 2025 17:24:20 +0000 Subject: [PATCH 2/3] Add test case for nonexistent file extension pattern handling --- tests/test_query_ingestion.py | 17 
++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/tests/test_query_ingestion.py b/tests/test_query_ingestion.py index 616fd6b1..65af7bee 100644 --- a/tests/test_query_ingestion.py +++ b/tests/test_query_ingestion.py @@ -77,7 +77,22 @@ def test_include_txt_pattern(temp_directory: Path, sample_query: dict[str, Any]) assert not any(path.endswith(".py") for path in file_paths), "Should not include .py files" -# TODO: test with wrong include patterns: ['*.qwerty'] +def test_include_nonexistent_extension(temp_directory: Path, sample_query: dict[str, Any]) -> None: + sample_query["local_path"] = temp_directory + sample_query["include_patterns"] = ["*.query"] # A non-existent extension + + result = _scan_directory(temp_directory, query=sample_query) + assert result is not None, "Result should not be None" + + # Extract the files content & set file limit cap + files = _extract_files_content(query=sample_query, node=result, max_file_size=1_000_000) + # Verify no file processed with wrong extension + assert len(files) == 0, "Should not find any files with .query extension" + + assert result["type"] == "directory" + assert result["file_count"] == 0 + assert result["dir_count"] == 0 + assert len(result["children"]) == 0 # single folder patterns From 85be231b19700a956376cb438561877856dd16f3 Mon Sep 17 00:00:00 2001 From: RyanL2004 Date: Sat, 11 Jan 2025 19:05:02 +0000 Subject: [PATCH 3/3] Add test cases for directory pattern matching --- tests/test_query_ingestion.py | 86 +++++++++++++++++++++++++++++++++-- 1 file changed, 81 insertions(+), 5 deletions(-) diff --git a/tests/test_query_ingestion.py b/tests/test_query_ingestion.py index 616fd6b1..a2db6882 100644 --- a/tests/test_query_ingestion.py +++ b/tests/test_query_ingestion.py @@ -77,14 +77,90 @@ def test_include_txt_pattern(temp_directory: Path, sample_query: dict[str, Any]) assert not any(path.endswith(".py") for path in file_paths), "Should not include .py files" -# TODO: test with wrong 
include patterns: ['*.qwerty'] +def test_include_nonexistent_extension(temp_directory: Path, sample_query: dict[str, Any]) -> None: + sample_query["local_path"] = temp_directory + sample_query["include_patterns"] = ["*.query"] # A non-existent extension + + result = _scan_directory(temp_directory, query=sample_query) + assert result is not None, "Result should not be None" + + # Extract the files content & set file limit cap + files = _extract_files_content(query=sample_query, node=result, max_file_size=1_000_000) + # Verify no file processed with wrong extension + assert len(files) == 0, "Should not find any files with .query extension" + + assert result["type"] == "directory" + assert result["file_count"] == 0 + assert result["dir_count"] == 0 + assert len(result["children"]) == 0 # single folder patterns -# TODO: test with include patterns: ['src/*'] -# TODO: test with include patterns: ['/src/*'] -# TODO: test with include patterns: ['/src/'] -# TODO: test with include patterns: ['/src*'] +def test_include_src_star_pattern(temp_directory: Path, sample_query: dict[str, Any]) -> None: + + sample_query["local_path"] = temp_directory + sample_query["include_patterns"] = ["src/*"] # Without leading slash + + result = _scan_directory(temp_directory, query=sample_query) + assert result is not None, "Result should not be None" + + files = _extract_files_content(query=sample_query, node=result, max_file_size=1_000_000) + assert len(files) == 4, "Should find all files under src directory" + + # Normalize paths to use platform-specific separator + file_paths = {str(Path(f["path"])) for f in files} # Using set and Path for normalization + expected_paths = { + str(Path("src/subfile1.txt")), + str(Path("src/subfile2.py")), + str(Path("src/subdir/file_subdir.txt")), + str(Path("src/subdir/file_subdir.py")), + } + assert file_paths == expected_paths, "Missing or unexpected files in result" + + +def test_include_src_recursive(temp_directory: Path, sample_query: dict[str, 
Any]) -> None: + + sample_query["local_path"] = temp_directory + sample_query["include_patterns"] = ["src/**"] # Use ** for recursive matching + + result = _scan_directory(temp_directory, query=sample_query) + assert result is not None, "Result should not be None" + + files = _extract_files_content(query=sample_query, node=result, max_file_size=1_000_000) + assert len(files) == 4, "Should find all files under src/" + + # Normalize paths to use platform-specific separator + file_paths = {str(Path(f["path"])) for f in files} + expected_paths = { + str(Path("src/subfile1.txt")), + str(Path("src/subfile2.py")), + str(Path("src/subdir/file_subdir.txt")), + str(Path("src/subdir/file_subdir.py")), + } + assert file_paths == expected_paths, "Missing or unexpected files in result" + + +def test_include_src_wildcard_prefix(temp_directory: Path, sample_query: dict[str, Any]) -> None: + + sample_query["local_path"] = temp_directory + sample_query["include_patterns"] = ["src*"] # Without leading slash + + result = _scan_directory(temp_directory, query=sample_query) + assert result is not None, "Result should not be None" + + files = _extract_files_content(query=sample_query, node=result, max_file_size=1_000_000) + assert len(files) == 4, "Should find all files under paths starting with src" + + # Normalize paths to use platform-specific separator + file_paths = {str(Path(f["path"])) for f in files} + expected_paths = { + str(Path("src/subfile1.txt")), + str(Path("src/subfile2.py")), + str(Path("src/subdir/file_subdir.txt")), + str(Path("src/subdir/file_subdir.py")), + } + assert file_paths == expected_paths, "Missing or unexpected files in result" + # multiple patterns # TODO: test with multiple include patterns: ['*.txt', '*.py']