@@ -12,19 +12,51 @@ def _should_include(path: Path, base_path: Path, include_patterns: Set[str]) ->
1212 This function checks whether the relative path of a file or directory matches any of the specified patterns. If a
1313 match is found, it returns `True`, indicating that the file or directory should be included in further processing.
1414
15+ The function handles both recursive (**) and non-recursive patterns differently:
16+ - For non-recursive patterns (e.g. "src/*.py"), files must match the exact pattern depth
17+ - For recursive patterns (e.g. "src/**/*.py"), files can match at any depth under the pattern prefix
18+
19+ Directory matching has special handling:
20+ - For directories, we check if they could contain matching files based on the pattern
21+ - Patterns ending in /* are treated as matching any files in that directory
22+ - For non-recursive patterns, directories must match the exact pattern depth
23+ - For recursive patterns with **, directories are checked against the pattern prefix
24+
1525 Parameters
1626 ----------
1727 path : Path
1828 The absolute path of the file or directory to check.
1929 base_path : Path
2030 The base directory from which the relative path is calculated.
2131 include_patterns : Set[str]
22- A set of patterns to check against the relative path.
32+ A set of patterns to check against the relative path. Patterns can include:
33+ - * to match any characters except /
34+ - ** to match any characters including /
35+ - /* at the end to match any files in a directory
2336
2437 Returns
2538 -------
2639 bool
2740 `True` if the path matches any of the include patterns, `False` otherwise.
41+
42+ Raises
43+ ------
44+ ValueError
45+ If a non-recursive pattern is used and the directory depth exceeds the pattern depth.
46+ This indicates a traversal error since parent directories should have been filtered.
47+
48+ Examples
49+ --------
50+ >>> _should_include(Path("/root/src/file.py"), Path("/root"), include_patterns={"src/*.py"})
51+ True
52+ >>> _should_include(Path("/root/src/nested/file.py"), Path("/root"), include_patterns={"src/**/*.py"})
53+ True
54+ >>> _should_include(Path("/root/src"), Path("/root"), include_patterns={"src/*"})
55+ True # Directory matches as it could contain matching files
56+
57+ # TODO: Fix bug where directories are included in the directory structure output when they should not be,
58+ # e.g. atomics/**/Indexes-Markdown/*.md should not include atomics/Indexes/Attack-Navigator-Layers/
59+ # or atomics/T1003.003/src/ for the repository https://github.com/redcanaryco/atomic-red-team.
2860 """
2961 try :
3062 rel_path = path .relative_to (base_path )
@@ -33,12 +65,40 @@ def _should_include(path: Path, base_path: Path, include_patterns: Set[str]) ->
3365 return False
3466
3567 rel_str = str (rel_path )
36- if path .is_dir ():
37- rel_str += "/"
3868
39- for pattern in include_patterns :
40- if fnmatch (rel_str , pattern ):
41- return True
69+ for pattern in include_patterns - {"" }: # ignore empty pattern
70+ if path .is_dir ():
71+ # For directory traversal, check if the directory is part of the path that leads to matching files
72+ if pattern .endswith ("/*" ):
73+ # A pattern ending in /* gets one more *, turning it into /** so it is handled as a recursive pattern below
74+ pattern += "*"
75+
76+ pattern_parts = pattern .split ("/" )
77+ dir_parts = rel_str .split ("/" )
78+
79+ # For non-recursive patterns (no **), validate directory depth matches pattern depth
80+ # Recursive patterns can match directories at any depth
81+ if all (["**" not in pattern , len (pattern_parts ) > 1 , len (dir_parts ) > len (pattern_parts )]):
82+ raise ValueError (
83+ f"Directory '{ rel_str } ' has { len (dir_parts )} path segments but pattern '{ pattern } ' "
84+ f"only has { len (pattern_parts )} segments. This indicates a traversal error since "
85+ f"parent directories should have been filtered out by pattern matching."
86+ )
87+
88+ relevant_pattern_length = (
89+ min (len (dir_parts ), pattern_parts .index ("**" )) if "**" in pattern_parts else len (dir_parts )
90+ )
91+ pattern_prefix = "/" .join (pattern_parts [:relevant_pattern_length ])
92+
93+ if "**" in pattern_parts :
94+ pattern_prefix += "*"
95+
96+ if fnmatch (rel_str , pattern_prefix ):
97+ return True
98+ else :
99+ if fnmatch (rel_str , pattern ):
100+ return True
101+
42102 return False
43103
44104
@@ -50,6 +110,8 @@ def _should_exclude(path: Path, base_path: Path, ignore_patterns: Set[str]) -> b
50110 any of the specified ignore patterns. If a match is found, it returns `True`, indicating
51111 that the file or directory should be excluded from further processing.
52112
113+ TODO: Check if we need to handle exclude patterns with **, and if so, how.
114+
53115 Parameters
54116 ----------
55117 path : Path
0 commit comments