@@ -63,6 +63,12 @@ def _compile_pattern(pat, sep, case_sensitive):
6363 return re.compile(regex, flags=flags).match
6464
6565
def _select_special(paths, part):
    """Yield special literal children of the given paths.

    For every path in *paths*, yield ``path._make_child_relpath(part)``.
    glob() uses this for the pattern components ``''``, ``'.'`` and
    ``'..'``. This function itself performs no name matching and no
    existence check; it only appends *part* to each incoming path.
    """
    for path in paths:
        yield path._make_child_relpath(part)
70+
71+
6672def _select_children(parent_paths, dir_only, follow_symlinks, match):
6773 """Yield direct children of given paths, filtering by name and type."""
6874 if follow_symlinks is None:
@@ -84,7 +90,7 @@ def _select_children(parent_paths, dir_only, follow_symlinks, match):
8490 except OSError:
8591 continue
8692 if match(entry.name):
87- yield parent_path._make_child_entry(entry, dir_only )
93+ yield parent_path._make_child_entry(entry)
8894
8995
9096def _select_recursive(parent_paths, dir_only, follow_symlinks):
@@ -107,7 +113,7 @@ def _select_recursive(parent_paths, dir_only, follow_symlinks):
107113 for entry in entries:
108114 try:
109115 if entry.is_dir(follow_symlinks=follow_symlinks):
110- paths.append(path._make_child_entry(entry, dir_only ))
116+ paths.append(path._make_child_entry(entry))
111117 continue
112118 except OSError:
113119 pass
@@ -427,6 +433,14 @@ def is_absolute(self):
427433 a drive)."""
428434 return self.pathmod.isabs(self._raw_path)
429435
@property
def _pattern_stack(self):
    """Stack of path components, to be used with patterns in glob().

    Returns the ``parts`` half of ``self._stack``. glob() consumes the
    returned list with ``pop()``, so its last element is the first
    pattern component to be processed.

    Raises NotImplementedError if the pattern has an anchor, because
    only relative patterns are supported.
    """
    anchor, parts = self._stack
    if anchor:
        raise NotImplementedError("Non-relative patterns are unsupported")
    return parts
443+
430444 def match(self, path_pattern, *, case_sensitive=None):
431445 """
432446 Return True if this path matches the given pattern.
@@ -436,11 +450,10 @@ def match(self, path_pattern, *, case_sensitive=None):
436450 if case_sensitive is None:
437451 case_sensitive = _is_case_sensitive(self.pathmod)
438452 sep = path_pattern.pathmod.sep
439- pattern_str = str(path_pattern)
440453 if path_pattern.anchor:
441- pass
454+ pattern_str = str(path_pattern)
442455 elif path_pattern.parts:
443- pattern_str = f '**{sep}{pattern_str}'
456+ pattern_str = str( '**' / path_pattern)
444457 else:
445458 raise ValueError("empty pattern")
446459 match = _compile_pattern(pattern_str, sep, case_sensitive)
@@ -714,10 +727,8 @@ def _scandir(self):
714727 from contextlib import nullcontext
715728 return nullcontext(self.iterdir())
716729
def _make_child_entry(self, entry):
    """Transform an entry yielded from _scandir() into a path object.

    This implementation returns *entry* unchanged.
    """
    return entry
722733
723734 def _make_child_relpath(self, name):
@@ -727,57 +738,35 @@ def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
727738 """Iterate over this subtree and yield all existing files (of any
728739 kind, including directories) matching the given relative pattern.
729740 """
730- path_pattern = self.with_segments(pattern)
731- if path_pattern.anchor:
732- raise NotImplementedError("Non-relative patterns are unsupported")
733- elif not path_pattern.parts:
734- raise ValueError("Unacceptable pattern: {!r}".format(pattern))
735-
736- pattern_parts = list(path_pattern.parts)
737- if not self.pathmod.split(pattern)[1]:
738- # GH-65238: pathlib doesn't preserve trailing slash. Add it back.
739- pattern_parts.append('')
740-
741+ if not isinstance(pattern, PurePathBase):
742+ pattern = self.with_segments(pattern)
741743 if case_sensitive is None:
742744 # TODO: evaluate case-sensitivity of each directory in _select_children().
743745 case_sensitive = _is_case_sensitive(self.pathmod)
744746
745- # If symlinks are handled consistently, and the pattern does not
746- # contain '..' components, then we can use a 'walk-and-match' strategy
747- # when expanding '**' wildcards. When a '**' wildcard is encountered,
748- # all following pattern parts are immediately consumed and used to
749- # build a `re.Pattern` object. This pattern is used to filter the
750- # recursive walk. As a result, pattern parts following a '**' wildcard
751- # do not perform any filesystem access, which can be much faster!
752- filter_paths = follow_symlinks is not None and '..' not in pattern_parts
747+ stack = pattern._pattern_stack
748+ specials = ('', '.', '..')
749+ filter_paths = False
753750 deduplicate_paths = False
754751 sep = self.pathmod.sep
755752 paths = iter([self.joinpath('')] if self.is_dir() else [])
756- part_idx = 0
757- while part_idx < len(pattern_parts):
758- part = pattern_parts[part_idx]
759- part_idx += 1
760- if part == '':
761- # Trailing slash.
762- pass
763- elif part == '..':
764- paths = (path._make_child_relpath('..') for path in paths)
753+ while stack:
754+ part = stack.pop()
755+ if part in specials:
756+ paths = _select_special(paths, part)
765757 elif part == '**':
766758 # Consume adjacent '**' components.
767- while part_idx < len(pattern_parts) and pattern_parts[part_idx] == '**':
768- part_idx += 1
769-
770- if filter_paths and part_idx < len(pattern_parts) and pattern_parts[part_idx] != '':
771- dir_only = pattern_parts[-1] == ''
772- paths = _select_recursive(paths, dir_only, follow_symlinks)
759+ while stack and stack[-1] == '**':
760+ stack.pop()
773761
774- # Filter out paths that don't match pattern.
775- prefix_len = len(str(self._make_child_relpath('_'))) - 1
776- match = _compile_pattern(str(path_pattern), sep, case_sensitive)
777- paths = (path for path in paths if match(str(path), prefix_len))
778- return paths
762+ # Consume adjacent non-special components and enable post-walk
763+ # regex filtering, provided we're treating symlinks consistently.
764+ if follow_symlinks is not None:
765+ while stack and stack[-1] not in specials:
766+ filter_paths = True
767+ stack.pop()
779768
780- dir_only = part_idx < len(pattern_parts )
769+ dir_only = bool(stack )
781770 paths = _select_recursive(paths, dir_only, follow_symlinks)
782771 if deduplicate_paths:
783772 # De-duplicate if we've already seen a '**' component.
@@ -786,18 +775,25 @@ def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
786775 elif '**' in part:
787776 raise ValueError("Invalid pattern: '**' can only be an entire path component")
788777 else:
789- dir_only = part_idx < len(pattern_parts )
778+ dir_only = bool(stack )
790779 match = _compile_pattern(part, sep, case_sensitive)
791780 paths = _select_children(paths, dir_only, follow_symlinks, match)
781+ if filter_paths:
782+ # Filter out paths that don't match pattern.
783+ prefix_len = len(str(self._make_child_relpath('_'))) - 1
784+ match = _compile_pattern(str(pattern), sep, case_sensitive)
785+ paths = (path for path in paths if match(str(path), prefix_len))
792786 return paths
793787
def rglob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
    """Recursively yield all existing files (of any kind, including
    directories) matching the given relative pattern, anywhere in
    this subtree.

    *pattern* may be a PurePathBase instance or any value accepted by
    ``self.with_segments()``. *case_sensitive* and *follow_symlinks* are
    passed through to glob() unchanged.
    """
    if not isinstance(pattern, PurePathBase):
        pattern = self.with_segments(pattern)
    # Prepend a '**' component so that glob() searches the whole subtree.
    # The division is resolved by the pattern object's reflected '/' operator.
    pattern = '**' / pattern
    return self.glob(pattern, case_sensitive=case_sensitive, follow_symlinks=follow_symlinks)
801797
802798 def walk(self, top_down=True, on_error=None, follow_symlinks=False):
803799 """Walk the directory tree from this directory, similar to os.walk()."""
0 commit comments