Skip to content
Open
196 changes: 132 additions & 64 deletions Doc/library/re.rst

Large diffs are not rendered by default.

13 changes: 13 additions & 0 deletions Doc/whatsnew/3.15.rst
Original file line number Diff line number Diff line change
Expand Up @@ -703,6 +703,19 @@ resource
(Contributed by Serhiy Storchaka in :gh:`137512`.)


re
--

* :func:`re.prefixmatch` and a corresponding :meth:`~re.Pattern.prefixmatch`
have been added as alternate, more explicit names for the existing
:func:`re.match` and :meth:`~re.Pattern.match` APIs. These are intended
to alleviate confusion around what *match* means by following the
Zen of Python's *"Explicit is better than implicit"* mantra. Most other
languages' regular expression libraries use an API named *match* to mean what
Python has always called *search*.
(Contributed by Gregory P. Smith in :gh:`86519`.)


shelve
------

Expand Down
38 changes: 21 additions & 17 deletions Lib/re/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,17 +85,18 @@
\\ Matches a literal backslash.
This module exports the following functions:
match Match a regular expression pattern to the beginning of a string.
fullmatch Match a regular expression pattern to all of a string.
search Search a string for the presence of a pattern.
sub Substitute occurrences of a pattern found in a string.
subn Same as sub, but also return the number of substitutions made.
split Split a string by the occurrences of a pattern.
findall Find all occurrences of a pattern in a string.
finditer Return an iterator yielding a Match object for each match.
compile Compile a pattern into a Pattern object.
purge Clear the regular expression cache.
escape Backslash all non-alphanumerics in a string.
prefixmatch Match a regular expression pattern to the start of a string.
match The original name of prefixmatch prior to 3.15.
fullmatch Match a regular expression pattern to all of a string.
search Search a string for the presence of a pattern.
sub Substitute occurrences of a pattern found in a string.
subn Same as sub, but also return the number of substitutions made.
split Split a string by the occurrences of a pattern.
findall Find all occurrences of a pattern in a string.
finditer Return an iterator yielding a Match object for each match.
compile Compile a pattern into a Pattern object.
purge Clear the regular expression cache.
escape Backslash all non-alphanumerics in a string.
Each function other than purge and escape can take an optional 'flags' argument
consisting of one or more of the following module constants, joined by "|".
Expand Down Expand Up @@ -130,7 +131,7 @@

# public symbols
__all__ = [
"match", "fullmatch", "search", "sub", "subn", "split",
"prefixmatch", "match", "fullmatch", "search", "sub", "subn", "split",
"findall", "finditer", "compile", "purge", "escape",
"error", "Pattern", "Match", "A", "I", "L", "M", "S", "X", "U",
"ASCII", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE",
Expand Down Expand Up @@ -159,10 +160,13 @@ class RegexFlag:
# --------------------------------------------------------------------
# public interface

def match(pattern, string, flags=0):
def prefixmatch(pattern, string, flags=0):
"""Try to apply the pattern at the start of the string, returning
a Match object, or None if no match was found."""
return _compile(pattern, flags).match(string)
return _compile(pattern, flags).prefixmatch(string)

# The original name for prefixmatch; it is less explicit about the behavior.
match = prefixmatch

def fullmatch(pattern, string, flags=0):
"""Try to apply the pattern to all of the string, returning
Expand Down Expand Up @@ -311,7 +315,7 @@ def escape(pattern):
return pattern.translate(_special_chars_map).encode('latin1')

Pattern = type(_compiler.compile('', 0))
Match = type(_compiler.compile('', 0).match(''))
Match = type(_compiler.compile('', 0).prefixmatch(''))

# --------------------------------------------------------------------
# internals
Expand Down Expand Up @@ -410,10 +414,10 @@ def __init__(self, lexicon, flags=0):
def scan(self, string):
result = []
append = result.append
match = self.scanner.scanner(string).match
_match = self.scanner.scanner(string).prefixmatch
i = 0
while True:
m = match()
m = _match()
if not m:
break
j = m.end()
Expand Down
5 changes: 4 additions & 1 deletion Lib/test/test_inspect/test_inspect.py
Original file line number Diff line number Diff line change
Expand Up @@ -6277,7 +6277,10 @@ def test_pwd_module_has_signatures(self):

def test_re_module_has_signatures(self):
import re
methods_no_signature = {'Match': {'group'}}
methods_no_signature = {
'Match': {'group'},
'Pattern': {'match'}, # It is now an alias for prefixmatch
}
self._test_module_has_signatures(re,
methods_no_signature=methods_no_signature,
good_exceptions={'error', 'PatternError'})
Expand Down
88 changes: 48 additions & 40 deletions Lib/test/test_re.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,13 @@ def test_search_star_plus(self):
self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
self.assertIsNone(re.search('x', 'aaa'))
self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
self.assertEqual(re.prefixmatch('a*', 'xxx').span(0), (0, 0))
self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
self.assertEqual(re.prefixmatch('x*', 'xxxa').span(0), (0, 3))
self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
self.assertIsNone(re.match('a+', 'xxx'))
self.assertIsNone(re.prefixmatch('a+', 'xxx'))

def test_branching(self):
"""Test Branching
Expand Down Expand Up @@ -180,6 +183,7 @@ def test_bug_449000(self):
def test_bug_1661(self):
# Verify that flags do not get silently ignored with compiled patterns
pattern = re.compile('.')
self.assertRaises(ValueError, re.prefixmatch, pattern, 'A', re.I)
self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
Expand Down Expand Up @@ -517,6 +521,8 @@ def test_re_match(self):
self.assertEqual(re.match(b'(a)', string).group(0), b'a')
self.assertEqual(re.match(b'(a)', string).group(1), b'a')
self.assertEqual(re.match(b'(a)', string).group(1, 1), (b'a', b'a'))
self.assertEqual(re.prefixmatch(b'(a)', string).group(1, 1),
(b'a', b'a'))
for a in ("\xe0", "\u0430", "\U0001d49c"):
self.assertEqual(re.match(a, a).groups(), ())
self.assertEqual(re.match('(%s)' % a, a).groups(), (a,))
Expand Down Expand Up @@ -561,46 +567,48 @@ def __index__(self):
def test_match_getitem(self):
pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')

m = pat.match('a')
self.assertEqual(m['a1'], 'a')
self.assertEqual(m['b2'], None)
self.assertEqual(m['c3'], None)
self.assertEqual('a1={a1} b2={b2} c3={c3}'.format_map(m), 'a1=a b2=None c3=None')
self.assertEqual(m[0], 'a')
self.assertEqual(m[1], 'a')
self.assertEqual(m[2], None)
self.assertEqual(m[3], None)
with self.assertRaisesRegex(IndexError, 'no such group'):
m['X']
with self.assertRaisesRegex(IndexError, 'no such group'):
m[-1]
with self.assertRaisesRegex(IndexError, 'no such group'):
m[4]
with self.assertRaisesRegex(IndexError, 'no such group'):
m[0, 1]
with self.assertRaisesRegex(IndexError, 'no such group'):
m[(0,)]
with self.assertRaisesRegex(IndexError, 'no such group'):
m[(0, 1)]
with self.assertRaisesRegex(IndexError, 'no such group'):
'a1={a2}'.format_map(m)

m = pat.match('ac')
self.assertEqual(m['a1'], 'a')
self.assertEqual(m['b2'], None)
self.assertEqual(m['c3'], 'c')
self.assertEqual('a1={a1} b2={b2} c3={c3}'.format_map(m), 'a1=a b2=None c3=c')
self.assertEqual(m[0], 'ac')
self.assertEqual(m[1], 'a')
self.assertEqual(m[2], None)
self.assertEqual(m[3], 'c')

# Cannot assign.
with self.assertRaises(TypeError):
m[0] = 1

# No len().
self.assertRaises(TypeError, len, m)
for match_fn in pat.match, pat.prefixmatch:
with self.subTest(match_fn.__name__):
m = match_fn('a')
self.assertEqual(m['a1'], 'a')
self.assertEqual(m['b2'], None)
self.assertEqual(m['c3'], None)
self.assertEqual('a1={a1} b2={b2} c3={c3}'.format_map(m), 'a1=a b2=None c3=None')
self.assertEqual(m[0], 'a')
self.assertEqual(m[1], 'a')
self.assertEqual(m[2], None)
self.assertEqual(m[3], None)
with self.assertRaisesRegex(IndexError, 'no such group'):
m['X']
with self.assertRaisesRegex(IndexError, 'no such group'):
m[-1]
with self.assertRaisesRegex(IndexError, 'no such group'):
m[4]
with self.assertRaisesRegex(IndexError, 'no such group'):
m[0, 1]
with self.assertRaisesRegex(IndexError, 'no such group'):
m[(0,)]
with self.assertRaisesRegex(IndexError, 'no such group'):
m[(0, 1)]
with self.assertRaisesRegex(IndexError, 'no such group'):
'a1={a2}'.format_map(m)

m = match_fn('ac')
self.assertEqual(m['a1'], 'a')
self.assertEqual(m['b2'], None)
self.assertEqual(m['c3'], 'c')
self.assertEqual('a1={a1} b2={b2} c3={c3}'.format_map(m), 'a1=a b2=None c3=c')
self.assertEqual(m[0], 'ac')
self.assertEqual(m[1], 'a')
self.assertEqual(m[2], None)
self.assertEqual(m[3], 'c')

# Cannot assign.
with self.assertRaises(TypeError):
m[0] = 1

# No len().
self.assertRaises(TypeError, len, m)

def test_re_fullmatch(self):
# Issue 16203: Proposal: add re.fullmatch() method.
Expand Down
10 changes: 10 additions & 0 deletions Misc/NEWS.d/next/Library/2022-02-05-00-15-03.bpo-42353.0ebVGG.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
The :mod:`re` module gains a new :func:`re.prefixmatch` function as an
explicit spelling of what has to date always been known as :func:`re.match`.
:class:`re.Pattern` similarly gains a :meth:`re.Pattern.prefixmatch` method.

Why? Explicit is better than implicit. Other widely used languages all use
the term "match" to mean what Python uses the term "search" for. The
unadorned "match" name in Python has been a frequent cause of confusion and
coding bugs due to the inconsistency with the rest of the software industry.

We will never remove the original ``match`` name.
38 changes: 19 additions & 19 deletions Modules/_sre/clinic/sre.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading