From 587dc4d74eae7f71381fd70a34663ca9ef65b8a9 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Sat, 5 Feb 2022 02:03:34 +0000 Subject: [PATCH 1/7] bpo-42353: Add prefixmatch APIs to the re module. These alleviate common confusion around what "match" means as Python is different than other popular languages in our use of the term as an API name. The original "match" names are NOT being deprecated. Source tooling like linters are expected to suggest using prefixmatch instead of match to improve code health and reduce cognitive burden of understanding the intent when reading code. See the documentation changes within this PR for a better description. --- Doc/library/re.rst | 142 ++++++++++++------ Doc/whatsnew/3.11.rst | 11 ++ Lib/re.py | 45 +++--- Lib/test/test_re.py | 87 ++++++----- .../2022-02-05-00-15-03.bpo-42353.0ebVGG.rst | 11 ++ Modules/_sre.c | 64 ++++++-- Modules/clinic/_sre.c.h | 69 ++++++++- 7 files changed, 317 insertions(+), 112 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-02-05-00-15-03.bpo-42353.0ebVGG.rst diff --git a/Doc/library/re.rst b/Doc/library/re.rst index b12ce4b9744f94..1dcdd98cd49779 100644 --- a/Doc/library/re.rst +++ b/Doc/library/re.rst @@ -610,8 +610,8 @@ form. Compile a regular expression pattern into a :ref:`regular expression object `, which can be used for matching using its - :func:`~Pattern.match`, :func:`~Pattern.search` and other methods, described - below. + :func:`~Pattern.prefixmatch`, :func:`~Pattern.search` and other methods, + described below. The expression's behaviour can be modified by specifying a *flags* value. Values can be any of the following variables, combined using bitwise OR (the @@ -620,11 +620,11 @@ form. 
The sequence :: prog = re.compile(pattern) - result = prog.match(string) + result = prog.search(string) is equivalent to :: - result = re.match(pattern, string) + result = re.search(pattern, string) but using :func:`re.compile` and saving the resulting regular expression object for reuse is more efficient when the expression will be used several @@ -753,19 +753,36 @@ form. point in the string. -.. function:: match(pattern, string, flags=0) +.. function:: prefixmatch(pattern, string, flags=0) If zero or more characters at the beginning of *string* match the regular expression *pattern*, return a corresponding :ref:`match object `. Return ``None`` if the string does not match the pattern; note that this is different from a zero-length match. - Note that even in :const:`MULTILINE` mode, :func:`re.match` will only match - at the beginning of the string and not at the beginning of each line. + Note that even in :const:`MULTILINE` mode, :func:`re.prefixmatch` will only + match at the beginning of the string and not at the beginning of each line. If you want to locate a match anywhere in *string*, use :func:`search` instead (see also :ref:`search-vs-match`). + Use :func:`~re.match` when your code needs to support older Python versions. + + .. versionadded:: 3.11 + + +.. function:: match(pattern, string, flags=0) + + The same as :func:`prefixmatch` documented above. Prefer using that more + explicit name when writing code intended only for Python versions 3.11 + and up. + + The new name was created in order to be explicit about its behavior + to reduce confusion vs the industry norm for regular expression APIs. + See :ref:`prefixmatch-vs-match`. + + .. versionchanged:: 3.11 + .. function:: fullmatch(pattern, string, flags=0) @@ -1041,7 +1058,7 @@ attributes: >>> pattern.search("dog", 1) # No match; search doesn't include the "d" -.. method:: Pattern.match(string[, pos[, endpos]]) +.. 
method:: Pattern.prefixmatch(string[, pos[, endpos]]) If zero or more characters at the *beginning* of *string* match this regular expression, return a corresponding :ref:`match object <match-objects>`. @@ -1059,6 +1076,23 @@ attributes: If you want to locate a match anywhere in *string*, use :meth:`~Pattern.search` instead (see also :ref:`search-vs-match`). + Use :meth:`~Pattern.match` when your code needs to support older Python versions. + + .. versionadded:: 3.11 + + +.. method:: Pattern.match(string[, pos[, endpos]]) + + The same as :meth:`Pattern.prefixmatch` documented above. Prefer using that + more explicit name when writing code intended only for Python versions 3.11 + and up. + + The new name was created in order to be explicit about its behavior + to reduce confusion vs the industry norm for regular expression APIs. + See :ref:`prefixmatch-vs-match`. + + .. versionchanged:: 3.11 + .. method:: Pattern.fullmatch(string[, pos[, endpos]]) @@ -1179,7 +1213,7 @@ Match objects support the following methods and attributes: If a group is contained in a part of the pattern that matched multiple times, the last match is returned. :: - >>> m = re.match(r"(\w+) (\w+)", "Isaac Newton, physicist") + >>> m = re.search(r"(\w+) (\w+)", "Isaac Newton, physicist") >>> m.group(0) # The entire match 'Isaac Newton' >>> m.group(1) # The first parenthesized subgroup. @@ -1196,7 +1230,7 @@ Match objects support the following methods and attributes: A moderately complicated example:: - >>> m = re.match(r"(?P<first_name>\w+) (?P<last_name>\w+)", "Malcolm Reynolds") + >>> m = re.search(r"(?P<first_name>\w+) (?P<last_name>\w+)", "Malcolm Reynolds") >>> m.group('first_name') 'Malcolm' >>> m.group('last_name') @@ -1211,8 +1245,8 @@ Match objects support the following methods and attributes: If a group matches multiple times, only the last match is accessible:: - >>> m = re.match(r"(..)+", "a1b2c3") # Matches 3 times. - >>> m.group(1) # Returns only the last match. + >>> m = re.search(r"(..)+", "a1b2c3") # Matches 3 times. 
+ >>> m.group(1) # Returns only the last match. 'c3' @@ -1221,7 +1255,7 @@ Match objects support the following methods and attributes: This is identical to ``m.group(g)``. This allows easier access to an individual group from a match:: - >>> m = re.match(r"(\w+) (\w+)", "Isaac Newton, physicist") + >>> m = re.search(r"(\w+) (\w+)", "Isaac Newton, physicist") >>> m[0] # The entire match 'Isaac Newton' >>> m[1] # The first parenthesized subgroup. @@ -1240,7 +1274,7 @@ Match objects support the following methods and attributes: For example:: - >>> m = re.match(r"(\d+)\.(\d+)", "24.1632") + >>> m = re.search(r"(\d+)\.(\d+)", "24.1632") >>> m.groups() ('24', '1632') @@ -1248,7 +1282,7 @@ Match objects support the following methods and attributes: might participate in the match. These groups will default to ``None`` unless the *default* argument is given:: - >>> m = re.match(r"(\d+)\.?(\d+)?", "24") + >>> m = re.search(r"(\d+)\.?(\d+)?", "24") >>> m.groups() # Second group defaults to None. ('24', None) >>> m.groups('0') # Now, the second group defaults to '0'. @@ -1261,7 +1295,7 @@ Match objects support the following methods and attributes: the subgroup name. The *default* argument is used for groups that did not participate in the match; it defaults to ``None``. For example:: - >>> m = re.match(r"(?P\w+) (?P\w+)", "Malcolm Reynolds") + >>> m = re.search(r"(?P\w+) (?P\w+)", "Malcolm Reynolds") >>> m.groupdict() {'first_name': 'Malcolm', 'last_name': 'Reynolds'} @@ -1367,38 +1401,38 @@ representing the card with that value. To see if a given string is a valid hand, one could do the following:: >>> valid = re.compile(r"^[a2-9tjqk]{5}$") - >>> displaymatch(valid.match("akt5q")) # Valid. + >>> displaymatch(valid.search("akt5q")) # Valid. "" - >>> displaymatch(valid.match("akt5e")) # Invalid. - >>> displaymatch(valid.match("akt")) # Invalid. - >>> displaymatch(valid.match("727ak")) # Valid. + >>> displaymatch(valid.search("akt5e")) # Invalid. 
+ >>> displaymatch(valid.search("akt")) # Invalid. + >>> displaymatch(valid.search("727ak")) # Valid. "" That last hand, ``"727ak"``, contained a pair, or two of the same valued cards. To match this with a regular expression, one could use backreferences as such:: - >>> pair = re.compile(r".*(.).*\1") - >>> displaymatch(pair.match("717ak")) # Pair of 7s. + >>> pair = re.compile(r"^.*(.).*\1") + >>> displaymatch(pair.search("717ak")) # Pair of 7s. "" - >>> displaymatch(pair.match("718ak")) # No pairs. - >>> displaymatch(pair.match("354aa")) # Pair of aces. + >>> displaymatch(pair.search("718ak")) # No pairs. + >>> displaymatch(pair.search("354aa")) # Pair of aces. "" To find out what card the pair consists of, one could use the :meth:`~Match.group` method of the match object in the following manner:: - >>> pair = re.compile(r".*(.).*\1") - >>> pair.match("717ak").group(1) + >>> pair = re.compile(r"^.*(.).*\1") + >>> pair.search("717ak").group(1) '7' # Error because re.match() returns None, which doesn't have a group() method: - >>> pair.match("718ak").group(1) + >>> pair.search("718ak").group(1) Traceback (most recent call last): File "", line 1, in - re.match(r".*(.).*\1", "718ak").group(1) + re.search(r".*(.).*\1", "718ak").group(1) AttributeError: 'NoneType' object has no attribute 'group' - >>> pair.match("354aa").group(1) + >>> pair.search("354aa").group(1) 'a' @@ -1456,32 +1490,54 @@ search() vs. match() .. sectionauthor:: Fred L. Drake, Jr. Python offers two different primitive operations based on regular expressions: -:func:`re.match` checks for a match only at the beginning of the string, while -:func:`re.search` checks for a match anywhere in the string (this is what Perl -does by default). +:func:`re.prefixmatch` and its older equivalent named :func:`re.match` checks +for a match only at the beginning of the string, while :func:`re.search` checks +for a match anywhere in the string (this is what Perl does by default). 
For example:: - >>> re.match("c", "abcdef") # No match - >>> re.search("c", "abcdef") # Match + >>> re.match("c", "abcdef") # No match + >>> re.prefixmatch("c", "abcdef") # No match + >>> re.search("c", "abcdef") # Match Regular expressions beginning with ``'^'`` can be used with :func:`search` to restrict the match at the beginning of the string:: - >>> re.match("c", "abcdef") # No match - >>> re.search("^c", "abcdef") # No match - >>> re.search("^a", "abcdef") # Match + >>> re.match("c", "abcdef") # No match + >>> re.prefixmatch("c", "abcdef") # No match + >>> re.search("^c", "abcdef") # No match + >>> re.search("^a", "abcdef") # Match Note however that in :const:`MULTILINE` mode :func:`match` only matches at the beginning of the string, whereas using :func:`search` with a regular expression beginning with ``'^'`` will match at the beginning of each line. :: - >>> re.match('X', 'A\nB\nX', re.MULTILINE) # No match - >>> re.search('^X', 'A\nB\nX', re.MULTILINE) # Match + >>> re.match('X', 'A\nB\nX', re.MULTILINE) # No match + >>> re.prefixmatch('X', 'A\nB\nX', re.MULTILINE) # No match + >>> re.search('^X', 'A\nB\nX', re.MULTILINE) # Match +.. _prefixmatch-vs-match: + +prefixmatch() vs. match() +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Why is the :func:`re.match` name being discouraged in favor of the longer +:func:`re.prefixmatch` as of Python 3.11? + +Since regular expressions were introduced in Python, many other languages have +been created and/or gained regex support libraries. However, in the most popular +of those, they use the term "match" in their APIs to mean the unanchored +behavior provided in Python by :func:`re.search`. Thus any use of the plain +term "match" can be confusing to those reading or writing Python who are not +familiar with its divergence from the collective software industry norm. + +Quoting from the Zen of Python (``python3 -m this``): *"Explicit is better than +implicit"*. 
Anyone reading the name :func:`re.prefixmatch` is likely to +understand the semantics intended. When reading :func:`re.match` there remains +a seed of doubt about the author's actual intended behavior. Making a Phonebook ^^^^^^^^^^^^^^^^^^ @@ -1600,9 +1656,9 @@ every backslash (``'\'``) in a regular expression would have to be prefixed with another one to escape it. For example, the two following lines of code are functionally identical:: - >>> re.match(r"\W(.)\1\W", " ff ") + >>> re.search(r"\W(.)\1\W", " ff ") - >>> re.match("\\W(.)\\1\\W", " ff ") + >>> re.search("\\W(.)\\1\\W", " ff ") When one wants to match a literal backslash, it must be escaped in the regular @@ -1610,9 +1666,9 @@ expression. With raw string notation, this means ``r"\\"``. Without raw string notation, one must use ``"\\\\"``, making the following lines of code functionally identical:: - >>> re.match(r"\\", r"\\") + >>> re.search(r"\\", r"\\") - >>> re.match("\\\\", r"\\") + >>> re.search("\\\\", r"\\") diff --git a/Doc/whatsnew/3.11.rst b/Doc/whatsnew/3.11.rst index 1558d67d9a89f8..c8f8846891ebb6 100644 --- a/Doc/whatsnew/3.11.rst +++ b/Doc/whatsnew/3.11.rst @@ -264,6 +264,17 @@ os (Contributed by Dong-hee Na in :issue:`44611`.) +re +-- + +* :func:`re.prefixmatch` and a corresponding :meth:`re.Pattern.prefixmatch` + have been added as alternate names for the existing :func:`re.match` and + :meth:`re.Pattern.match` APIs. These are intended to be used to + alleviate confusion around what "match" means by following *"Explicit is + better than implicit"*. Other popular language regular expression libraries + use an API named ``match`` to mean what Python has always called ``search``. + + socket ------ diff --git a/Lib/re.py b/Lib/re.py index a7ab9b3706748a..e4d4512bd3c9fb 100644 --- a/Lib/re.py +++ b/Lib/re.py @@ -85,17 +85,18 @@ \\ Matches a literal backslash. This module exports the following functions: - match Match a regular expression pattern to the beginning of a string. 
- fullmatch Match a regular expression pattern to all of a string. - search Search a string for the presence of a pattern. - sub Substitute occurrences of a pattern found in a string. - subn Same as sub, but also return the number of substitutions made. - split Split a string by the occurrences of a pattern. - findall Find all occurrences of a pattern in a string. - finditer Return an iterator yielding a Match object for each match. - compile Compile a pattern into a Pattern object. - purge Clear the regular expression cache. - escape Backslash all non-alphanumerics in a string. + prefixmatch Match a regular expression pattern to the beginning of a string. + match The old name of prefixmatch. Prefer prefixmatch in 3.11+ code. + fullmatch Match a regular expression pattern to all of a string. + search Search a string for the presence of a pattern. + sub Substitute occurrences of a pattern found in a string. + subn Same as sub, but also return the number of substitutions made. + split Split a string by the occurrences of a pattern. + findall Find all occurrences of a pattern in a string. + finditer Return an iterator yielding a Match object for each match. + compile Compile a pattern into a Pattern object. + purge Clear the regular expression cache. + escape Backslash all non-alphanumerics in a string. Each function other than purge and escape can take an optional 'flags' argument consisting of one or more of the following module constants, joined by "|". 
@@ -133,14 +134,14 @@ # public symbols __all__ = [ - "match", "fullmatch", "search", "sub", "subn", "split", + "prefixmatch", "match", "fullmatch", "search", "sub", "subn", "split", "findall", "finditer", "compile", "purge", "template", "escape", "error", "Pattern", "Match", "A", "I", "L", "M", "S", "X", "U", "ASCII", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE", "UNICODE", ] -__version__ = "2.2.1" +__version__ = "3.11.0" @enum.global_enum @enum._simple_enum(enum.IntFlag, boundary=enum.KEEP) @@ -164,10 +165,18 @@ class RegexFlag: # -------------------------------------------------------------------- # public interface -def match(pattern, string, flags=0): +def prefixmatch(pattern, string, flags=0): """Try to apply the pattern at the start of the string, returning a Match object, or None if no match was found.""" - return _compile(pattern, flags).match(string) + return _compile(pattern, flags).prefixmatch(string) + +def match(pattern, string, flags=0): + """Less explicitly named equivalent to prefixmatch. Prefer prefixmatch. + Try to apply the pattern at the start of the string, returning + a Match object, or None if no match was found.""" + # TODO(https://bugs.python.org/issue42353): PendingDeprecationWarning + # once we decide by what version, IF EVER, we'd consider removing this. 
+ return _compile(pattern, flags).prefixmatch(string) def fullmatch(pattern, string, flags=0): """Try to apply the pattern to all of the string, returning @@ -257,7 +266,7 @@ def escape(pattern): return pattern.translate(_special_chars_map).encode('latin1') Pattern = type(sre_compile.compile('', 0)) -Match = type(sre_compile.compile('', 0).match('')) +Match = type(sre_compile.compile('', 0).prefixmatch('')) # -------------------------------------------------------------------- # internals @@ -344,10 +353,10 @@ def __init__(self, lexicon, flags=0): def scan(self, string): result = [] append = result.append - match = self.scanner.scanner(string).match + _match = self.scanner.scanner(string).prefixmatch i = 0 while True: - m = match() + m = _match() if not m: break j = m.end() diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 18fa24a99ce032..6c7532ab3432c6 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -78,10 +78,13 @@ def test_search_star_plus(self): self.assertEqual(re.search('x+', 'axx').span(), (1, 3)) self.assertIsNone(re.search('x', 'aaa')) self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0)) + self.assertEqual(re.prefixmatch('a*', 'xxx').span(0), (0, 0)) self.assertEqual(re.match('a*', 'xxx').span(), (0, 0)) self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3)) + self.assertEqual(re.prefixmatch('x*', 'xxxa').span(0), (0, 3)) self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3)) self.assertIsNone(re.match('a+', 'xxx')) + self.assertIsNone(re.prefixmatch('a+', 'xxx')) def bump_num(self, matchobj): int_value = int(matchobj.group(0)) @@ -148,6 +151,7 @@ def test_bug_449000(self): def test_bug_1661(self): # Verify that flags do not get silently ignored with compiled patterns pattern = re.compile('.') + self.assertRaises(ValueError, re.prefixmatch, pattern, 'A', re.I) self.assertRaises(ValueError, re.match, pattern, 'A', re.I) self.assertRaises(ValueError, re.search, pattern, 'A', re.I) self.assertRaises(ValueError, re.findall, 
pattern, 'A', re.I) @@ -398,6 +402,8 @@ def test_re_match(self): self.assertEqual(re.match(b'(a)', string).group(0), b'a') self.assertEqual(re.match(b'(a)', string).group(1), b'a') self.assertEqual(re.match(b'(a)', string).group(1, 1), (b'a', b'a')) + self.assertEqual(re.prefixmatch(b'(a)', string).group(1, 1), + (b'a', b'a')) for a in ("\xe0", "\u0430", "\U0001d49c"): self.assertEqual(re.match(a, a).groups(), ()) self.assertEqual(re.match('(%s)' % a, a).groups(), (a,)) @@ -442,46 +448,47 @@ def __index__(self): def test_match_getitem(self): pat = re.compile('(?:(?Pa)|(?Pb))(?Pc)?') - m = pat.match('a') - self.assertEqual(m['a1'], 'a') - self.assertEqual(m['b2'], None) - self.assertEqual(m['c3'], None) - self.assertEqual('a1={a1} b2={b2} c3={c3}'.format_map(m), 'a1=a b2=None c3=None') - self.assertEqual(m[0], 'a') - self.assertEqual(m[1], 'a') - self.assertEqual(m[2], None) - self.assertEqual(m[3], None) - with self.assertRaisesRegex(IndexError, 'no such group'): - m['X'] - with self.assertRaisesRegex(IndexError, 'no such group'): - m[-1] - with self.assertRaisesRegex(IndexError, 'no such group'): - m[4] - with self.assertRaisesRegex(IndexError, 'no such group'): - m[0, 1] - with self.assertRaisesRegex(IndexError, 'no such group'): - m[(0,)] - with self.assertRaisesRegex(IndexError, 'no such group'): - m[(0, 1)] - with self.assertRaisesRegex(IndexError, 'no such group'): - 'a1={a2}'.format_map(m) - - m = pat.match('ac') - self.assertEqual(m['a1'], 'a') - self.assertEqual(m['b2'], None) - self.assertEqual(m['c3'], 'c') - self.assertEqual('a1={a1} b2={b2} c3={c3}'.format_map(m), 'a1=a b2=None c3=c') - self.assertEqual(m[0], 'ac') - self.assertEqual(m[1], 'a') - self.assertEqual(m[2], None) - self.assertEqual(m[3], 'c') - - # Cannot assign. - with self.assertRaises(TypeError): - m[0] = 1 - - # No len(). 
- self.assertRaises(TypeError, len, m) + for match_fn in pat.match, pat.prefixmatch: + m = match_fn('a') + self.assertEqual(m['a1'], 'a') + self.assertEqual(m['b2'], None) + self.assertEqual(m['c3'], None) + self.assertEqual('a1={a1} b2={b2} c3={c3}'.format_map(m), 'a1=a b2=None c3=None') + self.assertEqual(m[0], 'a') + self.assertEqual(m[1], 'a') + self.assertEqual(m[2], None) + self.assertEqual(m[3], None) + with self.assertRaisesRegex(IndexError, 'no such group'): + m['X'] + with self.assertRaisesRegex(IndexError, 'no such group'): + m[-1] + with self.assertRaisesRegex(IndexError, 'no such group'): + m[4] + with self.assertRaisesRegex(IndexError, 'no such group'): + m[0, 1] + with self.assertRaisesRegex(IndexError, 'no such group'): + m[(0,)] + with self.assertRaisesRegex(IndexError, 'no such group'): + m[(0, 1)] + with self.assertRaisesRegex(IndexError, 'no such group'): + 'a1={a2}'.format_map(m) + + m = match_fn('ac') + self.assertEqual(m['a1'], 'a') + self.assertEqual(m['b2'], None) + self.assertEqual(m['c3'], 'c') + self.assertEqual('a1={a1} b2={b2} c3={c3}'.format_map(m), 'a1=a b2=None c3=c') + self.assertEqual(m[0], 'ac') + self.assertEqual(m[1], 'a') + self.assertEqual(m[2], None) + self.assertEqual(m[3], 'c') + + # Cannot assign. + with self.assertRaises(TypeError): + m[0] = 1 + + # No len(). + self.assertRaises(TypeError, len, m) def test_re_fullmatch(self): # Issue 16203: Proposal: add re.fullmatch() method. diff --git a/Misc/NEWS.d/next/Library/2022-02-05-00-15-03.bpo-42353.0ebVGG.rst b/Misc/NEWS.d/next/Library/2022-02-05-00-15-03.bpo-42353.0ebVGG.rst new file mode 100644 index 00000000000000..12784beef22f77 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-02-05-00-15-03.bpo-42353.0ebVGG.rst @@ -0,0 +1,11 @@ +The :mod:`re` module gains a new :func:`re.prefixmatch` function as an +explicit spelling of what has to date always been known as :func:`re.match`. +:class:`re.Pattern` similarly gains a :meth:`re.Pattern.prefixmatch` method. + +Why? 
Explicit is better than implicit. Other widely used languages all use +the term "match" to mean what Python uses the term "search" for. The +unadorned "match" name in Python has been a frequent cause of confusion and +coding bugs due to the inconsistency with the rest of the software industry. + +There are no plans to remove or deprecate the existing ``match`` names. If that +were to happen it would be at minimum 7 years in the future. diff --git a/Modules/_sre.c b/Modules/_sre.c index 213730860cfb53..e7496c72ba7500 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -619,7 +619,7 @@ sre_search(SRE_STATE* state, SRE_CODE* pattern) } /*[clinic input] -_sre.SRE_Pattern.match +_sre.SRE_Pattern.prefixmatch cls: defining_class / @@ -631,10 +631,10 @@ Matches zero or more characters at the beginning of the string. [clinic start generated code]*/ static PyObject * -_sre_SRE_Pattern_match_impl(PatternObject *self, PyTypeObject *cls, - PyObject *string, Py_ssize_t pos, - Py_ssize_t endpos) -/*[clinic end generated code: output=ec6208ea58a0cca0 input=4bdb9c3e564d13ac]*/ +_sre_SRE_Pattern_prefixmatch_impl(PatternObject *self, PyTypeObject *cls, + PyObject *string, Py_ssize_t pos, + Py_ssize_t endpos) +/*[clinic end generated code: output=a0e079fb4f875240 input=e2a7e68ea47d048c]*/ { _sremodulestate *module_state = get_sre_module_state_by_class(cls); SRE_STATE state; @@ -661,6 +661,33 @@ _sre_SRE_Pattern_match_impl(PatternObject *self, PyTypeObject *cls, return match; } +/*[clinic input] +_sre.SRE_Pattern.match + + cls: defining_class + / + string: object + pos: Py_ssize_t = 0 + endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize + +Matches zero or more characters at the beginning of the string. + +This is the legacy non-explicit method name. Prefer using it's +explicit spelling of prefixmatch in 3.11+ code. 
+ +[clinic start generated code]*/ + +static PyObject * +_sre_SRE_Pattern_match_impl(PatternObject *self, PyTypeObject *cls, + PyObject *string, Py_ssize_t pos, + Py_ssize_t endpos) +/*[clinic end generated code: output=ec6208ea58a0cca0 input=d63ebdd3c7c96189]*/ +{ + /* TODO(https://bugs.python.org/issue42353): Plan if we EVER want to + * issue a PendingDeprecationWarning here. */ + return _sre_SRE_Pattern_prefixmatch_impl(self, cls, string, pos, endpos); +} + /*[clinic input] _sre.SRE_Pattern.fullmatch @@ -2356,7 +2383,7 @@ _sre_SRE_Match___deepcopy__(MatchObject *self, PyObject *memo) } PyDoc_STRVAR(match_doc, -"The result of re.match() and re.search().\n\ +"The result of re.search(), re.prefixmatch(), and re.fullmatch().\n\ Match objects always have a boolean value of True."); PyDoc_STRVAR(match_group_doc, @@ -2512,7 +2539,7 @@ scanner_dealloc(ScannerObject* self) } /*[clinic input] -_sre.SRE_Scanner.match +_sre.SRE_Scanner.prefixmatch cls: defining_class / @@ -2520,8 +2547,8 @@ _sre.SRE_Scanner.match [clinic start generated code]*/ static PyObject * -_sre_SRE_Scanner_match_impl(ScannerObject *self, PyTypeObject *cls) -/*[clinic end generated code: output=6e22c149dc0f0325 input=b5146e1f30278cb7]*/ +_sre_SRE_Scanner_prefixmatch_impl(ScannerObject *self, PyTypeObject *cls) +/*[clinic end generated code: output=02b3b9d2954a2157 input=3049b20466c56a8e]*/ { _sremodulestate *module_state = get_sre_module_state_by_class(cls); SRE_STATE* state = &self->state; @@ -2552,6 +2579,23 @@ _sre_SRE_Scanner_match_impl(ScannerObject *self, PyTypeObject *cls) return match; } +/*[clinic input] +_sre.SRE_Scanner.match + + cls: defining_class + / + +[clinic start generated code]*/ + +static PyObject * +_sre_SRE_Scanner_match_impl(ScannerObject *self, PyTypeObject *cls) +/*[clinic end generated code: output=6e22c149dc0f0325 input=b5146e1f30278cb7]*/ +{ + /* TODO(https://bugs.python.org/issue42353): Plan if we EVER want to + * issue a PendingDeprecationWarning here. 
*/ + return _sre_SRE_Scanner_prefixmatch_impl(self, cls); +} + /*[clinic input] _sre.SRE_Scanner.search @@ -2697,6 +2741,7 @@ pattern_richcompare(PyObject *lefto, PyObject *righto, int op) #include "clinic/_sre.c.h" static PyMethodDef pattern_methods[] = { + _SRE_SRE_PATTERN_PREFIXMATCH_METHODDEF _SRE_SRE_PATTERN_MATCH_METHODDEF _SRE_SRE_PATTERN_FULLMATCH_METHODDEF _SRE_SRE_PATTERN_SEARCH_METHODDEF @@ -2823,6 +2868,7 @@ static PyType_Spec match_spec = { }; static PyMethodDef scanner_methods[] = { + _SRE_SRE_SCANNER_PREFIXMATCH_METHODDEF _SRE_SRE_SCANNER_MATCH_METHODDEF _SRE_SRE_SCANNER_SEARCH_METHODDEF {NULL, NULL} diff --git a/Modules/clinic/_sre.c.h b/Modules/clinic/_sre.c.h index 72d772c289ae8b..bfa4129b3a626c 100644 --- a/Modules/clinic/_sre.c.h +++ b/Modules/clinic/_sre.c.h @@ -157,11 +157,48 @@ _sre_unicode_tolower(PyObject *module, PyObject *arg) return return_value; } +PyDoc_STRVAR(_sre_SRE_Pattern_prefixmatch__doc__, +"prefixmatch($self, /, string, pos=0, endpos=sys.maxsize)\n" +"--\n" +"\n" +"Matches zero or more characters at the beginning of the string."); + +#define _SRE_SRE_PATTERN_PREFIXMATCH_METHODDEF \ + {"prefixmatch", (PyCFunction)(void(*)(void))_sre_SRE_Pattern_prefixmatch, METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Pattern_prefixmatch__doc__}, + +static PyObject * +_sre_SRE_Pattern_prefixmatch_impl(PatternObject *self, PyTypeObject *cls, + PyObject *string, Py_ssize_t pos, + Py_ssize_t endpos); + +static PyObject * +_sre_SRE_Pattern_prefixmatch(PatternObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = {"string", "pos", "endpos", NULL}; + static _PyArg_Parser _parser = {"O|nn:prefixmatch", _keywords, 0}; + PyObject *string; + Py_ssize_t pos = 0; + Py_ssize_t endpos = PY_SSIZE_T_MAX; + + if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, + &string, &pos, &endpos)) { + goto exit; + } + return_value = 
_sre_SRE_Pattern_prefixmatch_impl(self, cls, string, pos, endpos); + +exit: + return return_value; +} + PyDoc_STRVAR(_sre_SRE_Pattern_match__doc__, "match($self, /, string, pos=0, endpos=sys.maxsize)\n" "--\n" "\n" -"Matches zero or more characters at the beginning of the string."); +"Matches zero or more characters at the beginning of the string.\n" +"\n" +"This is the legacy non-explicit method name. Prefer using it\'s\n" +"explicit spelling of prefixmatch in 3.11+ code."); #define _SRE_SRE_PATTERN_MATCH_METHODDEF \ {"match", (PyCFunction)(void(*)(void))_sre_SRE_Pattern_match, METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Pattern_match__doc__}, @@ -855,6 +892,34 @@ PyDoc_STRVAR(_sre_SRE_Match___deepcopy____doc__, #define _SRE_SRE_MATCH___DEEPCOPY___METHODDEF \ {"__deepcopy__", (PyCFunction)_sre_SRE_Match___deepcopy__, METH_O, _sre_SRE_Match___deepcopy____doc__}, +PyDoc_STRVAR(_sre_SRE_Scanner_prefixmatch__doc__, +"prefixmatch($self, /)\n" +"--\n" +"\n"); + +#define _SRE_SRE_SCANNER_PREFIXMATCH_METHODDEF \ + {"prefixmatch", (PyCFunction)(void(*)(void))_sre_SRE_Scanner_prefixmatch, METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Scanner_prefixmatch__doc__}, + +static PyObject * +_sre_SRE_Scanner_prefixmatch_impl(ScannerObject *self, PyTypeObject *cls); + +static PyObject * +_sre_SRE_Scanner_prefixmatch(ScannerObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = { NULL}; + static _PyArg_Parser _parser = {":prefixmatch", _keywords, 0}; + + if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser + )) { + goto exit; + } + return_value = _sre_SRE_Scanner_prefixmatch_impl(self, cls); + +exit: + return return_value; +} + PyDoc_STRVAR(_sre_SRE_Scanner_match__doc__, "match($self, /)\n" "--\n" @@ -910,4 +975,4 @@ _sre_SRE_Scanner_search(ScannerObject *self, PyTypeObject *cls, PyObject *const exit: return return_value; } -/*[clinic end generated 
code: output=518f7bb775c1184f input=a9049054013a1b77]*/ +/*[clinic end generated code: output=18612f48d6853239 input=a9049054013a1b77]*/ From fa2885eccd72a3e6ec33721b459ad13358b9034a Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Sun, 29 Jan 2023 22:48:38 -0800 Subject: [PATCH 2/7] fix editor fat finger typo during the merge. #vim --- Lib/re/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/re/__init__.py b/Lib/re/__init__.py index c620c812fedfe9..176ceef12af532 100644 --- a/Lib/re/__init__.py +++ b/Lib/re/__init__.py @@ -313,7 +313,7 @@ def _compile(pattern, flags): "without an obvious purpose. " "Don't use it.", DeprecationWarning) - p = _COMPiler.compile(pattern, flags) + p = _compiler.compile(pattern, flags) if flags & DEBUG: return p if len(_cache) >= _MAXCACHE: From 6195ad74fc9b48764c9f77209663f2d658773ad6 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Sun, 29 Jan 2023 22:52:33 -0800 Subject: [PATCH 3/7] 3.12 --- Lib/re/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/re/__init__.py b/Lib/re/__init__.py index 176ceef12af532..2444714ecf3595 100644 --- a/Lib/re/__init__.py +++ b/Lib/re/__init__.py @@ -86,7 +86,7 @@ This module exports the following functions: prefixmatch Match a regular expression pattern to the beginning of a string. - match The old name of prefixmatch. Prefer prefixmatch in 3.11+ code. + match The old name of prefixmatch. Prefer prefixmatch in 3.12+ code. fullmatch Match a regular expression pattern to all of a string. search Search a string for the presence of a pattern. sub Substitute occurrences of a pattern found in a string. @@ -137,7 +137,7 @@ "UNICODE", "NOFLAG", "RegexFlag", ] -__version__ = "3.11.0" +__version__ = "3.12.0" @enum.global_enum @enum._simple_enum(enum.IntFlag, boundary=enum.KEEP) From 149f6e4751f20c9a5acd12da3e4b21138d6c7faf Mon Sep 17 00:00:00 2001 From: "Gregory P. 
Smith" Date: Fri, 18 Apr 2025 17:52:18 -0700 Subject: [PATCH 4/7] Simplify, don't have duplicate definitions. `match = prefixmatch` --- Lib/re/__init__.py | 7 +- Lib/test/test_inspect/test_inspect.py | 5 +- Modules/_sre/clinic/sre.c.h | 122 +------------------------- Modules/_sre/sre.c | 76 +++++++--------- 4 files changed, 38 insertions(+), 172 deletions(-) diff --git a/Lib/re/__init__.py b/Lib/re/__init__.py index 2564a8f93699f0..fb176ee23207f8 100644 --- a/Lib/re/__init__.py +++ b/Lib/re/__init__.py @@ -167,11 +167,8 @@ def prefixmatch(pattern, string, flags=0): a Match object, or None if no match was found.""" return _compile(pattern, flags).prefixmatch(string) -def match(pattern, string, flags=0): - """The original name for prefixmatch. Equivalent behavior. - Try to apply the pattern at the start of the string, returning - a Match object, or None if no match was found.""" - return _compile(pattern, flags).prefixmatch(string) +# The original, less explicit name for prefixmatch; kept as an alias. 
+match = prefixmatch def fullmatch(pattern, string, flags=0): """Try to apply the pattern to all of the string, returning diff --git a/Lib/test/test_inspect/test_inspect.py b/Lib/test/test_inspect/test_inspect.py index daae990458d708..bad56a2dd181a5 100644 --- a/Lib/test/test_inspect/test_inspect.py +++ b/Lib/test/test_inspect/test_inspect.py @@ -5812,7 +5812,10 @@ def test_pwd_module_has_signatures(self): def test_re_module_has_signatures(self): import re - methods_no_signature = {'Match': {'group'}} + methods_no_signature = { + 'Match': {'group'}, + 'Pattern': {'match'}, # It is now an alias for prefixmatch + } self._test_module_has_signatures(re, methods_no_signature=methods_no_signature, good_exceptions={'error', 'PatternError'}) diff --git a/Modules/_sre/clinic/sre.c.h b/Modules/_sre/clinic/sre.c.h index c9caa3127283ce..b49bf4e058b69b 100644 --- a/Modules/_sre/clinic/sre.c.h +++ b/Modules/_sre/clinic/sre.c.h @@ -260,105 +260,6 @@ _sre_SRE_Pattern_prefixmatch(PyObject *self, PyTypeObject *cls, PyObject *const return return_value; } -PyDoc_STRVAR(_sre_SRE_Pattern_match__doc__, -"match($self, /, string, pos=0, endpos=sys.maxsize)\n" -"--\n" -"\n" -"Matches zero or more characters at the beginning of the string.\n" -"\n" -"This is the legacy method name. 
Modern Python also provides it under the name\n" -"\'prefixmatch\' to allow code to be explicitly clear about the intended behavior."); - -#define _SRE_SRE_PATTERN_MATCH_METHODDEF \ - {"match", _PyCFunction_CAST(_sre_SRE_Pattern_match), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Pattern_match__doc__}, - -static PyObject * -_sre_SRE_Pattern_match_impl(PatternObject *self, PyTypeObject *cls, - PyObject *string, Py_ssize_t pos, - Py_ssize_t endpos); - -static PyObject * -_sre_SRE_Pattern_match(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) -{ - PyObject *return_value = NULL; - #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - - #define NUM_KEYWORDS 3 - static struct { - PyGC_Head _this_is_not_used; - PyObject_VAR_HEAD - Py_hash_t ob_hash; - PyObject *ob_item[NUM_KEYWORDS]; - } _kwtuple = { - .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) - .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(pos), &_Py_ID(endpos), }, - }; - #undef NUM_KEYWORDS - #define KWTUPLE (&_kwtuple.ob_base.ob_base) - - #else // !Py_BUILD_CORE - # define KWTUPLE NULL - #endif // !Py_BUILD_CORE - - static const char * const _keywords[] = {"string", "pos", "endpos", NULL}; - static _PyArg_Parser _parser = { - .keywords = _keywords, - .fname = "match", - .kwtuple = KWTUPLE, - }; - #undef KWTUPLE - PyObject *argsbuf[3]; - Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 1; - PyObject *string; - Py_ssize_t pos = 0; - Py_ssize_t endpos = PY_SSIZE_T_MAX; - - args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, - /*minpos*/ 1, /*maxpos*/ 3, /*minkw*/ 0, /*varpos*/ 0, argsbuf); - if (!args) { - goto exit; - } - string = args[0]; - if (!noptargs) { - goto skip_optional_pos; - } - if (args[1]) { - { - Py_ssize_t ival = -1; - PyObject *iobj = _PyNumber_Index(args[1]); - if (iobj != NULL) { - ival = PyLong_AsSsize_t(iobj); - Py_DECREF(iobj); - } - if (ival == -1 && PyErr_Occurred()) { - goto exit; - } - pos = ival; - } - if (!--noptargs) { - goto skip_optional_pos; - } - } - { - Py_ssize_t ival = -1; - PyObject *iobj = _PyNumber_Index(args[2]); - if (iobj != NULL) { - ival = PyLong_AsSsize_t(iobj); - Py_DECREF(iobj); - } - if (ival == -1 && PyErr_Occurred()) { - goto exit; - } - endpos = ival; - } -skip_optional_pos: - return_value = _sre_SRE_Pattern_match_impl((PatternObject *)self, cls, string, pos, endpos); - -exit: - return return_value; -} - PyDoc_STRVAR(_sre_SRE_Pattern_fullmatch__doc__, "fullmatch($self, /, string, pos=0, endpos=sys.maxsize)\n" "--\n" @@ -1643,27 +1544,6 @@ _sre_SRE_Scanner_prefixmatch(PyObject *self, PyTypeObject *cls, PyObject *const return _sre_SRE_Scanner_prefixmatch_impl((ScannerObject *)self, cls); } -PyDoc_STRVAR(_sre_SRE_Scanner_match__doc__, -"match($self, /)\n" -"--\n" -"\n"); - -#define _SRE_SRE_SCANNER_MATCH_METHODDEF \ - {"match", _PyCFunction_CAST(_sre_SRE_Scanner_match), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Scanner_match__doc__}, - -static PyObject * -_sre_SRE_Scanner_match_impl(ScannerObject *self, PyTypeObject *cls); - -static PyObject * -_sre_SRE_Scanner_match(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) -{ - if (nargs || (kwnames && PyTuple_GET_SIZE(kwnames))) { - PyErr_SetString(PyExc_TypeError, "match() takes no arguments"); - return NULL; - } - return 
_sre_SRE_Scanner_match_impl((ScannerObject *)self, cls); -} - PyDoc_STRVAR(_sre_SRE_Scanner_search__doc__, "search($self, /)\n" "--\n" @@ -1688,4 +1568,4 @@ _sre_SRE_Scanner_search(PyObject *self, PyTypeObject *cls, PyObject *const *args #ifndef _SRE_SRE_PATTERN__FAIL_AFTER_METHODDEF #define _SRE_SRE_PATTERN__FAIL_AFTER_METHODDEF #endif /* !defined(_SRE_SRE_PATTERN__FAIL_AFTER_METHODDEF) */ -/*[clinic end generated code: output=ce9568a4b57dece3 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=0c867efb64e020aa input=a9049054013a1b77]*/ diff --git a/Modules/_sre/sre.c b/Modules/_sre/sre.c index c4f853c6c9b49a..9280834a84d639 100644 --- a/Modules/_sre/sre.c +++ b/Modules/_sre/sre.c @@ -811,32 +811,6 @@ _sre_SRE_Pattern_prefixmatch_impl(PatternObject *self, PyTypeObject *cls, return match; } -/*[clinic input] -_sre.SRE_Pattern.match - - cls: defining_class - / - string: object - pos: Py_ssize_t = 0 - endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize - -Matches zero or more characters at the beginning of the string. - -This is the legacy method name. Modern Python also provides it under the name -'prefixmatch' to allow code to be explicitly clear about the intended behavior. - -[clinic start generated code]*/ - -static PyObject * -_sre_SRE_Pattern_match_impl(PatternObject *self, PyTypeObject *cls, - PyObject *string, Py_ssize_t pos, - Py_ssize_t endpos) -/*[clinic end generated code: output=ec6208ea58a0cca0 input=7541a63c722fcfdf]*/ -{ - /* TODO - https://github.com/python/cpython/issues/86519 - If we ever - * want a PendingDeprecationWarning here, wait until year >=2030. 
*/ - return _sre_SRE_Pattern_prefixmatch_impl(self, cls, string, pos, endpos); -} /*[clinic input] _sre.SRE_Pattern.fullmatch @@ -2929,23 +2903,6 @@ _sre_SRE_Scanner_prefixmatch_impl(ScannerObject *self, PyTypeObject *cls) return match; } -/*[clinic input] -_sre.SRE_Scanner.match - - cls: defining_class - / - -[clinic start generated code]*/ - -static PyObject * -_sre_SRE_Scanner_match_impl(ScannerObject *self, PyTypeObject *cls) -/*[clinic end generated code: output=6e22c149dc0f0325 input=b5146e1f30278cb7]*/ -{ - /* TODO(https://bugs.python.org/issue42353): Plan if we EVER want to - * issue a PendingDeprecationWarning here. */ - return _sre_SRE_Scanner_prefixmatch_impl(self, cls); -} - /*[clinic input] _sre.SRE_Scanner.search @@ -3206,7 +3163,7 @@ pattern_richcompare(PyObject *lefto, PyObject *righto, int op) static PyMethodDef pattern_methods[] = { _SRE_SRE_PATTERN_PREFIXMATCH_METHODDEF - _SRE_SRE_PATTERN_MATCH_METHODDEF + {"match", NULL}, /* filled in by sre_exec() */ _SRE_SRE_PATTERN_FULLMATCH_METHODDEF _SRE_SRE_PATTERN_SEARCH_METHODDEF _SRE_SRE_PATTERN_SUB_METHODDEF @@ -3334,7 +3291,7 @@ static PyType_Spec match_spec = { static PyMethodDef scanner_methods[] = { _SRE_SRE_SCANNER_PREFIXMATCH_METHODDEF - _SRE_SRE_SCANNER_MATCH_METHODDEF + {"match", NULL}, /* filled in by sre_exec() */ _SRE_SRE_SCANNER_SEARCH_METHODDEF {NULL, NULL} }; @@ -3438,11 +3395,40 @@ do { \ } \ } while (0) + +static void +copy_prefixmatch_method_def_to_match(PyMethodDef *method_defs) +{ + /* We could implement logic to scan the null filled sentry + * terminated list for the two method names. But we're a + * bunch of static structs. We just guarantee their position + * and flag deviation from this via debug build assertions. 
+ */ + assert(method_defs); + PyMethodDef *prefixmatch_md = &method_defs[0]; + assert(prefixmatch_md->ml_name != NULL); + assert(strcmp(prefixmatch_md->ml_name, "prefixmatch") == 0); + + PyMethodDef *match_md = &method_defs[1]; + assert(match_md->ml_name != NULL); + assert(strcmp(match_md->ml_name, "match") == 0); + /* If the public stable C API struct ever changed (!) and + * somehow wound up with unexpected layout and alignment + * constraints, fix the memcpy below. */ + assert(offsetof(PyMethodDef, ml_meth) == sizeof(char *)); + memcpy(&match_md->ml_meth, &prefixmatch_md->ml_meth, + sizeof(PyMethodDef) - offsetof(PyMethodDef, ml_meth)); +} + + static int sre_exec(PyObject *m) { _sremodulestate *state; + copy_prefixmatch_method_def_to_match(pattern_methods); + copy_prefixmatch_method_def_to_match(scanner_methods); + /* Create heap types */ state = get_sre_module_state(m); CREATE_TYPE(m, state->Pattern_Type, &pattern_spec); From 58d8f579c97918f142bfa4b0bd6fbd4c336c41b8 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Sat, 31 Jan 2026 15:26:10 -0800 Subject: [PATCH 5/7] Update match name removal language to be definitive - Change "25 years" to "30 years" to reflect actual time - Replace speculative "this decade, if ever" / "7 years" language with clear statement that we will never remove the original match name Co-Authored-By: Claude Opus 4.5 --- Doc/library/re.rst | 4 ++-- .../next/Library/2022-02-05-00-15-03.bpo-42353.0ebVGG.rst | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/Doc/library/re.rst b/Doc/library/re.rst index 83e97c3bc5365e..2944be5112eceb 100644 --- a/Doc/library/re.rst +++ b/Doc/library/re.rst @@ -1788,8 +1788,8 @@ understand the intended semantics. When reading :func:`~re.match` there remains a seed of doubt about the intended behavior to anyone not already familiar with this old Python gotcha. 
-We **do not** plan to deprecate and remove the older *match* name in this -decade, if ever, as it has been used in code for over 25 years. +We will **never** remove the original :func:`~re.match` name, as it has been +used in code for over 30 years. .. versionadded:: next diff --git a/Misc/NEWS.d/next/Library/2022-02-05-00-15-03.bpo-42353.0ebVGG.rst b/Misc/NEWS.d/next/Library/2022-02-05-00-15-03.bpo-42353.0ebVGG.rst index 12784beef22f77..3f50c453d76703 100644 --- a/Misc/NEWS.d/next/Library/2022-02-05-00-15-03.bpo-42353.0ebVGG.rst +++ b/Misc/NEWS.d/next/Library/2022-02-05-00-15-03.bpo-42353.0ebVGG.rst @@ -7,5 +7,4 @@ the term "match" to mean what Python uses the term "search" for. The unadorened "match" name in Python has been a frequent case of confusion and coding bugs due to the inconsistency with the rest if the software industry. -No plans to remove and deprecate the existing ``match`` names exist. If that -were to happen it would be at minimum 7 years in the future. +We will never remove the original ``match`` name. From 3cd9d7170b02515aad5906d7566ef2c4bf5a0d0e Mon Sep 17 00:00:00 2001 From: "Gregory P. 
Smith" Date: Sat, 31 Jan 2026 15:42:36 -0800 Subject: [PATCH 6/7] Fix re.rst examples after match to search conversion - Fix traceback to include ^ anchor matching the pair pattern definition - Add \A anchor to one example as a teaching hint for readers - Update card game examples to demonstrate search/match/prefixmatch mix - Add explanatory paragraph about match and prefixmatch being identical - Rename compiled regex variables to use _re suffix (valid_hand_re, pair_re) Co-Authored-By: Claude Opus 4.5 --- Doc/library/re.rst | 40 ++++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/Doc/library/re.rst b/Doc/library/re.rst index 2944be5112eceb..9f6efae32bbebe 100644 --- a/Doc/library/re.rst +++ b/Doc/library/re.rst @@ -1446,7 +1446,7 @@ when there is no match, you can test whether there was a match with a simple If a group is contained in a part of the pattern that matched multiple times, the last match is returned. :: - >>> m = re.search(r"(\w+) (\w+)", "Isaac Newton, physicist") + >>> m = re.search(r"\A(\w+) (\w+)", "Isaac Newton, physicist") >>> m.group(0) # The entire match 'Isaac Newton' >>> m.group(1) # The first parenthesized subgroup. @@ -1641,41 +1641,49 @@ representing the card with that value. To see if a given string is a valid hand, one could do the following:: - >>> valid = re.compile(r"^[a2-9tjqk]{5}$") - >>> displaymatch(valid.search("akt5q")) # Valid. + >>> valid_hand_re = re.compile(r"^[a2-9tjqk]{5}$") + >>> displaymatch(valid_hand_re.search("akt5q")) # Valid. "" - >>> displaymatch(valid.search("akt5e")) # Invalid. - >>> displaymatch(valid.search("akt")) # Invalid. - >>> displaymatch(valid.search("727ak")) # Valid. + >>> displaymatch(valid_hand_re.search("akt5e")) # Invalid. + >>> displaymatch(valid_hand_re.search("akt")) # Invalid. + >>> displaymatch(valid_hand_re.search("727ak")) # Valid. "" That last hand, ``"727ak"``, contained a pair, or two of the same valued cards. 
To match this with a regular expression, one could use backreferences as such:: - >>> pair = re.compile(r"^.*(.).*\1") - >>> displaymatch(pair.search("717ak")) # Pair of 7s. + >>> pair_re = re.compile(r".*(.).*\1") + >>> displaymatch(pair_re.match("717ak")) # Pair of 7s. "" - >>> displaymatch(pair.search("718ak")) # No pairs. - >>> displaymatch(pair.search("354aa")) # Pair of aces. + >>> displaymatch(pair_re.match("718ak")) # No pairs. + >>> displaymatch(pair_re.match("354aa")) # Pair of aces. "" To find out what card the pair consists of, one could use the :meth:`~Match.group` method of the match object in the following manner:: - >>> pair = re.compile(r"^.*(.).*\1") - >>> pair.search("717ak").group(1) + >>> pair_re = re.compile(r".*(.).*\1") + >>> pair_re.prefixmatch("717ak").group(1) '7' - # Error because re.search() returns None, which doesn't have a group() method: - >>> pair.search("718ak").group(1) + # Error because prefixmatch() returns None, which doesn't have a group() method: + >>> pair_re.prefixmatch("718ak").group(1) Traceback (most recent call last): File "", line 1, in - re.search(r".*(.).*\1", "718ak").group(1) + pair_re.prefixmatch("718ak").group(1) AttributeError: 'NoneType' object has no attribute 'group' - >>> pair.search("354aa").group(1) + >>> pair_re.prefixmatch("354aa").group(1) 'a' +The examples above use :meth:`~Pattern.match` and :meth:`~Pattern.prefixmatch` +interchangeably because they are two names for the same method. +:meth:`~Pattern.prefixmatch` was added in Python 3.15 as a more explicit name; +use it when your code does not need to run on older Python versions. +:meth:`~Pattern.search` with a ``^`` or ``\A`` anchor is equivalent, but using +an explicit method name is clearer to readers of the code. +See :ref:`prefixmatch-vs-match` for more on this topic. + Simulating scanf() ^^^^^^^^^^^^^^^^^^ From d6090bb003f1f5f5144c5c541eaf97355f59537f Mon Sep 17 00:00:00 2001 From: "Gregory P. 
Smith" Date: Sat, 31 Jan 2026 16:10:04 -0800 Subject: [PATCH 7/7] Replace name-parsing examples with Monty Python references Using first_name/last_name patterns promotes the myth that names have simple, universal structures. Replace with: - "killer rabbit" with adjective/animal groups - "Norwegian Blue, pining for the fjords" for unlabeled groups --- Doc/library/re.rst | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/Doc/library/re.rst b/Doc/library/re.rst index 9f6efae32bbebe..ed071d21bef1c7 100644 --- a/Doc/library/re.rst +++ b/Doc/library/re.rst @@ -1446,15 +1446,15 @@ when there is no match, you can test whether there was a match with a simple If a group is contained in a part of the pattern that matched multiple times, the last match is returned. :: - >>> m = re.search(r"\A(\w+) (\w+)", "Isaac Newton, physicist") + >>> m = re.search(r"\A(\w+) (\w+)", "Norwegian Blue, pining for the fjords") >>> m.group(0) # The entire match - 'Isaac Newton' + 'Norwegian Blue' >>> m.group(1) # The first parenthesized subgroup. - 'Isaac' + 'Norwegian' >>> m.group(2) # The second parenthesized subgroup. - 'Newton' + 'Blue' >>> m.group(1, 2) # Multiple arguments give us a tuple. - ('Isaac', 'Newton') + ('Norwegian', 'Blue') If the regular expression uses the ``(?P<name>...)`` syntax, the *groupN* arguments may also be strings identifying groups by their group name.
If a @@ -1463,18 +1463,18 @@ when there is no match, you can test whether there was a match with a simple A moderately complicated example:: - >>> m = re.search(r"(?P<first_name>\w+) (?P<last_name>\w+)", "Malcolm Reynolds") - >>> m.group('first_name') - 'Malcolm' - >>> m.group('last_name') - 'Reynolds' + >>> m = re.search(r"(?P<adjective>\w+) (?P<animal>\w+)", "killer rabbit") + >>> m.group('adjective') + 'killer' + >>> m.group('animal') + 'rabbit' Named groups can also be referred to by their index:: >>> m.group(1) - 'Malcolm' + 'killer' >>> m.group(2) - 'Reynolds' + 'rabbit' If a group matches multiple times, only the last match is accessible:: @@ -1488,21 +1488,21 @@ when there is no match, you can test whether there was a match with a simple This is identical to ``m.group(g)``. This allows easier access to an individual group from a match:: - >>> m = re.search(r"(\w+) (\w+)", "Isaac Newton, physicist") + >>> m = re.search(r"(\w+) (\w+)", "Norwegian Blue, pining for the fjords") >>> m[0] # The entire match - 'Isaac Newton' + 'Norwegian Blue' >>> m[1] # The first parenthesized subgroup. - 'Isaac' + 'Norwegian' >>> m[2] # The second parenthesized subgroup. - 'Newton' + 'Blue' Named groups are supported as well:: - >>> m = re.search(r"(?P<first_name>\w+) (?P<last_name>\w+)", "Isaac Newton") - >>> m['first_name'] - 'Isaac' - >>> m['last_name'] - 'Newton' + >>> m = re.search(r"(?P<adjective>\w+) (?P<animal>\w+)", "killer rabbit") + >>> m['adjective'] + 'killer' + >>> m['animal'] + 'rabbit' .. versionadded:: 3.6 @@ -1536,9 +1536,9 @@ when there is no match, you can test whether there was a match with a simple the subgroup name. The *default* argument is used for groups that did not participate in the match; it defaults to ``None``. For example:: - >>> m = re.search(r"(?P<first_name>\w+) (?P<last_name>\w+)", "Malcolm Reynolds") + >>> m = re.search(r"(?P<adjective>\w+) (?P<animal>\w+)", "killer rabbit") >>> m.groupdict() - {'first_name': 'Malcolm', 'last_name': 'Reynolds'} + {'adjective': 'killer', 'animal': 'rabbit'} .. method:: Match.start([group])