gh-116738: Test re module for free threading

yoney · yoney · commit 81a459c57f36 · 2025-11-24T12:49:58.000-08:00
diff --git a/Lib/test/test_free_threading/test_re.py b/Lib/test/test_free_threading/test_re.py
@@ -0,0 +1,62 @@
+import re
+import unittest
+
+from test.support import threading_helper
+from test.support.threading_helper import run_concurrently
+
+
+NTHREADS = 10
+
+
+@threading_helper.requires_working_threading()
+class TestRe(unittest.TestCase):
+    def test_pattern_sub(self):
+        """Pattern substitution should work across threads."""
+        pattern = re.compile(r"\w+@\w+\.\w+")
+        text = "e-mail: test@python.org or user@pycon.org. " * 5
+        results = []
+
+        def worker():
+            substituted = pattern.sub("(redacted)", text)
+            results.append(substituted.count("(redacted)"))
+
+        run_concurrently(worker_func=worker, nthreads=NTHREADS)
+        self.assertEqual(results, [2 * 5] * NTHREADS)
+
+    def test_pattern_search(self):
+        """Pattern search should work across threads."""
+        emails = ["alice@python.org", "bob@pycon.org"] * 10
+        pattern = re.compile(r"\w+@\w+\.\w+")
+        results = []
+
+        def worker():
+            matches = [pattern.search(e).group() for e in emails]
+            results.append(len(matches))
+
+        run_concurrently(worker_func=worker, nthreads=NTHREADS)
+        self.assertEqual(results, [2 * 10] * NTHREADS)
+
+    def test_scanner_concurrent_access(self):
+        """Shared scanner should reject concurrent access."""
+        pattern = re.compile(r"\w+")
+        scanner = pattern.scanner("word " * 10)
+
+        def worker():
+            for _ in range(100):
+                try:
+                    scanner.search()
+                except ValueError as e:
+                    if "already executing" in str(e):
+                        pass
+                    else:
+                        raise
+
+        run_concurrently(worker_func=worker, nthreads=NTHREADS)
+        # This test has no assertions. Its purpose is to catch crashes and
+        # enable thread sanitizer to detect race conditions. While "already
+        # executing" errors are very likely, they're not guaranteed due to
+        # non-deterministic thread scheduling, so we can't assert errors > 0.
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/Modules/_sre/sre.c b/Modules/_sre/sre.c
@@ -2841,20 +2841,28 @@ scanner_dealloc(PyObject *self)
 static int
 scanner_begin(ScannerObject* self)
 {
+    int result;
+    Py_BEGIN_CRITICAL_SECTION(self);
     if (self->executing) {
         PyErr_SetString(PyExc_ValueError,
                         "regular expression scanner already executing");
-        return 0;
+        result = 0;
     }
-    self->executing = 1;
-    return 1;
+    else {
+        self->executing = 1;
+        result = 1;
+    }
+    Py_END_CRITICAL_SECTION();
+    return result;
 }
 
 static void
 scanner_end(ScannerObject* self)
 {
+    Py_BEGIN_CRITICAL_SECTION(self);
     assert(self->executing);
     self->executing = 0;
+    Py_END_CRITICAL_SECTION();
 }
 
 /*[clinic input]