@@ -236,6 +236,16 @@ def test_symbolic_groups(self):
236236 re .compile (r'(?P<a>x)(?P=a)(?(a)y)' )
237237 re .compile (r'(?P<a1>x)(?P=a1)(?(a1)y)' )
238238 re .compile (r'(?P<a1>x)\1(?(1)y)' )
239+ re .compile (b'(?P<a1>x)(?P=a1)(?(a1)y)' )
240+ # New valid identifiers in Python 3
241+ re .compile ('(?P<µ>x)(?P=µ)(?(µ)y)' )
242+ re .compile ('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)' )
243+ # Support > 100 groups.
244+ pat = '|' .join ('x(?P<a%d>%x)y' % (i , i ) for i in range (1 , 200 + 1 ))
245+ pat = '(?:%s)(?(200)z|t)' % pat
246+ self .assertEqual (re .match (pat , 'xc8yz' ).span (), (0 , 5 ))
247+
248+ def test_symbolic_groups_errors (self ):
239249 self .checkPatternError (r'(?P<a>)(?P<a>)' ,
240250 "redefinition of group name 'a' as group 2; "
241251 "was group 1" )
@@ -261,16 +271,22 @@ def test_symbolic_groups(self):
261271 self .checkPatternError (r'(?(-1))' , "bad character in group name '-1'" , 3 )
262272 self .checkPatternError (r'(?(1a))' , "bad character in group name '1a'" , 3 )
263273 self .checkPatternError (r'(?(a.))' , "bad character in group name 'a.'" , 3 )
264- # New valid/invalid identifiers in Python 3
265- re .compile ('(?P<µ>x)(?P=µ)(?(µ)y)' )
266- re .compile ('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)' )
267274 self .checkPatternError ('(?P<©>x)' , "bad character in group name '©'" , 4 )
275+ self .checkPatternError ('(?P=©)' , "bad character in group name '©'" , 4 )
276+ self .checkPatternError ('(?(©)y)' , "bad character in group name '©'" , 3 )
277+
278+ def test_symbolic_refs (self ):
279+ self .assertEqual (re .sub ('(?P<a>x)|(?P<b>y)' , r'\g<b>' , 'xx' ), '' )
280+ self .assertEqual (re .sub ('(?P<a>x)|(?P<b>y)' , r'\2' , 'xx' ), '' )
281+ self .assertEqual (re .sub (b'(?P<a1>x)' , br'\g<a1>' , b'xx' ), b'xx' )
282+ # New valid identifiers in Python 3
283+ self .assertEqual (re .sub ('(?P<µ>x)' , r'\g<µ>' , 'xx' ), 'xx' )
284+ self .assertEqual (re .sub ('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)' , r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>' , 'xx' ), 'xx' )
268285 # Support > 100 groups.
269286 pat = '|' .join ('x(?P<a%d>%x)y' % (i , i ) for i in range (1 , 200 + 1 ))
270- pat = '(?:%s)(?(200)z|t)' % pat
271- self .assertEqual (re .match (pat , 'xc8yz' ).span (), (0 , 5 ))
287+ self .assertEqual (re .sub (pat , r'\g<200>' , 'xc8yzxc8y' ), 'c8zc8' )
272288
273- def test_symbolic_refs (self ):
289+ def test_symbolic_refs_errors (self ):
274290 self .checkTemplateError ('(?P<a>x)' , r'\g<a' , 'xx' ,
275291 'missing >, unterminated name' , 3 )
276292 self .checkTemplateError ('(?P<a>x)' , r'\g<' , 'xx' ,
@@ -288,18 +304,14 @@ def test_symbolic_refs(self):
288304 'invalid group reference 2' , 1 )
289305 with self .assertRaisesRegex (IndexError , "unknown group name 'ab'" ):
290306 re .sub ('(?P<a>x)' , r'\g<ab>' , 'xx' )
291- self .assertEqual (re .sub ('(?P<a>x)|(?P<b>y)' , r'\g<b>' , 'xx' ), '' )
292- self .assertEqual (re .sub ('(?P<a>x)|(?P<b>y)' , r'\2' , 'xx' ), '' )
293307 self .checkTemplateError ('(?P<a>x)' , r'\g<-1>' , 'xx' ,
294308 "bad character in group name '-1'" , 3 )
295- # New valid/invalid identifiers in Python 3
296- self .assertEqual (re .sub ('(?P<µ>x)' , r'\g<µ>' , 'xx' ), 'xx' )
297- self .assertEqual (re .sub ('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)' , r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>' , 'xx' ), 'xx' )
298309 self .checkTemplateError ('(?P<a>x)' , r'\g<©>' , 'xx' ,
299310 "bad character in group name '©'" , 3 )
300- # Support > 100 groups.
301- pat = '|' .join ('x(?P<a%d>%x)y' % (i , i ) for i in range (1 , 200 + 1 ))
302- self .assertEqual (re .sub (pat , r'\g<200>' , 'xc8yzxc8y' ), 'c8zc8' )
311+ self .checkTemplateError ('(?P<a>x)' , r'\g<㊀>' , 'xx' ,
312+ "bad character in group name '㊀'" , 3 )
313+ self .checkTemplateError ('(?P<a>x)' , r'\g<¹>' , 'xx' ,
314+ "bad character in group name '¹'" , 3 )
303315
304316 def test_re_subn (self ):
305317 self .assertEqual (re .subn ("(?i)b+" , "x" , "bbbb BBBB" ), ('x x' , 2 ))
@@ -561,9 +573,23 @@ def test_re_groupref_exists(self):
561573 pat = '(?:%s)(?(200)z)' % pat
562574 self .assertEqual (re .match (pat , 'xc8yz' ).span (), (0 , 5 ))
563575
564- self .checkPatternError (r'(?P<a>)(?(0))' , 'bad group number' , 10 )
576+ def test_re_groupref_exists_errors (self ):
577+ self .checkPatternError (r'(?P<a>)(?(0)a|b)' , 'bad group number' , 10 )
578+ self .checkPatternError (r'()(?(-1)a|b)' ,
579+ "bad character in group name '-1'" , 5 )
580+ self .checkPatternError (r'()(?(㊀)a|b)' ,
581+ "bad character in group name '㊀'" , 5 )
582+ self .checkPatternError (r'()(?(¹)a|b)' ,
583+ "bad character in group name '¹'" , 5 )
584+ self .checkPatternError (r'()(?(1' ,
585+ "missing ), unterminated name" , 5 )
586+ self .checkPatternError (r'()(?(1)a' ,
587+ "missing ), unterminated subpattern" , 2 )
565588 self .checkPatternError (r'()(?(1)a|b' ,
566589 'missing ), unterminated subpattern' , 2 )
590+ self .checkPatternError (r'()(?(1)a|b|c' ,
591+ 'conditional backref with more than '
592+ 'two branches' , 10 )
567593 self .checkPatternError (r'()(?(1)a|b|c)' ,
568594 'conditional backref with more than '
569595 'two branches' , 10 )
0 commit comments