@@ -186,7 +186,7 @@ abstract class RegexString extends Expr {
186186 )
187187 }
188188
189- /** Hold is a character set starts between `start` and `end`. */
189+ /** Holds if a character set starts between `start` and `end`. */
190190 predicate char_set_start ( int start , int end ) {
191191 this .char_set_start ( start ) = true and
192192 (
@@ -314,8 +314,10 @@ abstract class RegexString extends Expr {
314314 result = this .( Bytes ) .getS ( )
315315 }
316316
317+ /** Gets the character at index `i` of the text of this regex, as given by `getText()`. */
317318 string getChar ( int i ) { result = this .getText ( ) .charAt ( i ) }
318319
320+ /** Gets the `i`th character of this regex, unless it is part of a character escape sequence. */
319321 string nonEscapedCharAt ( int i ) {
320322 result = this .getText ( ) .charAt ( i ) and
321323 not exists ( int x , int y | this .escapedCharacter ( x , y ) and i in [ x .. y - 1 ] )
@@ -327,6 +329,9 @@ abstract class RegexString extends Expr {
327329
328330 /** Holds if the non-escaped character at index `i` is `(` and `inCharSet(i)` does not hold. */ private predicate isGroupStart ( int i ) { this .nonEscapedCharAt ( i ) = "(" and not this .inCharSet ( i ) }
329331
332+ /**
333+ * Holds if the `i`th character could not be parsed.
334+ */
330335 predicate failedToParse ( int i ) {
331336 exists ( this .getChar ( i ) ) and
332337 not exists ( int start , int end |
@@ -415,6 +420,9 @@ abstract class RegexString extends Expr {
415420 )
416421 }
417422
423+ /**
424+ * Holds if a simple or escaped character is found between `start` and `end`.
425+ */
418426 predicate character ( int start , int end ) {
419427 (
420428 this .simpleCharacter ( start , end ) and
@@ -426,12 +434,18 @@ abstract class RegexString extends Expr {
426434 not exists ( int x , int y | this .backreference ( x , y ) and x <= start and y >= end )
427435 }
428436
437+ /**
438+ * Holds if a normal character is found between `start` and `end`, i.e. `end = start + 1`, `character(start, end)` holds, and `specialCharacter(start, end, _)` does not.
439+ */
429440 predicate normalCharacter ( int start , int end ) {
430441 end = start + 1 and
431442 this .character ( start , end ) and
432443 not this .specialCharacter ( start , end , _)
433444 }
434445
446+ /**
447+ * Holds if a special character is found between `start` and `end`.
448+ */
435449 predicate specialCharacter ( int start , int end , string char ) {
436450 not this .inCharSet ( start ) and
437451 this .character ( start , end ) and
@@ -490,7 +504,7 @@ abstract class RegexString extends Expr {
490504 this .specialCharacter ( start , end , _)
491505 }
492506
493- /** Whether the text in the range start,end is a group */
507+ /** Whether the text in the range `start,end` is a group */
494508 predicate group ( int start , int end ) {
495509 this .groupContents ( start , end , _, _)
496510 or
@@ -609,19 +623,26 @@ abstract class RegexString extends Expr {
609623 this .simple_group_start ( start , end )
610624 }
611625
626+ /** Matches the start of a non-capturing group, i.e. `(?:`; `end` is `start + 3`. */
612627 private predicate non_capturing_group_start ( int start , int end ) {
613628 this .isGroupStart ( start ) and
614629 this .getChar ( start + 1 ) = "?" and
615630 this .getChar ( start + 2 ) = ":" and
616631 end = start + 3
617632 }
618633
634+ /** Matches the start of a simple group: a group-start `(` whose next character is not `?`, e.g. the `(` in `(a+)`; `end` is `start + 1`. */
619635 private predicate simple_group_start ( int start , int end ) {
620636 this .isGroupStart ( start ) and
621637 this .getChar ( start + 1 ) != "?" and
622638 end = start + 1
623639 }
624640
641+ /**
642+ * Matches the start of a named group, such as:
643+ * - `(?<name>\w+)`
644+ * - `(?'name'\w+)`
645+ */
625646 private predicate named_group_start ( int start , int end ) {
626647 this .isGroupStart ( start ) and
627648 this .getChar ( start + 1 ) = "?" and
@@ -673,20 +694,23 @@ abstract class RegexString extends Expr {
673694 )
674695 }
675696
697+ /** Matches the start of a positive lookahead assertion, i.e. `(?=`; `end` is `start + 3`. */
676698 private predicate lookahead_assertion_start ( int start , int end ) {
677699 this .isGroupStart ( start ) and
678700 this .getChar ( start + 1 ) = "?" and
679701 this .getChar ( start + 2 ) = "=" and
680702 end = start + 3
681703 }
682704
705+ /** Matches the start of a negative lookahead assertion, i.e. `(?!`; `end` is `start + 3`. */
683706 private predicate negative_lookahead_assertion_start ( int start , int end ) {
684707 this .isGroupStart ( start ) and
685708 this .getChar ( start + 1 ) = "?" and
686709 this .getChar ( start + 2 ) = "!" and
687710 end = start + 3
688711 }
689712
713+ /** Matches the start of a positive lookbehind assertion, i.e. `(?<=`. */
690714 private predicate lookbehind_assertion_start ( int start , int end ) {
691715 this .isGroupStart ( start ) and
692716 this .getChar ( start + 1 ) = "?" and
@@ -695,6 +719,7 @@ abstract class RegexString extends Expr {
695719 end = start + 4
696720 }
697721
722+ /** Matches the start of a negative lookbehind assertion, i.e. `(?<!`. */
698723 private predicate negative_lookbehind_assertion_start ( int start , int end ) {
699724 this .isGroupStart ( start ) and
700725 this .getChar ( start + 1 ) = "?" and
@@ -703,26 +728,30 @@ abstract class RegexString extends Expr {
703728 end = start + 4
704729 }
705730
731+ /** Matches the start of a comment group, i.e. `(?#`; `end` is `start + 3`. */
706732 private predicate comment_group_start ( int start , int end ) {
707733 this .isGroupStart ( start ) and
708734 this .getChar ( start + 1 ) = "?" and
709735 this .getChar ( start + 2 ) = "#" and
710736 end = start + 3
711737 }
712738
739+ /** Holds if a group spans `start` to `end` and its contents span `in_start` to `in_end`: `group_start(start, in_start)` holds, `top_level(in_start, in_end)` holds, `isGroupEnd(in_end)` holds, and `end` is `in_end + 1`. */
713740 predicate groupContents ( int start , int end , int in_start , int in_end ) {
714741 this .group_start ( start , in_start ) and
715742 end = in_end + 1 and
716743 this .top_level ( in_start , in_end ) and
717744 this .isGroupEnd ( in_end )
718745 }
719746
747+ /** Matches a named backreference and binds `name` to the referenced group name; `end` is one past the first `)` after `start + 4`. NOTE(review): the 4-character start and `)` terminator fit a form like `(?P=foo)` rather than `\k<foo>` - confirm against `named_backreference_start`. NOTE(review): if `substring` treats its end index as exclusive, `end - 2` drops the last character of the name - confirm whether `end - 1` was intended. */
720748 private predicate named_backreference ( int start , int end , string name ) {
721749 this .named_backreference_start ( start , start + 4 ) and
722750 end = min ( int i | i > start + 4 and this .getChar ( i ) = ")" ) + 1 and
723751 name = this .getText ( ) .substring ( start + 4 , end - 2 )
724752 }
725753
754+ /** Matches a numbered backreference, e.g. `\1`. */
726755 private predicate numbered_backreference ( int start , int end , int value ) {
727756 this .escapingChar ( start ) and
728757 // starting with 0 makes it an octal escape
@@ -747,7 +776,7 @@ abstract class RegexString extends Expr {
747776 )
748777 }
749778
750- /** Whether the text in the range start,end is a back reference */
779+ /** Whether the text in the range `start,end` is a back reference */
751780 predicate backreference ( int start , int end ) {
752781 this .numbered_backreference ( start , end , _)
753782 or
0 commit comments