@@ -188,7 +188,7 @@ abstract class RegexString extends Expr {
188188 )
189189 }
190190
191- /** Hold is a character set starts between `start` and `end`. */
191+ /** Holds if a character set starts between `start` and `end`. */
192192 predicate char_set_start ( int start , int end ) {
193193 this .char_set_start ( start ) = true and
194194 (
@@ -316,8 +316,10 @@ abstract class RegexString extends Expr {
316316 result = this .( Bytes ) .getS ( )
317317 }
318318
319+ /** Gets the `i`th character of the text of this regex. */
319320 string getChar ( int i ) { result = this .getText ( ) .charAt ( i ) }
320321
322+ /** Gets the `i`th character of this regex, unless it is part of a character escape sequence. */
321323 string nonEscapedCharAt ( int i ) {
322324 result = this .getText ( ) .charAt ( i ) and
323325 not exists ( int x , int y | this .escapedCharacter ( x , y ) and i in [ x .. y - 1 ] )
@@ -329,6 +331,9 @@ abstract class RegexString extends Expr {
329331
330332 private predicate isGroupStart ( int i ) { this .nonEscapedCharAt ( i ) = "(" and not this .inCharSet ( i ) }
331333
334+ /**
335+ * Holds if the `i`th character could not be parsed.
336+ */
332337 predicate failedToParse ( int i ) {
333338 exists ( this .getChar ( i ) ) and
334339 not exists ( int start , int end |
@@ -417,6 +422,9 @@ abstract class RegexString extends Expr {
417422 )
418423 }
419424
425+ /**
426+ * Holds if a simple or escaped character is found between `start` and `end`.
427+ */
420428 predicate character ( int start , int end ) {
421429 (
422430 this .simpleCharacter ( start , end ) and
@@ -428,12 +436,18 @@ abstract class RegexString extends Expr {
428436 not exists ( int x , int y | this .backreference ( x , y ) and x <= start and y >= end )
429437 }
430438
439+ /**
440+ * Holds if a single character that is not a special character is found between `start` and `end`.
441+ */
431442 predicate normalCharacter ( int start , int end ) {
432443 end = start + 1 and
433444 this .character ( start , end ) and
434445 not this .specialCharacter ( start , end , _)
435446 }
436447
448+ /**
449+ * Holds if a special character is found between `start` and `end`.
450+ */
437451 predicate specialCharacter ( int start , int end , string char ) {
438452 not this .inCharSet ( start ) and
439453 this .character ( start , end ) and
@@ -492,7 +506,7 @@ abstract class RegexString extends Expr {
492506 this .specialCharacter ( start , end , _)
493507 }
494508
495- /** Whether the text in the range start,end is a group */
509+ /** Whether the text in the range `start,end` is a group. */
496510 predicate group ( int start , int end ) {
497511 this .groupContents ( start , end , _, _)
498512 or
@@ -611,19 +625,26 @@ abstract class RegexString extends Expr {
611625 this .simple_group_start ( start , end )
612626 }
613627
628+ /** Matches the start of a non-capturing group, i.e. `(?:`, spanning `start` to `end`. */
614629 private predicate non_capturing_group_start ( int start , int end ) {
615630 this .isGroupStart ( start ) and
616631 this .getChar ( start + 1 ) = "?" and
617632 this .getChar ( start + 2 ) = ":" and
618633 end = start + 3
619634 }
620635
636+ /** Matches the start of a simple group, i.e. a `(` that is not followed by `?`, as in `(a+)`. */
621637 private predicate simple_group_start ( int start , int end ) {
622638 this .isGroupStart ( start ) and
623639 this .getChar ( start + 1 ) != "?" and
624640 end = start + 1
625641 }
626642
643+ /**
644+ * Matches the start of a named group, such as:
645+ * - `(?<name>\w+)`
646+ * - `(?'name'\w+)`
647+ */
627648 private predicate named_group_start ( int start , int end ) {
628649 this .isGroupStart ( start ) and
629650 this .getChar ( start + 1 ) = "?" and
@@ -675,20 +696,23 @@ abstract class RegexString extends Expr {
675696 )
676697 }
677698
699+ /** Matches the start of a positive lookahead assertion, i.e. `(?=`, spanning `start` to `end`. */
678700 private predicate lookahead_assertion_start ( int start , int end ) {
679701 this .isGroupStart ( start ) and
680702 this .getChar ( start + 1 ) = "?" and
681703 this .getChar ( start + 2 ) = "=" and
682704 end = start + 3
683705 }
684706
707+ /** Matches the start of a negative lookahead assertion, i.e. `(?!`, spanning `start` to `end`. */
685708 private predicate negative_lookahead_assertion_start ( int start , int end ) {
686709 this .isGroupStart ( start ) and
687710 this .getChar ( start + 1 ) = "?" and
688711 this .getChar ( start + 2 ) = "!" and
689712 end = start + 3
690713 }
691714
715+ /** Matches the start of a positive lookbehind assertion, i.e. `(?<=`. */
692716 private predicate lookbehind_assertion_start ( int start , int end ) {
693717 this .isGroupStart ( start ) and
694718 this .getChar ( start + 1 ) = "?" and
@@ -697,6 +721,7 @@ abstract class RegexString extends Expr {
697721 end = start + 4
698722 }
699723
724+ /** Matches the start of a negative lookbehind assertion, i.e. `(?<!`. */
700725 private predicate negative_lookbehind_assertion_start ( int start , int end ) {
701726 this .isGroupStart ( start ) and
702727 this .getChar ( start + 1 ) = "?" and
@@ -705,26 +730,30 @@ abstract class RegexString extends Expr {
705730 end = start + 4
706731 }
707732
733+ /** Matches the start of a comment group, i.e. `(?#`, spanning `start` to `end`. */
708734 private predicate comment_group_start ( int start , int end ) {
709735 this .isGroupStart ( start ) and
710736 this .getChar ( start + 1 ) = "?" and
711737 this .getChar ( start + 2 ) = "#" and
712738 end = start + 3
713739 }
714740
741+ /** Holds if a group spans `start` to `end`, with its opener ending at `in_start` and its group-end character at `in_end`. */
715742 predicate groupContents ( int start , int end , int in_start , int in_end ) {
716743 this .group_start ( start , in_start ) and
717744 end = in_end + 1 and
718745 this .top_level ( in_start , in_end ) and
719746 this .isGroupEnd ( in_end )
720747 }
721748
749+ /** Matches a named backreference: a four-character opener, then the name, ending at the next `)`; presumably `(?P=name)` - confirm against `named_backreference_start`. */
722750 private predicate named_backreference ( int start , int end , string name ) {
723751 this .named_backreference_start ( start , start + 4 ) and
724752 end = min ( int i | i > start + 4 and this .getChar ( i ) = ")" ) + 1 and
725753 name = this .getText ( ) .substring ( start + 4 , end - 2 ) // NOTE(review): `end - 1` is the index of `)`; if `substring` excludes its end index, `end - 2` drops the name's last character - confirm.
726754 }
727755
756+ /** Matches a numbered backreference, e.g. `\1`. */
728757 private predicate numbered_backreference ( int start , int end , int value ) {
729758 this .escapingChar ( start ) and
730759 // starting with 0 makes it an octal escape
@@ -749,7 +778,7 @@ abstract class RegexString extends Expr {
749778 )
750779 }
751780
752- /** Whether the text in the range start,end is a back reference */
781+ /** Whether the text in the range `start,end` is a back reference. */
753782 predicate backreference ( int start , int end ) {
754783 this .numbered_backreference ( start , end , _)
755784 or
0 commit comments