@@ -1114,4 +1114,64 @@ module RegExp {
11141114 or
11151115 result = node .asExpr ( ) .( StringLiteral ) .asRegExp ( )
11161116 }
1117+
1118+ /**
1119+ * A character that will be analyzed by `RegExp::alwaysMatchesMetaCharacter`.
1120+ *
1121+ * Currently only `<`, `'`, and `"` are considered to be meta-characters, but new meta-characters
1122+ * can be added by subclassing this class.
1123+ */
1124+ abstract class MetaCharacter extends string {
1125+ bindingset [ this ]
1126+ MetaCharacter ( ) { any ( ) }
1127+
1128+ /**
1129+ * Holds if the given atomic term matches this meta-character.
1130+ *
1131+ * Does not hold for derived terms like alternatives and groups.
1132+ *
1133+ * By default, `.`, `\W`, `\S`, and `\D` are considered to match any meta-character,
1134+ * but the predicate can be overridden for meta-characters where this is not the case.
1135+ */
1136+ predicate matchedByAtom ( RegExpTerm term ) {
1137+ term .( RegExpConstant ) .getConstantValue ( ) = this
1138+ or
1139+ term instanceof RegExpDot
1140+ or
1141+ term .( RegExpCharacterClassEscape ) .getValue ( ) = [ "\\W" , "\\S" , "\\D" ]
1142+ or
1143+ exists ( string lo , string hi |
1144+ term .( RegExpCharacterRange ) .isRange ( lo , hi ) and
1145+ lo <= this and
1146+ this <= hi
1147+ )
1148+ }
1149+ }
1150+
1151+ private class DefaultMetaCharacter extends MetaCharacter {
1152+ DefaultMetaCharacter ( ) { this = [ "<" , "'" , "\"" ] }
1153+ }
1154+
1155+ /**
1156+ * Holds if `term` can match any occurence of `char` within a string (not taking into account
1157+ * the context in which `term` appears).
1158+ *
1159+ * This predicate is under-approximate and never considers sequences to guarantee a match.
1160+ */
1161+ predicate alwaysMatchesMetaCharacter ( RegExpTerm term , MetaCharacter char ) {
1162+ not term .getParent ( ) instanceof RegExpSequence and // restrict size of predicate
1163+ char .matchedByAtom ( term )
1164+ or
1165+ alwaysMatchesMetaCharacter ( term .( RegExpGroup ) .getAChild ( ) , char )
1166+ or
1167+ alwaysMatchesMetaCharacter ( term .( RegExpAlt ) .getAlternative ( ) , char )
1168+ or
1169+ exists ( RegExpCharacterClass class_ | term = class_ |
1170+ not class_ .isInverted ( ) and
1171+ char .matchedByAtom ( class_ .getAChild ( ) )
1172+ or
1173+ class_ .isInverted ( ) and
1174+ not char .matchedByAtom ( class_ .getAChild ( ) )
1175+ )
1176+ }
11171177}
0 commit comments