Skip to content

Commit 5fb22ba

Browse files
author
Max Schaefer
committed
JavaScript: Handle zero-width assertions and sequences.
1 parent ec9a3c8 commit 5fb22ba

File tree

1 file changed

+25
-5
lines changed

1 file changed

+25
-5
lines changed

javascript/ql/src/RegExp/IdentityReplacement.ql

Lines changed: 25 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -27,16 +27,36 @@ predicate matchesString(Expr e, string s) {
2727
}
2828

2929
/**
30-
* Holds if `t` matches `c` and nothing else.
30+
* Holds if `t` matches `s` and nothing else.
3131
*/
32-
predicate matchesConstant(RegExpTerm t, RegExpConstant c) {
33-
c = t
32+
language[monotonicAggregates]
33+
predicate regExpMatchesString(RegExpTerm t, string s) {
34+
// constants match themselves
35+
s = t.(RegExpConstant).getValue()
3436
or
35-
matchesConstant(t.(RegExpGroup).getAChild(), c)
37+
// assertions match the empty string
38+
(t instanceof RegExpCaret or
39+
t instanceof RegExpDollar or
40+
t instanceof RegExpWordBoundary or
41+
t instanceof RegExpNonWordBoundary or
42+
t instanceof RegExpLookahead or
43+
t instanceof RegExpLookbehind) and
44+
s = ""
3645
or
46+
// groups match their content
47+
regExpMatchesString(t.(RegExpGroup).getAChild(), s)
48+
or
49+
// single-character classes match that character
3750
exists (RegExpCharacterClass recc | recc = t and not recc.isInverted() |
3851
recc.getNumChild() = 1 and
39-
matchesConstant(recc.getChild(0), c)
52+
regExpMatchesString(recc.getChild(0), s)
53+
)
54+
or
55+
// sequences match the concatenation of their elements
56+
exists (RegExpSequence seq | seq = t |
57+
s = concat(int i, RegExpTerm child | child = seq.getChild(i) |
58+
any(string subs | regExpMatchesString(child, subs)) order by i
59+
)
4060
)
4161
}
4262

0 commit comments

Comments
 (0)