Skip to content

Commit 6b7f8dd

Browse files
committed
fix parse errors related to char escapes and char ranges
1 parent 1e048d8 commit 6b7f8dd

File tree

4 files changed

+32
-1
lines changed

4 files changed

+32
-1
lines changed

javascript/extractor/src/com/semmle/js/parser/RegExpParser.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import com.semmle.js.ast.regexp.ZeroWidthPositiveLookahead;
3636
import com.semmle.js.ast.regexp.ZeroWidthPositiveLookbehind;
3737
import java.util.ArrayList;
38+
import java.util.Arrays;
3839
import java.util.List;
3940

4041
/** A parser for ECMAScript 2018 regular expressions. */
@@ -499,7 +500,11 @@ private RegExpTerm parseCharacterClass() {
499500
/**
 * Parses one element of a character class: either a single atom or a range of two atoms
 * separated by {@code -}.
 *
 * <p>A {@code CharacterClassRange} is built only when the first atom is followed by {@code -}
 * (but not by {@code -]}), the text after the dash is not one of the escapes
 * {@code \d \D \s \S \w \W}, and the first atom is not itself a {@code CharacterClassEscape}.
 * In every other case the first atom is returned on its own.
 *
 * @return the range term, or the single atom when no range applies
 */
private RegExpTerm parseCharacterClassElement() {
500501
SourceLocation loc = new SourceLocation(pos());
501502
RegExpTerm atom = this.parseCharacterClassAtom();
502-
if (!this.lookahead("-]") && this.match("-"))
// A "-" followed by \d, \D, \s, \S, \w or \W is not treated as a range: return the atom alone.
// (Presumably lookahead only peeks and leaves the "-" for the next element; confirm.)
503+
for (String c : Arrays.asList("d", "D", "s", "S", "w", "W")) {
504+
if (this.lookahead("-\\" + c))
505+
return atom;
506+
}
// NOTE(review): && evaluates left to right, so this.match("-") runs before the instanceof
// test. If match consumes the "-" on success (presumably; confirm against its definition),
// a CharacterClassEscape atom followed by "-" is returned with that "-" already consumed.
507+
if (!this.lookahead("-]") && this.match("-") && !(atom instanceof CharacterClassEscape))
503508
return this.finishTerm(new CharacterClassRange(loc, atom, this.parseCharacterClassAtom()));
504509
return atom;
505510
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
range
2+
| tst.js:1:13:1:17 | [w-z] | tst.js:1:14:1:16 | w-z |
3+
| tst.js:6:13:6:19 | [\\n-\\r] | tst.js:6:14:6:18 | \\n-\\r |
4+
| tst.js:7:13:7:18 | [\\n-z] | tst.js:7:14:7:17 | \\n-z |
5+
escapeClass
6+
| tst.js:2:13:2:16 | [\\w] | tst.js:2:14:2:15 | \\w |
7+
| tst.js:3:13:3:18 | [\\w-z] | tst.js:3:14:3:15 | \\w |
8+
| tst.js:4:13:4:19 | [\\w-\\w] | tst.js:4:14:4:15 | \\w |
9+
| tst.js:4:13:4:19 | [\\w-\\w] | tst.js:4:17:4:18 | \\w |
10+
| tst.js:5:13:5:18 | [z-\\w] | tst.js:5:16:5:17 | \\w |
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
import javascript
2+
3+
/** Holds if `range` is a character range that is a child of the character class `cla`. */
query predicate range(RegExpCharacterClass cla, RegExpCharacterRange range) {
4+
cla.getAChild() = range
5+
}
6+
7+
/** Holds if `escape` is a character class escape that is a child of the character class `cla`. */
query predicate escapeClass(RegExpCharacterClass cla, RegExpCharacterClassEscape escape) {
8+
cla.getAChild() = escape
9+
}
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
var reg1 = /[w-z]/; // normal range w-z, matches: wxyz
2+
var reg2 = /[\w]/; // escape class, same as \w.
3+
var reg3 = /[\w-z]/; // escape class \w and "-" and "z", same as [a-zA-Z0-9_\-z]
4+
var reg4 = /[\w-\w]/; // escape class \w (twice) and the char "-".
5+
var reg5 = /[z-\w]/; // same as reg3
6+
var reg6 = /[\n-\r]/; // from \n (code 10) to \r (code 13).
7+
var reg7 = /[\n-z]/; // from \n (code 10) to z (code 122).

0 commit comments

Comments
 (0)