Skip to content

Commit 2576884

Browse files
authored
Merge pull request #1499 from markshannon/python-fix-regex-parsing
Python regex: Fix handling of character sets.
2 parents 7ff6d82 + 347e3f3 commit 2576884

File tree

8 files changed: 34 additions and 3 deletions

8 files changed: 34 additions and 3 deletions

python/ql/src/semmle/python/regex.qll

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -68,7 +68,8 @@ abstract class RegexString extends Expr {
6868
/** Whether there is a character class, between start (inclusive) and end (exclusive) */
6969
predicate charSet(int start, int end) {
7070
exists(int inner_start, int inner_end |
71-
this.char_set_start(start, inner_start) |
71+
this.char_set_start(start, inner_start) and
72+
not this.char_set_start(_, start) |
7273
end = inner_end + 1 and inner_end > inner_start and
7374
this.nonEscapedCharAt(inner_end) = "]" and
7475
not exists(int mid | this.nonEscapedCharAt(mid) = "]" |

python/ql/test/library-tests/regex/Characters.expected

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -56,6 +56,11 @@
5656
| \\A[+-]?\\d+ | 3 | 4 |
5757
| \\A[+-]?\\d+ | 4 | 5 |
5858
| \\A[+-]?\\d+ | 7 | 9 |
59+
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | 0 | 2 |
60+
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | 12 | 13 |
61+
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | 16 | 18 |
62+
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | 18 | 20 |
63+
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | 30 | 31 |
5964
| \\\|\\[\\][123]\|\\{\\} | 0 | 2 |
6065
| \\\|\\[\\][123]\|\\{\\} | 2 | 4 |
6166
| \\\|\\[\\][123]\|\\{\\} | 4 | 6 |

python/ql/test/library-tests/regex/FirstLast.expected

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,9 @@
4545
| \\A[+-]?\\d+ | first | 0 | 2 |
4646
| \\A[+-]?\\d+ | last | 7 | 9 |
4747
| \\A[+-]?\\d+ | last | 7 | 10 |
48+
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | first | 0 | 2 |
49+
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | last | 28 | 32 |
50+
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | last | 28 | 33 |
4851
| \\\|\\[\\][123]\|\\{\\} | first | 0 | 2 |
4952
| \\\|\\[\\][123]\|\\{\\} | first | 12 | 14 |
5053
| \\\|\\[\\][123]\|\\{\\} | last | 6 | 11 |

python/ql/test/library-tests/regex/GroupContents.expected

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,9 @@
1010
| (?P<name>[\\w]+)\| | 0 | 15 | (?P<name>[\\w]+) | 9 | 14 | [\\w]+ |
1111
| (?m)^(?!$) | 5 | 10 | (?!$) | 8 | 9 | $ |
1212
| (\\033\|~{) | 0 | 9 | (\\033\|~{) | 1 | 8 | \\033\|~{ |
13+
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | 2 | 16 | (?P<txt>[^[]*) | 10 | 15 | [^[]* |
14+
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | 20 | 34 | (?P<uri>[^)]*) | 28 | 33 | [^)]* |
1315
| ^(^y\|^z)(u$\|v$)$ | 1 | 8 | (^y\|^z) | 2 | 7 | ^y\|^z |
1416
| ^(^y\|^z)(u$\|v$)$ | 8 | 15 | (u$\|v$) | 9 | 14 | u$\|v$ |
1517
| ^[A-Z_]+$(?<!not-this) | 9 | 22 | (?<!not-this) | 13 | 21 | not-this |
16-
| x\|(?<!\\w)l | 2 | 9 | (?<!\\w) | 6 | 8 | \\w |
18+
| x\|(?<!\\w)l | 2 | 9 | (?<!\\w) | 6 | 8 | \\w |

python/ql/test/library-tests/regex/Qualified.expected

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,9 @@
66
| (?P<name>[\\w]+)\| | 9 | 14 | false |
77
| \\A[+-]?\\d+ | 2 | 7 | true |
88
| \\A[+-]?\\d+ | 7 | 10 | false |
9+
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | 10 | 15 | true |
10+
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | 28 | 33 | true |
911
| ^[A-Z_]+$(?<!not-this) | 1 | 8 | false |
1012
| ax{01,3} | 1 | 8 | false |
1113
| ax{3,} | 1 | 6 | false |
12-
| ax{,3} | 1 | 6 | true |
14+
| ax{,3} | 1 | 6 | true |

python/ql/test/library-tests/regex/Regex.expected

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -121,6 +121,18 @@
121121
| \\A[+-]?\\d+ | qualified | 2 | 7 |
122122
| \\A[+-]?\\d+ | qualified | 7 | 10 |
123123
| \\A[+-]?\\d+ | sequence | 0 | 10 |
124+
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | char | 0 | 2 |
125+
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | char | 12 | 13 |
126+
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | char | 16 | 18 |
127+
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | char | 18 | 20 |
128+
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | char | 30 | 31 |
129+
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | char-set | 10 | 14 |
130+
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | char-set | 28 | 32 |
131+
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | non-empty group | 2 | 16 |
132+
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | non-empty group | 20 | 34 |
133+
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | qualified | 10 | 15 |
134+
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | qualified | 28 | 33 |
135+
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | sequence | 0 | 34 |
124136
| \\\|\\[\\][123]\|\\{\\} | char | 0 | 2 |
125137
| \\\|\\[\\][123]\|\\{\\} | char | 2 | 4 |
126138
| \\\|\\[\\][123]\|\\{\\} | char | 4 | 6 |

python/ql/test/library-tests/regex/test.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -57,3 +57,6 @@
5757

5858
#Named group with caret and empty choice.
5959
re.compile(r'(?:(?P<n1>^(?:|x)))')
60+
61+
#Misparsed on LGTM
62+
re.compile(r"\[(?P<txt>[^[]*)\]\((?P<uri>[^)]*)")

python/ql/test/query-tests/Expressions/Regex/test.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -136,3 +136,6 @@
136136

137137
#Named group with caret and empty choice.
138138
re.compile(r'(?:(?P<n1>^(?:|x)))')
139+
140+
#Potentially mis-parsed character set
141+
re.compile(r"\[(?P<txt>[^[]*)\]\((?P<uri>[^)]*)")

0 commit comments

Comments
 (0)