|
14 | 14 | */ |
15 | 15 |
|
16 | 16 | import javascript |
17 | | - |
18 | | -/** |
19 | | - * A regexp term that matches substrings that should be replaced with the empty string. |
20 | | - */ |
21 | | -class EmptyReplaceRegExpTerm extends RegExpTerm { |
22 | | - EmptyReplaceRegExpTerm() { |
23 | | - exists(StringReplaceCall replace | |
24 | | - [replace.getRawReplacement(), replace.getCallback(1).getAReturn()].mayHaveStringValue("") and |
25 | | - this = replace.getRegExp().getRoot().getAChild*() |
26 | | - ) |
27 | | - } |
28 | | -} |
29 | | - |
30 | | -/** |
31 | | - * A prefix that may be dangerous to sanitize explicitly. |
32 | | - * |
33 | | - * Note that this class exists solely as a (necessary) optimization for this query. |
34 | | - */ |
35 | | -class DangerousPrefix extends string { |
36 | | - DangerousPrefix() { |
37 | | - this = ["/..", "../"] or |
38 | | - this = "<!--" or |
39 | | - this = "<" + ["iframe", "script", "cript", "scrip", "style"] |
40 | | - } |
41 | | -} |
42 | | - |
43 | | -/** |
44 | | - * A substring of a prefix that may be dangerous to sanitize explicitly. |
45 | | - */ |
46 | | -class DangerousPrefixSubstring extends string { |
47 | | - DangerousPrefixSubstring() { |
48 | | - exists(DangerousPrefix s | this = s.substring([0 .. s.length()], [0 .. s.length()])) |
49 | | - } |
50 | | -} |
51 | | - |
52 | | -/** |
53 | | - * Gets a dangerous prefix that is in the prefix language of `t`. |
54 | | - */ |
55 | | -DangerousPrefix getADangerousMatchedPrefix(EmptyReplaceRegExpTerm t) { |
56 | | - result = getADangerousMatchedPrefixSubstring(t) and |
57 | | - not exists(EmptyReplaceRegExpTerm pred | pred = t.getPredecessor+() and not pred.isNullable()) |
58 | | -} |
59 | | - |
60 | | -private import semmle.javascript.security.regexp.NfaUtils as NfaUtils |
61 | | - |
62 | | -/** |
63 | | - * Gets a char from a dangerous prefix that is matched by `t`. |
64 | | - */ |
65 | | -pragma[noinline] |
66 | | -DangerousPrefixSubstring getADangerousMatchedChar(EmptyReplaceRegExpTerm t) { |
67 | | - t.isNullable() and result = "" |
68 | | - or |
69 | | - t.getAMatchedString() = result |
70 | | - or |
71 | | - // A substring matched by some character class. This is only used to match the "word" part of an HTML tag (e.g. "iframe" in "<iframe"). |
72 | | - exists(NfaUtils::CharacterClass cc | |
73 | | - cc = NfaUtils::getCanonicalCharClass(t) and |
74 | | - cc.matches(result) and |
75 | | - result.regexpMatch("\\w") and |
76 | | - // excluding character classes that match ">" (e.g. /<[^<]*>/), as these might consume nested HTML tags, and thus prevent the dangerous pattern this query is looking for. |
77 | | - not cc.matches(">") |
78 | | - ) |
79 | | - or |
80 | | - t instanceof RegExpDot and |
81 | | - result.length() = 1 |
82 | | - or |
83 | | - ( |
84 | | - t instanceof RegExpOpt or |
85 | | - t instanceof RegExpStar or |
86 | | - t instanceof RegExpPlus or |
87 | | - t instanceof RegExpGroup or |
88 | | - t instanceof RegExpAlt |
89 | | - ) and |
90 | | - result = getADangerousMatchedChar(t.getAChild()) |
91 | | -} |
92 | | - |
93 | | -/** |
94 | | - * Gets a substring of a dangerous prefix that is in the language starting at `t` (ignoring lookarounds). |
95 | | - * |
96 | | - * Note that the language of `t` is slightly restricted as not all RegExpTerm types are supported. |
97 | | - */ |
98 | | -DangerousPrefixSubstring getADangerousMatchedPrefixSubstring(EmptyReplaceRegExpTerm t) { |
99 | | - result = getADangerousMatchedChar(t) + getADangerousMatchedPrefixSubstring(t.getSuccessor()) |
100 | | - or |
101 | | - result = getADangerousMatchedChar(t) |
102 | | - or |
103 | | - // loop around for repetitions (only considering alphanumeric characters in the repetition) |
104 | | - exists(RepetitionMatcher repetition | t = repetition | |
105 | | - result = getADangerousMatchedPrefixSubstring(repetition) + repetition.getAChar() |
106 | | - ) |
107 | | -} |
108 | | - |
109 | | -class RepetitionMatcher extends EmptyReplaceRegExpTerm { |
110 | | - string char; |
111 | | - |
112 | | - pragma[noinline] |
113 | | - RepetitionMatcher() { |
114 | | - (this instanceof RegExpPlus or this instanceof RegExpStar) and |
115 | | - char = getADangerousMatchedChar(this.getAChild()) and |
116 | | - char.regexpMatch("\\w") |
117 | | - } |
118 | | - |
119 | | - pragma[noinline] |
120 | | - string getAChar() { result = char } |
121 | | -} |
122 | | - |
123 | | -/** |
124 | | - * Holds if `t` may match the dangerous `prefix` and some suffix, indicating intent to prevent a vulnerability of kind `kind`. |
125 | | - */ |
126 | | -predicate matchesDangerousPrefix(EmptyReplaceRegExpTerm t, string prefix, string kind) { |
127 | | - prefix = getADangerousMatchedPrefix(t) and |
128 | | - ( |
129 | | - kind = "path injection" and |
130 | | - // upwards navigation |
131 | | - prefix = ["/..", "../"] and |
132 | | - not t.getSuccessor*().getAMatchedString().regexpMatch("(?is).*[a-z0-9_-].*") // explicit path name mentions make this an unlikely sanitizer |
133 | | - or |
134 | | - kind = "HTML element injection" and |
135 | | - ( |
136 | | - // comments |
137 | | - prefix = "<!--" and |
138 | | - not t.getSuccessor*().getAMatchedString().regexpMatch("(?is).*[a-z0-9_].*") // explicit comment content mentions make this an unlikely sanitizer |
139 | | - or |
140 | | - // specific tags |
141 | | - prefix = "<" + ["iframe", "script", "cript", "scrip", "style"] // the `cript|scrip` case has been observed in the wild several times |
142 | | - ) |
143 | | - ) |
144 | | - or |
145 | | - kind = "HTML attribute injection" and |
146 | | - prefix = |
147 | | - [ |
148 | | - // ordinary event handler prefix |
149 | | - "on", |
150 | | - // angular prefixes |
151 | | - "ng-", "ng:", "data-ng-", "x-ng-" |
152 | | - ] and |
153 | | - ( |
154 | | - // explicit matching: `onclick` and `ng-bind` |
155 | | - t.getAMatchedString().regexpMatch("(?i)" + prefix + "[a-z]+") |
156 | | - or |
157 | | - // regexp-based matching: `on[a-z]+` |
158 | | - exists(EmptyReplaceRegExpTerm start | start = t.getAChild() | |
159 | | - start.getConstantValue().regexpMatch("(?i)[^a-z]*" + prefix) and |
160 | | - isCommonWordMatcher(start.getSuccessor()) |
161 | | - ) |
162 | | - ) |
163 | | -} |
164 | | - |
165 | | -/** |
166 | | - * Holds if `t` is a common pattern for matching words |
167 | | - */ |
168 | | -predicate isCommonWordMatcher(RegExpTerm t) { |
169 | | - exists(RegExpTerm quantified | quantified = t.(RegExpQuantifier).getChild(0) | |
170 | | - // [a-z]+ and similar |
171 | | - quantified |
172 | | - .(RegExpCharacterClass) |
173 | | - .getAChild() |
174 | | - .(RegExpCharacterRange) |
175 | | - .isRange(["a", "A"], ["z", "Z"]) |
176 | | - or |
177 | | - // \w+ or [\w]+ |
178 | | - [quantified, quantified.(RegExpCharacterClass).getAChild()] |
179 | | - .(RegExpCharacterClassEscape) |
180 | | - .getValue() = "w" |
181 | | - ) |
182 | | -} |
| 17 | +private import semmle.javascript.security.IncompleteMultiCharacterSanitization |
183 | 18 |
|
184 | 19 | from |
185 | 20 | StringReplaceCall replace, EmptyReplaceRegExpTerm regexp, EmptyReplaceRegExpTerm dangerous, |
|
0 commit comments