Skip to content

Commit 4c9edaf

Browse files
author
Max Schaefer
authored
Merge pull request #1211 from esben-semmle/js/type-tracking-for-incomplete-hostname-regexp
JS: type tracking for js/incomplete-hostname-regexp
2 parents ce53a7d + 2d66069 commit 4c9edaf

File tree

10 files changed

+150
-104
lines changed

10 files changed

+150
-104
lines changed

change-notes/1.21/analysis-javascript.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
| Client-side URL redirect | More results and fewer false-positive results | This rule now recognizes additional uses of the document URL. This rule now treats URLs as safe in more cases where the hostname cannot be tampered with. |
3030
| Double escaping or unescaping | More results | This rule now considers the flow of regular expressions literals. |
3131
| Expression has no effect | Fewer false-positive results | This rule now treats uses of `Object.defineProperty` more conservatively. |
32+
| Incomplete regular expression for hostnames | More results | This rule now tracks regular expressions for host names further. |
3233
| Incomplete string escaping or encoding | More results | This rule now considers the flow of regular expressions literals. |
3334
| Replacement of a substring with itself | More results | This rule now considers the flow of regular expressions literals. |
3435
| Server-side URL redirect | Fewer false-positive results | This rule now treats URLs as safe in more cases where the hostname cannot be tampered with. |

javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.qhelp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,13 +59,14 @@
5959
<p>
6060

6161
Address this vulnerability by escaping <code>.</code>
62-
appropriately: <code>let regex = /(www|beta|)\.example\.com/</code>.
62+
appropriately: <code>let regex = /((www|beta)\.)?example\.com/</code>.
6363

6464
</p>
6565

6666
</example>
6767

6868
<references>
69+
<li>MDN: <a href="https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions">Regular Expressions</a></li>
6970
<li>OWASP: <a href="https://www.owasp.org/index.php/Server_Side_Request_Forgery">SSRF</a></li>
7071
<li>OWASP: <a href="https://www.owasp.org/index.php/Unvalidated_Redirects_and_Forwards_Cheat_Sheet">XSS Unvalidated Redirects and Forwards Cheat Sheet</a>.</li>
7172
</references>

javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql

Lines changed: 32 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,35 @@
1313
import javascript
1414

1515
/**
16-
* A taint tracking configuration for incomplete hostname regular expressions sources.
16+
* Gets a node whose value may flow (inter-procedurally) to a position where it is interpreted
17+
* as a regular expression.
1718
*/
18-
class Configuration extends TaintTracking::Configuration {
19-
Configuration() { this = "IncompleteHostnameRegExpTracking" }
19+
DataFlow::Node regExpSource(DataFlow::Node re, DataFlow::TypeBackTracker t) {
20+
t.start() and
21+
re = result and
22+
isInterpretedAsRegExp(result)
23+
or
24+
exists(DataFlow::TypeBackTracker t2, DataFlow::Node succ | succ = regExpSource(re, t2) |
25+
t2 = t.smallstep(result, succ)
26+
or
27+
any(TaintTracking::AdditionalTaintStep dts).step(result, succ) and
28+
t = t2
29+
)
30+
}
2031

21-
override predicate isSource(DataFlow::Node source) {
22-
isIncompleteHostNameRegExpPattern(source.getStringValue(), _)
23-
}
32+
DataFlow::Node regExpSource(DataFlow::Node re) {
33+
result = regExpSource(re, DataFlow::TypeBackTracker::end())
34+
}
2435

25-
override predicate isSink(DataFlow::Node sink) { isInterpretedAsRegExp(sink) }
36+
/** Holds if `re` is a regular expression with value `pattern`. */
37+
predicate regexp(DataFlow::Node re, string pattern, string kind, DataFlow::Node aux) {
38+
re.asExpr().(RegExpLiteral).getValue() = pattern and
39+
kind = "regular expression" and
40+
aux = re
41+
or
42+
re = regExpSource(aux) and
43+
pattern = re.getStringValue() and
44+
kind = "string, which is used as a regular expression $@,"
2645
}
2746

2847
/**
@@ -36,22 +55,14 @@ predicate isIncompleteHostNameRegExpPattern(string pattern, string hostPart) {
3655
// an unescaped single `.`
3756
"(?<!\\\\)[.]" +
3857
// immediately followed by a sequence of subdomains, perhaps with some regex characters mixed in, followed by a known TLD
39-
"([():|?a-z0-9-]+(\\\\)?[.](" + RegExpPatterns::commonTLD() + "))" + ".*", 1)
58+
"([():|?a-z0-9-]+(\\\\)?[.]" + RegExpPatterns::commonTLD() + ")" + ".*", 1)
4059
}
4160

42-
from Expr e, string pattern, string hostPart
43-
where
44-
(
45-
e.(RegExpLiteral).getValue() = pattern
46-
or
47-
exists(Configuration cfg |
48-
cfg.hasFlow(e.flow(), _) and
49-
e.mayHaveStringValue(pattern)
50-
)
51-
) and
61+
from DataFlow::Node re, string pattern, string hostPart, string kind, DataFlow::Node aux
62+
where regexp(re, pattern, kind, aux) and
5263
isIncompleteHostNameRegExpPattern(pattern, hostPart) and
5364
// ignore patterns with capture groups after the TLD
5465
not pattern.regexpMatch("(?i).*[.](" + RegExpPatterns::commonTLD() + ").*[(][?]:.*[)].*")
55-
select e,
56-
"This regular expression has an unescaped '.' before '" + hostPart +
57-
"', so it might match more hosts than expected."
66+
select re,
67+
"This " + kind + " has an unescaped '.' before '" + hostPart +
68+
"', so it might match more hosts than expected.", aux, "here"

javascript/ql/src/Security/CWE-020/IncompleteUrlSubstringSanitization.ql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,8 @@ where
3838
(
3939
// target contains a domain on a common TLD, and perhaps some other URL components
4040
target
41-
.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+(" + RegExpPatterns::commonTLD() +
42-
")(:[0-9]+)?/?")
41+
.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+" + RegExpPatterns::commonTLD() +
42+
"(:[0-9]+)?/?")
4343
or
4444
// target is a HTTP URL to a domain on any TLD
4545
target.regexpMatch("(?i)https?://([a-z0-9-]+\\.)+([a-z]+)(:[0-9]+)?/?")

javascript/ql/src/Security/CWE-020/examples/IncompleteHostnameRegExp.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ app.get('/some/path', function(req, res) {
22
let url = req.param('url'),
33
host = urlLib.parse(url).host;
44
// BAD: the host of `url` may be controlled by an attacker
5-
let regex = /(www|beta|).example.com/;
5+
let regex = /((www|beta).)?example.com/;
66
if (host.match(regex)) {
77
res.redirect(url);
88
}

javascript/ql/src/semmle/javascript/Regexp.qll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -397,10 +397,10 @@ predicate isInterpretedAsRegExp(DataFlow::Node source) {
397397
*/
398398
module RegExpPatterns {
399399
/**
400-
* Gets a pattern that matches common top-level domain names.
400+
* Gets a pattern that matches common top-level domain names in lower case.
401401
*/
402402
string commonTLD() {
403403
// according to ranking by http://google.com/search?q=site:.<<TLD>>
404-
result = "com|org|edu|gov|uk|net|io"
404+
result = "(?:com|org|edu|gov|uk|net|io)(?![a-z0-9])"
405405
}
406406
}

javascript/ql/src/semmle/javascript/dataflow/Sources.qll

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -179,10 +179,7 @@ class SourceNode extends DataFlow::Node {
179179
*/
180180
pragma[inline]
181181
DataFlow::SourceNode backtrack(TypeBackTracker t2, TypeBackTracker t) {
182-
exists(StepSummary summary |
183-
StepSummary::step(result, this, summary) and
184-
t = t2.prepend(summary)
185-
)
182+
t2 = t.step(result, this)
186183
}
187184
}
188185

javascript/ql/src/semmle/javascript/dataflow/TypeTracking.qll

Lines changed: 71 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -41,13 +41,9 @@ class StepSummary extends TStepSummary {
4141
or
4242
this instanceof ReturnStep and result = "return"
4343
or
44-
exists(string prop | this = StoreStep(prop) |
45-
result = "store " + prop
46-
)
44+
exists(string prop | this = StoreStep(prop) | result = "store " + prop)
4745
or
48-
exists(string prop | this = LoadStep(prop) |
49-
result = "load" + prop
50-
)
46+
exists(string prop | this = LoadStep(prop) | result = "load " + prop)
5147
}
5248
}
5349

@@ -56,40 +52,44 @@ module StepSummary {
5652
* INTERNAL: Use `SourceNode.track()` or `SourceNode.backtrack()` instead.
5753
*/
5854
predicate step(DataFlow::SourceNode pred, DataFlow::SourceNode succ, StepSummary summary) {
59-
exists(DataFlow::Node predNode | pred.flowsTo(predNode) |
60-
// Flow through properties of objects
61-
propertyFlowStep(predNode, succ) and
62-
summary = LevelStep()
63-
or
64-
// Flow through global variables
65-
globalFlowStep(predNode, succ) and
66-
summary = LevelStep()
67-
or
68-
// Flow into function
69-
callStep(predNode, succ) and
70-
summary = CallStep()
71-
or
72-
// Flow out of function
73-
returnStep(predNode, succ) and
74-
summary = ReturnStep()
75-
or
76-
// Flow through an instance field between members of the same class
77-
DataFlow::localFieldStep(predNode, succ) and
78-
summary = LevelStep()
55+
exists(DataFlow::Node mid | pred.flowsTo(mid) | smallstep(mid, succ, summary))
56+
}
57+
58+
/**
59+
* INTERNAL: Use `TypeBackTracker.smallstep()` instead.
60+
*/
61+
predicate smallstep(DataFlow::Node pred, DataFlow::Node succ, StepSummary summary) {
62+
// Flow through properties of objects
63+
propertyFlowStep(pred, succ) and
64+
summary = LevelStep()
65+
or
66+
// Flow through global variables
67+
globalFlowStep(pred, succ) and
68+
summary = LevelStep()
69+
or
70+
// Flow into function
71+
callStep(pred, succ) and
72+
summary = CallStep()
73+
or
74+
// Flow out of function
75+
returnStep(pred, succ) and
76+
summary = ReturnStep()
77+
or
78+
// Flow through an instance field between members of the same class
79+
DataFlow::localFieldStep(pred, succ) and
80+
summary = LevelStep()
81+
or
82+
exists(string prop |
83+
basicStoreStep(pred, succ, prop) and
84+
summary = StoreStep(prop)
7985
or
80-
exists(string prop |
81-
basicStoreStep(predNode, succ, prop) and
82-
summary = StoreStep(prop)
83-
or
84-
loadStep(predNode, succ, prop) and
85-
summary = LoadStep(prop)
86-
)
86+
loadStep(pred, succ, prop) and
87+
summary = LoadStep(prop)
8788
)
8889
}
8990
}
9091

91-
private newtype TTypeTracker =
92-
MkTypeTracker(Boolean hasCall, OptionalPropertyName prop)
92+
private newtype TTypeTracker = MkTypeTracker(Boolean hasCall, OptionalPropertyName prop)
9393

9494
/**
9595
* EXPERIMENTAL.
@@ -136,9 +136,7 @@ class TypeTracker extends TTypeTracker {
136136
or
137137
step = LoadStep(prop) and result = MkTypeTracker(hasCall, "")
138138
or
139-
exists(string p |
140-
step = StoreStep(p) and prop = "" and result = MkTypeTracker(hasCall, p)
141-
)
139+
exists(string p | step = StoreStep(p) and prop = "" and result = MkTypeTracker(hasCall, p))
142140
}
143141

144142
/** Gets a textual representation of this summary. */
@@ -180,8 +178,7 @@ module TypeTracker {
180178
TypeTracker end() { result.end() }
181179
}
182180

183-
private newtype TTypeBackTracker =
184-
MkTypeBackTracker(Boolean hasReturn, OptionalPropertyName prop)
181+
private newtype TTypeBackTracker = MkTypeBackTracker(Boolean hasReturn, OptionalPropertyName prop)
185182

186183
/**
187184
* EXPERIMENTAL.
@@ -192,7 +189,7 @@ private newtype TTypeBackTracker =
192189
* therefore expected to called with a certain type of value.
193190
*
194191
* Note that type back-tracking does not provide a source/sink relation, that is,
195-
* it may determine that a node will be used in an API call somwwhere, but it won't
192+
* it may determine that a node will be used in an API call somewhere, but it won't
196193
* determine exactly where that use was, or the path that led to the use.
197194
*
198195
* It is recommended that all uses of this type is written on the following form,
@@ -203,7 +200,7 @@ private newtype TTypeBackTracker =
203200
* result = (< some API call >).getArgument(< n >).getALocalSource()
204201
* or
205202
* exists (DataFlow::TypeBackTracker t2 |
206-
* result = myCallback(t2).backtrack(t2, t)
203+
* t2 = t.step(result, myCallback(t2))
207204
* )
208205
* }
209206
*
@@ -225,9 +222,7 @@ class TypeBackTracker extends TTypeBackTracker {
225222
or
226223
step = ReturnStep() and result = MkTypeBackTracker(true, prop)
227224
or
228-
exists(string p |
229-
step = LoadStep(p) and prop = "" and result = MkTypeBackTracker(hasReturn, p)
230-
)
225+
exists(string p | step = LoadStep(p) and prop = "" and result = MkTypeBackTracker(hasReturn, p))
231226
or
232227
step = StoreStep(prop) and result = MkTypeBackTracker(hasReturn, "")
233228
}
@@ -265,6 +260,37 @@ class TypeBackTracker extends TTypeBackTracker {
265260
* This predicate is only defined if the type has not been tracked into a property.
266261
*/
267262
TypeBackTracker continue() { prop = "" and result = this }
263+
264+
/**
265+
* Gets the summary that corresponds to having taken a backwards
266+
* heap and/or inter-procedural step from `succ` to `pred`.
267+
*/
268+
pragma[inline]
269+
TypeBackTracker step(DataFlow::SourceNode pred, DataFlow::SourceNode succ) {
270+
exists(StepSummary summary |
271+
StepSummary::step(pred, succ, summary) and
272+
this = result.prepend(summary)
273+
)
274+
}
275+
276+
/**
277+
* Gets the summary that corresponds to having taken a backwards
278+
* local, heap and/or inter-procedural step from `succ` to `pred`.
279+
*
280+
* Unlike `TypeBackTracker::step`, this predicate exposes all edges
281+
* in the flowgraph, and not just the edges between
282+
* `SourceNode`s. It may therefore be less performant.
283+
*/
284+
pragma[inline]
285+
TypeBackTracker smallstep(DataFlow::Node pred, DataFlow::Node succ) {
286+
exists(StepSummary summary |
287+
StepSummary::smallstep(pred, succ, summary) and
288+
this = result.prepend(summary)
289+
)
290+
or
291+
pred = succ.getAPredecessor() and
292+
this = result
293+
}
268294
}
269295

270296
module TypeBackTracker {

0 commit comments

Comments
 (0)