Skip to content

Commit 962416f

Browse files
authored
Merge pull request #805 from asger-semmle/callback-taint-source
Approved by xiemaisi
2 parents 8b029a2 + ccbfaa7 commit 962416f

File tree

5 files changed

+181
-28
lines changed

5 files changed

+181
-28
lines changed

javascript/ql/src/semmle/javascript/dataflow/Configuration.qll

Lines changed: 62 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -660,28 +660,82 @@ private predicate flowThroughProperty(
660660
* All of this is done under configuration `cfg`, and `arg` flows along a path
661661
* summarized by `summary`, while `cb` is only tracked locally.
662662
*/
663-
private predicate higherOrderCall(
663+
private predicate summarizedHigherOrderCall(
664664
DataFlow::Node arg, DataFlow::Node cb, int i, DataFlow::Configuration cfg, PathSummary summary
665665
) {
666-
exists (Function f, DataFlow::InvokeNode outer, DataFlow::InvokeNode inner, int j,
667-
DataFlow::Node innerArg, DataFlow::ParameterNode cbParm, PathSummary oldSummary |
666+
exists(
667+
Function f, DataFlow::InvokeNode outer, DataFlow::InvokeNode inner, int j,
668+
DataFlow::Node innerArg, DataFlow::ParameterNode cbParm, PathSummary oldSummary
669+
|
668670
reachableFromInput(f, outer, arg, innerArg, cfg, oldSummary) and
669671
argumentPassing(outer, cb, f, cbParm) and
670-
innerArg = inner.getArgument(j) |
672+
innerArg = inner.getArgument(j)
673+
|
671674
// direct higher-order call
672675
cbParm.flowsTo(inner.getCalleeNode()) and
673676
i = j and
674677
summary = oldSummary
675678
or
676679
// indirect higher-order call
677-
exists (DataFlow::Node cbArg, PathSummary newSummary |
680+
exists(DataFlow::Node cbArg, PathSummary newSummary |
678681
cbParm.flowsTo(cbArg) and
679-
higherOrderCall(innerArg, cbArg, i, cfg, newSummary) and
682+
summarizedHigherOrderCall(innerArg, cbArg, i, cfg, newSummary) and
680683
summary = oldSummary.append(PathSummary::call()).append(newSummary)
681684
)
682685
)
683686
}
684687

688+
/**
689+
* Holds if `arg` is passed as the `i`th argument to `callback` through a callback invocation.
690+
*
691+
* This can be a summarized call, that is, `arg` and `callback` flow into a call,
692+
* `f(arg, callback)`, which performs the invocation.
693+
*
694+
* Alternatively, the callback can flow into a call `f(callback)` which itself provides the `arg`.
695+
* That is, `arg` refers to a value defined in `f` or one of its callees.
696+
*
697+
* In the latter case, the summary will consists of both a `return` and `call` step, for the following reasons:
698+
*
699+
* - Having `return` in the summary ensures that arguments passsed to `f` can't propagate back out along this edge.
700+
* This is, `arg` should be defined in `f` or one of its callees, since a context-dependent value (i.e. parameter)
701+
* should not propagate to every callback passed to `f`.
702+
* In reality, `arg` may refer to a parameter, but in that case, the `return` summary prevents the edge from ever
703+
* being used.
704+
*
705+
* - Having `call` in the summary ensures that values we propagate into the callback definition along this edge
706+
* can't propagate out to other callers of that function through a return statement.
707+
*
708+
* - The flow label mapping of the summary corresponds to the transformation from `arg` to the
709+
* invocation of the callback.
710+
*/
711+
predicate higherOrderCall(
712+
DataFlow::Node arg, DataFlow::SourceNode callback, int i, DataFlow::Configuration cfg,
713+
PathSummary summary
714+
) {
715+
// Summarized call
716+
exists(DataFlow::Node cb |
717+
summarizedHigherOrderCall(arg, cb, i, cfg, summary) and
718+
callback.flowsTo(cb)
719+
)
720+
or
721+
// Local invocation of a parameter
722+
isRelevant(arg, cfg) and
723+
exists(DataFlow::InvokeNode invoke |
724+
arg = invoke.getArgument(i) and
725+
invoke = callback.(DataFlow::ParameterNode).getACall() and
726+
summary = PathSummary::call()
727+
)
728+
or
729+
// Forwarding of the callback parameter (but not the argument).
730+
exists(DataFlow::Node cbArg, DataFlow::SourceNode innerCb, PathSummary oldSummary |
731+
higherOrderCall(arg, innerCb, i, cfg, oldSummary) and
732+
callStep(cbArg, innerCb) and
733+
callback.flowsTo(cbArg) and
734+
// Prepend a 'return' summary to prevent context-dependent values (i.e. parameters) from using this edge.
735+
summary = PathSummary::return().append(oldSummary)
736+
)
737+
}
738+
685739
/**
686740
* Holds if `pred` is passed as an argument to a function `f` which also takes a
687741
* callback parameter `cb` and then invokes `cb`, passing `pred` into parameter `succ`
@@ -693,12 +747,8 @@ private predicate higherOrderCall(
693747
private predicate flowIntoHigherOrderCall(
694748
DataFlow::Node pred, DataFlow::Node succ, DataFlow::Configuration cfg, PathSummary summary
695749
) {
696-
exists(
697-
DataFlow::Node fArg, DataFlow::FunctionNode cb,
698-
int i, PathSummary oldSummary
699-
|
700-
higherOrderCall(pred, fArg, i, cfg, oldSummary) and
701-
cb = fArg.getALocalSource() and
750+
exists(DataFlow::FunctionNode cb, int i, PathSummary oldSummary |
751+
higherOrderCall(pred, cb, i, cfg, oldSummary) and
702752
succ = cb.getParameter(i) and
703753
summary = oldSummary.append(PathSummary::call())
704754
)

javascript/ql/src/semmle/javascript/dataflow/TrackedNodes.qll

Lines changed: 49 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -216,28 +216,69 @@ private module NodeTracking {
216216
* invokes `cb`, passing `arg` as its `i`th argument. `arg` flows along a path summarized
217217
* by `summary`, while `cb` is only tracked locally.
218218
*/
219-
private predicate higherOrderCall(
219+
private predicate summarizedHigherOrderCall(
220220
DataFlow::Node arg, DataFlow::Node cb, int i, PathSummary summary
221221
) {
222-
exists (Function f, DataFlow::InvokeNode outer, DataFlow::InvokeNode inner, int j,
223-
DataFlow::Node innerArg, DataFlow::ParameterNode cbParm, PathSummary oldSummary |
222+
exists(
223+
Function f, DataFlow::InvokeNode outer, DataFlow::InvokeNode inner, int j,
224+
DataFlow::Node innerArg, DataFlow::ParameterNode cbParm, PathSummary oldSummary
225+
|
224226
reachableFromInput(f, outer, arg, innerArg, oldSummary) and
225227
argumentPassing(outer, cb, f, cbParm) and
226-
innerArg = inner.getArgument(j) |
228+
innerArg = inner.getArgument(j)
229+
|
227230
// direct higher-order call
228231
cbParm.flowsTo(inner.getCalleeNode()) and
229232
i = j and
230233
summary = oldSummary
231234
or
232235
// indirect higher-order call
233-
exists (DataFlow::Node cbArg, PathSummary newSummary |
236+
exists(DataFlow::Node cbArg, PathSummary newSummary |
234237
cbParm.flowsTo(cbArg) and
235-
higherOrderCall(innerArg, cbArg, i, newSummary) and
238+
summarizedHigherOrderCall(innerArg, cbArg, i, newSummary) and
236239
summary = oldSummary.append(PathSummary::call()).append(newSummary)
237240
)
238241
)
239242
}
240243

244+
/**
245+
* Holds if `arg` is passed as the `i`th argument to `callback` through a callback invocation.
246+
*
247+
* This can be a summarized call, that is, `arg` and `callback` flow into a call,
248+
* `f(arg, callback)`, which performs the invocation.
249+
*
250+
* Alternatively, the callback can flow into a call `f(callback)` which itself provides the `arg`.
251+
* That is, `arg` refers to a value defined in `f` or one of its callees.
252+
*/
253+
predicate higherOrderCall(
254+
DataFlow::Node arg, DataFlow::SourceNode callback, int i, PathSummary summary
255+
) {
256+
// Summarized call
257+
exists(DataFlow::Node cb |
258+
summarizedHigherOrderCall(arg, cb, i, summary) and
259+
callback.flowsTo(cb)
260+
)
261+
or
262+
// Local invocation of a parameter
263+
isRelevant(arg) and
264+
exists(DataFlow::InvokeNode invoke |
265+
arg = invoke.getArgument(i) and
266+
invoke = callback.(DataFlow::ParameterNode).getACall() and
267+
summary = PathSummary::call()
268+
)
269+
or
270+
// Forwarding of the callback parameter (but not the argument).
271+
// We use a return summary since flow moves back towards the call site.
272+
// This ensures that an argument that is only tainted in some contexts cannot flow
273+
// out to every callback.
274+
exists(DataFlow::Node cbArg, DataFlow::SourceNode innerCb, PathSummary oldSummary |
275+
higherOrderCall(arg, innerCb, i, oldSummary) and
276+
callStep(cbArg, innerCb) and
277+
callback.flowsTo(cbArg) and
278+
summary = PathSummary::return().append(oldSummary)
279+
)
280+
}
281+
241282
/**
242283
* Holds if `pred` is passed as an argument to a function `f` which also takes a
243284
* callback parameter `cb` and then invokes `cb`, passing `pred` into parameter `succ`
@@ -247,12 +288,8 @@ private module NodeTracking {
247288
private predicate flowIntoHigherOrderCall(
248289
DataFlow::Node pred, DataFlow::Node succ, PathSummary summary
249290
) {
250-
exists(
251-
DataFlow::Node fArg, DataFlow::FunctionNode cb,
252-
int i, PathSummary oldSummary
253-
|
254-
higherOrderCall(pred, fArg, i, oldSummary) and
255-
cb = fArg.getALocalSource() and
291+
exists(DataFlow::FunctionNode cb, int i, PathSummary oldSummary |
292+
higherOrderCall(pred, cb, i, oldSummary) and
256293
succ = cb.getParameter(i) and
257294
summary = oldSummary.append(PathSummary::call())
258295
)

javascript/ql/src/semmle/javascript/dataflow/internal/FlowSteps.qll

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -109,9 +109,7 @@ predicate argumentPassing(
109109
* Holds if there is a flow step from `pred` to `succ` through parameter passing
110110
* to a function call.
111111
*/
112-
predicate callStep(DataFlow::Node pred, DataFlow::Node succ) {
113-
argumentPassing(_, pred, _, succ)
114-
}
112+
predicate callStep(DataFlow::Node pred, DataFlow::Node succ) { argumentPassing(_, pred, _, succ) }
115113

116114
/**
117115
* Holds if there is a flow step from `pred` to `succ` through returning
@@ -258,12 +256,18 @@ predicate loadStep(DataFlow::Node pred, DataFlow::PropRead succ, string prop) {
258256
* invocation.
259257
*/
260258
predicate callback(DataFlow::Node arg, DataFlow::SourceNode cb) {
261-
exists (DataFlow::InvokeNode invk, DataFlow::ParameterNode cbParm, DataFlow::Node cbArg |
259+
exists(DataFlow::InvokeNode invk, DataFlow::ParameterNode cbParm, DataFlow::Node cbArg |
262260
arg = invk.getAnArgument() and
263261
cbParm.flowsTo(invk.getCalleeNode()) and
264262
callStep(cbArg, cbParm) and
265263
cb.flowsTo(cbArg)
266264
)
265+
or
266+
exists(DataFlow::ParameterNode cbParm, DataFlow::Node cbArg |
267+
callback(arg, cbParm) and
268+
callStep(cbArg, cbParm) and
269+
cb.flowsTo(cbArg)
270+
)
267271
}
268272

269273
/**

javascript/ql/test/library-tests/TaintTracking/BasicTaintTracking.expected

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,14 @@
11
| advanced-callgraph.js:2:13:2:20 | source() | advanced-callgraph.js:6:22:6:22 | v |
2+
| callbacks.js:4:6:4:13 | source() | callbacks.js:34:27:34:27 | x |
3+
| callbacks.js:4:6:4:13 | source() | callbacks.js:35:27:35:27 | x |
4+
| callbacks.js:5:6:5:13 | source() | callbacks.js:34:27:34:27 | x |
5+
| callbacks.js:5:6:5:13 | source() | callbacks.js:35:27:35:27 | x |
6+
| callbacks.js:25:16:25:23 | source() | callbacks.js:47:26:47:26 | x |
7+
| callbacks.js:25:16:25:23 | source() | callbacks.js:48:26:48:26 | x |
8+
| callbacks.js:37:17:37:24 | source() | callbacks.js:37:37:37:37 | x |
9+
| callbacks.js:44:17:44:24 | source() | callbacks.js:41:10:41:10 | x |
10+
| callbacks.js:50:18:50:25 | source() | callbacks.js:30:29:30:29 | y |
11+
| callbacks.js:51:18:51:25 | source() | callbacks.js:30:29:30:29 | y |
212
| constructor-calls.js:4:18:4:25 | source() | constructor-calls.js:18:8:18:14 | c.taint |
313
| constructor-calls.js:4:18:4:25 | source() | constructor-calls.js:22:8:22:19 | c_safe.taint |
414
| constructor-calls.js:10:16:10:23 | source() | constructor-calls.js:26:8:26:14 | d.taint |
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import * as dummy from 'dummy'; // treat as module
2+
3+
function provideTaint(cb) {
4+
cb(source());
5+
cb(source());
6+
}
7+
8+
function provideTaint2(cb) {
9+
provideTaint(cb);
10+
provideTaint(cb); // suppress precision gains from functions with unique call site
11+
}
12+
13+
function forwardTaint(x, cb) {
14+
cb(x);
15+
cb(x);
16+
}
17+
18+
function forwardTaint2(x, cb) {
19+
forwardTaint(x, cb);
20+
forwardTaint(x, cb);
21+
}
22+
23+
function middleSource(cb) {
24+
// The source occurs in-between the callback definition and the callback invocation.
25+
forwardTaint(source(), cb);
26+
}
27+
28+
function middleCallback(x) {
29+
// The callback definition occurs in-between the source and the callback invocation.
30+
forwardTaint(x, y => sink(y)); // NOT OK
31+
}
32+
33+
function test() {
34+
provideTaint2(x => sink(x)); // NOT OK
35+
provideTaint2(x => sink(x)); // NOT OK
36+
37+
forwardTaint2(source(), x => sink(x)); // NOT OK
38+
forwardTaint2("safe", x => sink(x)); // OK
39+
40+
function helper1(x) {
41+
sink(x); // NOT OK
42+
return x;
43+
}
44+
forwardTaint2(source(), helper1);
45+
sink(helper1("safe")); // OK
46+
47+
middleSource(x => sink(x)); // NOT OK
48+
middleSource(x => sink(x)); // NOT OK
49+
50+
middleCallback(source());
51+
middleCallback(source());
52+
}

0 commit comments

Comments
 (0)