Skip to content

Commit bdce247

Browse files
committed
C++: Add flow through arrays
This works by adding data-flow edges to skip over array expressions when reading from arrays. On the post-update side, there was already code to skip over array expressions when storing to arrays. That happens in `valueToUpdate` in `AddressFlow.qll`, which needed just a small tweak to support assignments with non-field expressions at the top-level LHS, like `*a = ...` or `a[0] = ...`. The new code in `AddressFlow.qll` is copy-pasted from `EscapesTree.qll`, and there is already a note in these files saying that they share a lot of code and must be maintained in sync.
1 parent 27b8dc2 commit bdce247

File tree

19 files changed

+380
-61
lines changed

19 files changed

+380
-61
lines changed

cpp/ql/src/semmle/code/cpp/dataflow/internal/AddressFlow.qll

Lines changed: 93 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ private predicate stdIdentityFunction(Function f) { f.hasQualifiedName("std", ["
2929
*/
3030
private predicate stdAddressOf(Function f) { f.hasQualifiedName("std", "addressof") }
3131

32-
private predicate lvalueToLvalueStep(Expr lvalueIn, Expr lvalueOut) {
32+
private predicate lvalueToLvalueStepPure(Expr lvalueIn, Expr lvalueOut) {
3333
lvalueIn.getConversion() = lvalueOut.(ParenthesisExpr)
3434
or
3535
// When an object is implicitly converted to a reference to one of its base
@@ -42,6 +42,10 @@ private predicate lvalueToLvalueStep(Expr lvalueIn, Expr lvalueOut) {
4242
// such casts.
4343
lvalueIn.getConversion() = lvalueOut and
4444
lvalueOut.(CStyleCast).isImplicit()
45+
}
46+
47+
private predicate lvalueToLvalueStep(Expr lvalueIn, Expr lvalueOut) {
48+
lvalueToLvalueStepPure(lvalueIn, lvalueOut)
4549
or
4650
// C++ only
4751
lvalueIn = lvalueOut.(PrefixCrementOperation).getOperand().getFullyConverted()
@@ -214,6 +218,69 @@ private predicate referenceToUpdate(Expr reference, Expr outer, ControlFlowNode
214218
)
215219
}
216220

221+
private predicate lvalueFromVariableAccess(VariableAccess va, Expr lvalue) {
222+
// Base case for non-reference types.
223+
lvalue = va and
224+
not va.getConversion() instanceof ReferenceDereferenceExpr
225+
or
226+
// Base case for reference types where we pretend that they are
227+
// non-reference types. The type of the target of `va` can be `ReferenceType`
228+
// or `FunctionReferenceType`.
229+
lvalue = va.getConversion().(ReferenceDereferenceExpr)
230+
or
231+
// lvalue -> lvalue
232+
exists(Expr prev |
233+
lvalueFromVariableAccess(va, prev) and
234+
lvalueToLvalueStep(prev, lvalue)
235+
)
236+
or
237+
// pointer -> lvalue
238+
exists(Expr prev |
239+
pointerFromVariableAccess(va, prev) and
240+
pointerToLvalueStep(prev, lvalue)
241+
)
242+
or
243+
// reference -> lvalue
244+
exists(Expr prev |
245+
referenceFromVariableAccess(va, prev) and
246+
referenceToLvalueStep(prev, lvalue)
247+
)
248+
}
249+
250+
private predicate pointerFromVariableAccess(VariableAccess va, Expr pointer) {
251+
// pointer -> pointer
252+
exists(Expr prev |
253+
pointerFromVariableAccess(va, prev) and
254+
pointerToPointerStep(prev, pointer)
255+
)
256+
or
257+
// reference -> pointer
258+
exists(Expr prev |
259+
referenceFromVariableAccess(va, prev) and
260+
referenceToPointerStep(prev, pointer)
261+
)
262+
or
263+
// lvalue -> pointer
264+
exists(Expr prev |
265+
lvalueFromVariableAccess(va, prev) and
266+
lvalueToPointerStep(prev, pointer)
267+
)
268+
}
269+
270+
private predicate referenceFromVariableAccess(VariableAccess va, Expr reference) {
271+
// reference -> reference
272+
exists(Expr prev |
273+
referenceFromVariableAccess(va, prev) and
274+
referenceToReferenceStep(prev, reference)
275+
)
276+
or
277+
// lvalue -> reference
278+
exists(Expr prev |
279+
lvalueFromVariableAccess(va, prev) and
280+
lvalueToReferenceStep(prev, reference)
281+
)
282+
}
283+
217284
/**
218285
* Holds if `node` is a control-flow node that may modify `inner` (or what it
219286
* points to) through `outer`. The two expressions may be `Conversion`s. Plain
@@ -236,7 +303,7 @@ predicate valueToUpdate(Expr inner, Expr outer, ControlFlowNode node) {
236303
(
237304
inner instanceof VariableAccess and
238305
// Don't track non-field assignments
239-
(assignmentTo(outer, _) implies inner instanceof FieldAccess)
306+
not (assignmentTo(outer, _) and outer.(VariableAccess).getTarget() instanceof StackVariable)
240307
or
241308
inner instanceof ThisExpr
242309
or
@@ -245,3 +312,27 @@ predicate valueToUpdate(Expr inner, Expr outer, ControlFlowNode node) {
245312
// can't do anything useful with those at the moment.
246313
)
247314
}
315+
316+
/**
317+
* Holds if `e` is a fully-converted expression that evaluates to an lvalue
318+
* derived from `va` and is used for reading from or assigning to. This is in
319+
* contrast with a variable access that is used for taking an address (`&x`)
320+
* or simply discarding its value (`x;`).
321+
*
322+
* This analysis does not propagate across assignments or calls, and unlike
323+
* `variableAccessedAsValue` in `semmle.code.cpp.dataflow.EscapesTree` it
324+
* propagates through array accesses but not field accesses. The analysis is
325+
* also not concerned with whether the lvalue `e` is converted to an rvalue --
326+
* to examine that, use the relevant member predicates on `Expr`.
327+
*
328+
* If `va` has reference type, the analysis concerns the value pointed to by
329+
* the reference rather than the reference itself. The expression `e` may be a
330+
* `Conversion`.
331+
*/
332+
predicate variablePartiallyAccessed(VariableAccess va, Expr e) {
333+
lvalueFromVariableAccess(va, e) and
334+
not lvalueToLvalueStepPure(e, _) and
335+
not lvalueToPointerStep(e, _) and
336+
not lvalueToReferenceStep(e, _) and
337+
not e = any(ExprInVoidContext eivc | e = eivc.getConversion*())
338+
}

cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowUtil.qll

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ private import cpp
66
private import semmle.code.cpp.dataflow.internal.FlowVar
77
private import semmle.code.cpp.models.interfaces.DataFlow
88
private import semmle.code.cpp.controlflow.Guards
9+
private import semmle.code.cpp.dataflow.internal.AddressFlow
910

1011
cached
1112
private newtype TNode =
@@ -610,6 +611,15 @@ private predicate exprToExprStep_nocfg(Expr fromExpr, Expr toExpr) {
610611
or
611612
toExpr.(AddressOfExpr).getOperand() = fromExpr
612613
or
614+
// This rule enables flow from an array to its elements. Example: `a` to
615+
// `a[i]` or `*a`, where `a` is an array type. It does not enable flow from a
616+
// pointer to its indirection as in `p[i]` where `p` is a pointer type.
617+
exists(Expr toConverted |
618+
variablePartiallyAccessed(fromExpr, toConverted) and
619+
toExpr = toConverted.getUnconverted() and
620+
not toExpr = fromExpr
621+
)
622+
or
613623
toExpr.(BuiltInOperationBuiltInAddressOf).getOperand() = fromExpr
614624
or
615625
// The following case is needed to track the qualifier object for flow

cpp/ql/test/library-tests/dataflow/dataflow-tests/clang.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,6 @@ void following_pointers(
4848

4949
int stackArray[2] = { source(), source() };
5050
stackArray[0] = source();
51-
sink(stackArray); // no flow
51+
sink(stackArray); // flow
5252
}
5353

cpp/ql/test/library-tests/dataflow/dataflow-tests/localFlow.expected

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
| example.c:24:13:24:18 | coords [post update] | example.c:26:19:26:24 | coords |
1414
| example.c:24:13:24:30 | ... = ... | example.c:24:2:24:30 | ... = ... |
1515
| example.c:24:13:24:30 | ... = ... | example.c:24:20:24:20 | y [post update] |
16+
| example.c:24:20:24:20 | y | example.c:24:13:24:30 | ... = ... |
1617
| example.c:24:24:24:30 | ... + ... | example.c:24:13:24:30 | ... = ... |
1718
| example.c:26:2:26:25 | ... = ... | example.c:26:9:26:9 | x [post update] |
1819
| example.c:26:13:26:16 | call to getX | example.c:26:2:26:25 | ... = ... |

cpp/ql/test/library-tests/dataflow/dataflow-tests/test.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -428,7 +428,7 @@ void intPointerSourceCaller2() {
428428
int local[1];
429429
intPointerSource(local);
430430
sink(local); // tainted
431-
sink(*local); // clean
431+
sink(*local); // tainted
432432
}
433433

434434
void intArraySourceCaller() {
@@ -441,7 +441,7 @@ void intArraySourceCaller2() {
441441
int local[2];
442442
intArraySource(local, 2);
443443
sink(local); // tainted
444-
sink(*local); // clean
444+
sink(*local); // tainted
445445
}
446446

447447
///////////////////////////////////////////////////////////////////////////////
@@ -468,5 +468,5 @@ void intOutparamSource(int *p) {
468468
void viaOutparam() {
469469
int x = 0;
470470
intOutparamSource(&x);
471-
sink(x); // tainted [FALSE NEGATIVE]
471+
sink(x); // tainted
472472
}

cpp/ql/test/library-tests/dataflow/dataflow-tests/test.expected

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
| clang.cpp:30:27:30:34 | call to getFirst | clang.cpp:28:27:28:32 | call to source |
1717
| clang.cpp:37:10:37:11 | m2 | clang.cpp:34:32:34:37 | call to source |
1818
| clang.cpp:45:17:45:18 | m2 | clang.cpp:43:35:43:40 | call to source |
19+
| clang.cpp:51:8:51:17 | stackArray | clang.cpp:50:19:50:24 | call to source |
1920
| dispatch.cpp:11:38:11:38 | x | dispatch.cpp:37:19:37:24 | call to source |
2021
| dispatch.cpp:11:38:11:38 | x | dispatch.cpp:45:18:45:23 | call to source |
2122
| dispatch.cpp:35:16:35:25 | call to notSource1 | dispatch.cpp:9:37:9:42 | call to source |
@@ -79,12 +80,17 @@
7980
| test.cpp:424:8:424:12 | local | test.cpp:423:20:423:25 | ref arg & ... |
8081
| test.cpp:430:8:430:12 | local | test.cpp:428:7:428:11 | local |
8182
| test.cpp:430:8:430:12 | local | test.cpp:429:20:429:24 | ref arg local |
83+
| test.cpp:431:8:431:13 | * ... | test.cpp:428:7:428:11 | local |
84+
| test.cpp:431:8:431:13 | * ... | test.cpp:429:20:429:24 | ref arg local |
8285
| test.cpp:437:8:437:12 | local | test.cpp:435:7:435:11 | local |
8386
| test.cpp:437:8:437:12 | local | test.cpp:436:18:436:23 | ref arg & ... |
8487
| test.cpp:443:8:443:12 | local | test.cpp:441:7:441:11 | local |
8588
| test.cpp:443:8:443:12 | local | test.cpp:442:18:442:22 | ref arg local |
89+
| test.cpp:444:8:444:13 | * ... | test.cpp:441:7:441:11 | local |
90+
| test.cpp:444:8:444:13 | * ... | test.cpp:442:18:442:22 | ref arg local |
8691
| test.cpp:450:9:450:22 | (statement expression) | test.cpp:449:26:449:32 | source1 |
8792
| test.cpp:461:8:461:12 | local | test.cpp:449:26:449:32 | source1 |
93+
| test.cpp:471:8:471:8 | x | test.cpp:465:8:465:13 | call to source |
8894
| true_upon_entry.cpp:21:8:21:8 | x | true_upon_entry.cpp:17:11:17:16 | call to source |
8995
| true_upon_entry.cpp:29:8:29:8 | x | true_upon_entry.cpp:27:9:27:14 | call to source |
9096
| true_upon_entry.cpp:39:8:39:8 | x | true_upon_entry.cpp:33:11:33:16 | call to source |

cpp/ql/test/library-tests/dataflow/dataflow-tests/test_diff.expected

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
| BarrierGuard.cpp:60:11:60:16 | BarrierGuard.cpp:62:14:62:14 | AST only |
33
| clang.cpp:12:9:12:20 | clang.cpp:22:8:22:20 | AST only |
44
| clang.cpp:39:42:39:47 | clang.cpp:41:18:41:19 | IR only |
5+
| clang.cpp:50:19:50:24 | clang.cpp:51:8:51:17 | AST only |
56
| dispatch.cpp:16:37:16:42 | dispatch.cpp:32:16:32:24 | IR only |
67
| dispatch.cpp:16:37:16:42 | dispatch.cpp:40:15:40:23 | IR only |
78
| dispatch.cpp:22:37:22:42 | dispatch.cpp:31:16:31:24 | IR only |
@@ -41,11 +42,16 @@
4142
| test.cpp:422:7:422:11 | test.cpp:424:8:424:12 | AST only |
4243
| test.cpp:423:20:423:25 | test.cpp:424:8:424:12 | AST only |
4344
| test.cpp:428:7:428:11 | test.cpp:430:8:430:12 | AST only |
45+
| test.cpp:428:7:428:11 | test.cpp:431:8:431:13 | AST only |
4446
| test.cpp:429:20:429:24 | test.cpp:430:8:430:12 | AST only |
47+
| test.cpp:429:20:429:24 | test.cpp:431:8:431:13 | AST only |
4548
| test.cpp:435:7:435:11 | test.cpp:437:8:437:12 | AST only |
4649
| test.cpp:436:18:436:23 | test.cpp:437:8:437:12 | AST only |
4750
| test.cpp:441:7:441:11 | test.cpp:443:8:443:12 | AST only |
51+
| test.cpp:441:7:441:11 | test.cpp:444:8:444:13 | AST only |
4852
| test.cpp:442:18:442:22 | test.cpp:443:8:443:12 | AST only |
53+
| test.cpp:442:18:442:22 | test.cpp:444:8:444:13 | AST only |
54+
| test.cpp:465:8:465:13 | test.cpp:471:8:471:8 | AST only |
4955
| true_upon_entry.cpp:9:11:9:16 | true_upon_entry.cpp:13:8:13:8 | IR only |
5056
| true_upon_entry.cpp:62:11:62:16 | true_upon_entry.cpp:66:8:66:8 | IR only |
5157
| true_upon_entry.cpp:98:11:98:16 | true_upon_entry.cpp:105:8:105:8 | IR only |

cpp/ql/test/library-tests/dataflow/fields/arrays.cpp

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,17 @@ void *user_input(void);
44
void local_array() {
55
void *arr[10] = { 0 };
66
arr[0] = user_input();
7-
sink(arr[0]); // $ir $f-:ast
8-
sink(arr[1]);
9-
sink(*arr); // $ir $f-:ast
10-
sink(*&arr[0]); // $ir $f-:ast
7+
sink(arr[0]); // $ast,ir
8+
sink(arr[1]); // $f+:ast
9+
sink(*arr); // $ast,ir
10+
sink(*&arr[0]); // $ast,ir
1111
}
1212

1313
void local_array_convoluted_assign() {
1414
void *arr[10] = { 0 };
1515
*&arr[0] = user_input();
16-
sink(arr[0]); // $ir $f-:ast
17-
sink(arr[1]);
16+
sink(arr[0]); // $ast,ir
17+
sink(arr[1]); // $f+:ast
1818
}
1919

2020
struct inner {
@@ -34,14 +34,14 @@ struct outer {
3434

3535
void nested_array_1(outer o) {
3636
o.nested.arr[1].data = user_input();
37-
sink(o.nested.arr[1].data); // $ir $f-:ast
38-
sink(o.nested.arr[0].data);
37+
sink(o.nested.arr[1].data); // $ast,ir
38+
sink(o.nested.arr[0].data); // $f+:ast
3939
}
4040

4141
void nested_array_2(outer o) {
4242
o.indirect->arr[1].data = user_input();
43-
sink(o.indirect->arr[1].data); // $f-:ast,ir
44-
sink(o.indirect->arr[0].data);
43+
sink(o.indirect->arr[1].data); // $ast $f-:ir
44+
sink(o.indirect->arr[0].data); // $f+:ast
4545
}
4646

4747
void nested_array_3(outer o) {

cpp/ql/test/library-tests/dataflow/fields/by_reference.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,11 +109,11 @@ void test_outer_with_ptr(Outer *pouter) {
109109

110110
sink(outer.inner_nested.a); // $ast,ir
111111
sink(outer.inner_ptr->a); // $ast $f-:ir
112-
sink(outer.a); // $f-:ast,ir
112+
sink(outer.a); // $ast $f-:ir
113113

114114
sink(pouter->inner_nested.a); // $ast,ir
115115
sink(pouter->inner_ptr->a); // $ast $f-:ir
116-
sink(pouter->a); // $f-:ast,ir
116+
sink(pouter->a); // $ast $f-:ir
117117
}
118118

119119
void test_outer_with_ref(Outer *pouter) {

cpp/ql/test/library-tests/dataflow/fields/flow-diff.expected

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,17 @@
2121
| aliasing.cpp:79:11:79:20 | call to user_input | aliasing.cpp:80:12:80:13 | m1 | IR only |
2222
| aliasing.cpp:86:10:86:19 | call to user_input | aliasing.cpp:87:12:87:13 | m1 | IR only |
2323
| aliasing.cpp:98:10:98:19 | call to user_input | aliasing.cpp:102:8:102:10 | * ... | IR only |
24-
| arrays.cpp:6:12:6:21 | call to user_input | arrays.cpp:7:8:7:13 | access to array | IR only |
25-
| arrays.cpp:6:12:6:21 | call to user_input | arrays.cpp:9:8:9:11 | * ... | IR only |
26-
| arrays.cpp:6:12:6:21 | call to user_input | arrays.cpp:10:8:10:15 | * ... | IR only |
27-
| arrays.cpp:15:14:15:23 | call to user_input | arrays.cpp:16:8:16:13 | access to array | IR only |
28-
| arrays.cpp:36:26:36:35 | call to user_input | arrays.cpp:37:24:37:27 | data | IR only |
24+
| arrays.cpp:6:12:6:21 | call to user_input | arrays.cpp:8:8:8:13 | access to array | AST only |
25+
| arrays.cpp:15:14:15:23 | call to user_input | arrays.cpp:17:8:17:13 | access to array | AST only |
26+
| arrays.cpp:36:26:36:35 | call to user_input | arrays.cpp:38:24:38:27 | data | AST only |
27+
| arrays.cpp:42:29:42:38 | call to user_input | arrays.cpp:43:27:43:30 | data | AST only |
28+
| arrays.cpp:42:29:42:38 | call to user_input | arrays.cpp:44:27:44:30 | data | AST only |
2929
| by_reference.cpp:84:14:84:23 | call to user_input | by_reference.cpp:111:25:111:25 | a | AST only |
3030
| by_reference.cpp:84:14:84:23 | call to user_input | by_reference.cpp:115:27:115:27 | a | AST only |
3131
| by_reference.cpp:88:13:88:22 | call to user_input | by_reference.cpp:131:25:131:25 | a | AST only |
3232
| by_reference.cpp:88:13:88:22 | call to user_input | by_reference.cpp:135:27:135:27 | a | AST only |
33+
| by_reference.cpp:92:9:92:18 | call to user_input | by_reference.cpp:112:14:112:14 | a | AST only |
34+
| by_reference.cpp:92:9:92:18 | call to user_input | by_reference.cpp:116:16:116:16 | a | AST only |
3335
| by_reference.cpp:96:8:96:17 | call to user_input | by_reference.cpp:132:14:132:14 | a | AST only |
3436
| by_reference.cpp:96:8:96:17 | call to user_input | by_reference.cpp:136:16:136:16 | a | AST only |
3537
| complex.cpp:62:19:62:28 | call to user_input | complex.cpp:52:18:52:18 | call to b | AST only |

0 commit comments

Comments
 (0)