Skip to content

Commit c8a3baf

Browse files
authored
Merge pull request #4272 from jbj/dataflow-partial-access
C++: Add AST flow through arrays
2 parents 17bd678 + 7856083 commit c8a3baf

24 files changed

+668
-49
lines changed

cpp/ql/src/semmle/code/cpp/dataflow/internal/AddressFlow.qll

Lines changed: 93 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ private predicate stdIdentityFunction(Function f) { f.hasQualifiedName("std", ["
2929
*/
3030
private predicate stdAddressOf(Function f) { f.hasQualifiedName("std", "addressof") }
3131

32-
private predicate lvalueToLvalueStep(Expr lvalueIn, Expr lvalueOut) {
32+
private predicate lvalueToLvalueStepPure(Expr lvalueIn, Expr lvalueOut) {
3333
lvalueIn.getConversion() = lvalueOut.(ParenthesisExpr)
3434
or
3535
// When an object is implicitly converted to a reference to one of its base
@@ -42,6 +42,10 @@ private predicate lvalueToLvalueStep(Expr lvalueIn, Expr lvalueOut) {
4242
// such casts.
4343
lvalueIn.getConversion() = lvalueOut and
4444
lvalueOut.(CStyleCast).isImplicit()
45+
}
46+
47+
private predicate lvalueToLvalueStep(Expr lvalueIn, Expr lvalueOut) {
48+
lvalueToLvalueStepPure(lvalueIn, lvalueOut)
4549
or
4650
// C++ only
4751
lvalueIn = lvalueOut.(PrefixCrementOperation).getOperand().getFullyConverted()
@@ -214,6 +218,69 @@ private predicate referenceToUpdate(Expr reference, Expr outer, ControlFlowNode
214218
)
215219
}
216220

221+
private predicate lvalueFromVariableAccess(VariableAccess va, Expr lvalue) {
222+
// Base case for non-reference types.
223+
lvalue = va and
224+
not va.getConversion() instanceof ReferenceDereferenceExpr
225+
or
226+
// Base case for reference types where we pretend that they are
227+
// non-reference types. The type of the target of `va` can be `ReferenceType`
228+
// or `FunctionReferenceType`.
229+
lvalue = va.getConversion().(ReferenceDereferenceExpr)
230+
or
231+
// lvalue -> lvalue
232+
exists(Expr prev |
233+
lvalueFromVariableAccess(va, prev) and
234+
lvalueToLvalueStep(prev, lvalue)
235+
)
236+
or
237+
// pointer -> lvalue
238+
exists(Expr prev |
239+
pointerFromVariableAccess(va, prev) and
240+
pointerToLvalueStep(prev, lvalue)
241+
)
242+
or
243+
// reference -> lvalue
244+
exists(Expr prev |
245+
referenceFromVariableAccess(va, prev) and
246+
referenceToLvalueStep(prev, lvalue)
247+
)
248+
}
249+
250+
private predicate pointerFromVariableAccess(VariableAccess va, Expr pointer) {
251+
// pointer -> pointer
252+
exists(Expr prev |
253+
pointerFromVariableAccess(va, prev) and
254+
pointerToPointerStep(prev, pointer)
255+
)
256+
or
257+
// reference -> pointer
258+
exists(Expr prev |
259+
referenceFromVariableAccess(va, prev) and
260+
referenceToPointerStep(prev, pointer)
261+
)
262+
or
263+
// lvalue -> pointer
264+
exists(Expr prev |
265+
lvalueFromVariableAccess(va, prev) and
266+
lvalueToPointerStep(prev, pointer)
267+
)
268+
}
269+
270+
private predicate referenceFromVariableAccess(VariableAccess va, Expr reference) {
271+
// reference -> reference
272+
exists(Expr prev |
273+
referenceFromVariableAccess(va, prev) and
274+
referenceToReferenceStep(prev, reference)
275+
)
276+
or
277+
// lvalue -> reference
278+
exists(Expr prev |
279+
lvalueFromVariableAccess(va, prev) and
280+
lvalueToReferenceStep(prev, reference)
281+
)
282+
}
283+
217284
/**
218285
* Holds if `node` is a control-flow node that may modify `inner` (or what it
219286
* points to) through `outer`. The two expressions may be `Conversion`s. Plain
@@ -236,7 +303,7 @@ predicate valueToUpdate(Expr inner, Expr outer, ControlFlowNode node) {
236303
(
237304
inner instanceof VariableAccess and
238305
// Don't track non-field assignments
239-
(assignmentTo(outer, _) implies inner instanceof FieldAccess)
306+
not (assignmentTo(outer, _) and outer.(VariableAccess).getTarget() instanceof StackVariable)
240307
or
241308
inner instanceof ThisExpr
242309
or
@@ -245,3 +312,27 @@ predicate valueToUpdate(Expr inner, Expr outer, ControlFlowNode node) {
245312
// can't do anything useful with those at the moment.
246313
)
247314
}
315+
316+
/**
317+
* Holds if `e` is a fully-converted expression that evaluates to an lvalue
318+
* derived from `va` and is used for reading from or assigning to. This is in
319+
* contrast with a variable access that is used for taking an address (`&x`)
320+
* or simply discarding its value (`x;`).
321+
*
322+
* This analysis does not propagate across assignments or calls, and unlike
323+
* `variableAccessedAsValue` in `semmle.code.cpp.dataflow.EscapesTree` it
324+
* propagates through array accesses but not field accesses. The analysis is
325+
* also not concerned with whether the lvalue `e` is converted to an rvalue --
326+
* to examine that, use the relevant member predicates on `Expr`.
327+
*
328+
* If `va` has reference type, the analysis concerns the value pointed to by
329+
* the reference rather than the reference itself. The expression `e` may be a
330+
* `Conversion`.
331+
*/
332+
predicate variablePartiallyAccessed(VariableAccess va, Expr e) {
333+
lvalueFromVariableAccess(va, e) and
334+
not lvalueToLvalueStepPure(e, _) and
335+
not lvalueToPointerStep(e, _) and
336+
not lvalueToReferenceStep(e, _) and
337+
not e = any(ExprInVoidContext eivc | e = eivc.getConversion*())
338+
}

cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowUtil.qll

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ private import cpp
66
private import semmle.code.cpp.dataflow.internal.FlowVar
77
private import semmle.code.cpp.models.interfaces.DataFlow
88
private import semmle.code.cpp.controlflow.Guards
9+
private import semmle.code.cpp.dataflow.internal.AddressFlow
910

1011
cached
1112
private newtype TNode =
@@ -610,6 +611,15 @@ private predicate exprToExprStep_nocfg(Expr fromExpr, Expr toExpr) {
610611
or
611612
toExpr.(AddressOfExpr).getOperand() = fromExpr
612613
or
614+
// This rule enables flow from an array to its elements. Example: `a` to
615+
// `a[i]` or `*a`, where `a` is an array type. It does not enable flow from a
616+
// pointer to its indirection as in `p[i]` where `p` is a pointer type.
617+
exists(Expr toConverted |
618+
variablePartiallyAccessed(fromExpr, toConverted) and
619+
toExpr = toConverted.getUnconverted() and
620+
not toExpr = fromExpr
621+
)
622+
or
613623
toExpr.(BuiltInOperationBuiltInAddressOf).getOperand() = fromExpr
614624
or
615625
// The following case is needed to track the qualifier object for flow

cpp/ql/test/library-tests/dataflow/dataflow-tests/clang.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,6 @@ void following_pointers(
4848

4949
int stackArray[2] = { source(), source() };
5050
stackArray[0] = source();
51-
sink(stackArray); // no flow
51+
sink(stackArray); // flow
5252
}
5353

cpp/ql/test/library-tests/dataflow/dataflow-tests/localFlow.expected

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
| example.c:24:13:24:18 | coords [post update] | example.c:26:19:26:24 | coords |
1414
| example.c:24:13:24:30 | ... = ... | example.c:24:2:24:30 | ... = ... |
1515
| example.c:24:13:24:30 | ... = ... | example.c:24:20:24:20 | y [post update] |
16+
| example.c:24:20:24:20 | y | example.c:24:13:24:30 | ... = ... |
1617
| example.c:24:24:24:30 | ... + ... | example.c:24:13:24:30 | ... = ... |
1718
| example.c:26:2:26:25 | ... = ... | example.c:26:9:26:9 | x [post update] |
1819
| example.c:26:13:26:16 | call to getX | example.c:26:2:26:25 | ... = ... |

cpp/ql/test/library-tests/dataflow/dataflow-tests/test.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -428,7 +428,7 @@ void intPointerSourceCaller2() {
428428
int local[1];
429429
intPointerSource(local);
430430
sink(local); // tainted
431-
sink(*local); // clean
431+
sink(*local); // tainted
432432
}
433433

434434
void intArraySourceCaller() {
@@ -441,7 +441,7 @@ void intArraySourceCaller2() {
441441
int local[2];
442442
intArraySource(local, 2);
443443
sink(local); // tainted
444-
sink(*local); // clean
444+
sink(*local); // tainted
445445
}
446446

447447
///////////////////////////////////////////////////////////////////////////////
@@ -468,5 +468,5 @@ void intOutparamSource(int *p) {
468468
void viaOutparam() {
469469
int x = 0;
470470
intOutparamSource(&x);
471-
sink(x); // tainted [FALSE NEGATIVE]
471+
sink(x); // tainted
472472
}

cpp/ql/test/library-tests/dataflow/dataflow-tests/test.expected

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
| clang.cpp:30:27:30:34 | call to getFirst | clang.cpp:28:27:28:32 | call to source |
1717
| clang.cpp:37:10:37:11 | m2 | clang.cpp:34:32:34:37 | call to source |
1818
| clang.cpp:45:17:45:18 | m2 | clang.cpp:43:35:43:40 | call to source |
19+
| clang.cpp:51:8:51:17 | stackArray | clang.cpp:50:19:50:24 | call to source |
1920
| dispatch.cpp:11:38:11:38 | x | dispatch.cpp:37:19:37:24 | call to source |
2021
| dispatch.cpp:11:38:11:38 | x | dispatch.cpp:45:18:45:23 | call to source |
2122
| dispatch.cpp:35:16:35:25 | call to notSource1 | dispatch.cpp:9:37:9:42 | call to source |
@@ -79,12 +80,17 @@
7980
| test.cpp:424:8:424:12 | local | test.cpp:423:20:423:25 | ref arg & ... |
8081
| test.cpp:430:8:430:12 | local | test.cpp:428:7:428:11 | local |
8182
| test.cpp:430:8:430:12 | local | test.cpp:429:20:429:24 | ref arg local |
83+
| test.cpp:431:8:431:13 | * ... | test.cpp:428:7:428:11 | local |
84+
| test.cpp:431:8:431:13 | * ... | test.cpp:429:20:429:24 | ref arg local |
8285
| test.cpp:437:8:437:12 | local | test.cpp:435:7:435:11 | local |
8386
| test.cpp:437:8:437:12 | local | test.cpp:436:18:436:23 | ref arg & ... |
8487
| test.cpp:443:8:443:12 | local | test.cpp:441:7:441:11 | local |
8588
| test.cpp:443:8:443:12 | local | test.cpp:442:18:442:22 | ref arg local |
89+
| test.cpp:444:8:444:13 | * ... | test.cpp:441:7:441:11 | local |
90+
| test.cpp:444:8:444:13 | * ... | test.cpp:442:18:442:22 | ref arg local |
8691
| test.cpp:450:9:450:22 | (statement expression) | test.cpp:449:26:449:32 | source1 |
8792
| test.cpp:461:8:461:12 | local | test.cpp:449:26:449:32 | source1 |
93+
| test.cpp:471:8:471:8 | x | test.cpp:465:8:465:13 | call to source |
8894
| true_upon_entry.cpp:21:8:21:8 | x | true_upon_entry.cpp:17:11:17:16 | call to source |
8995
| true_upon_entry.cpp:29:8:29:8 | x | true_upon_entry.cpp:27:9:27:14 | call to source |
9096
| true_upon_entry.cpp:39:8:39:8 | x | true_upon_entry.cpp:33:11:33:16 | call to source |

cpp/ql/test/library-tests/dataflow/dataflow-tests/test_diff.expected

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
| BarrierGuard.cpp:60:11:60:16 | BarrierGuard.cpp:62:14:62:14 | AST only |
33
| clang.cpp:12:9:12:20 | clang.cpp:22:8:22:20 | AST only |
44
| clang.cpp:39:42:39:47 | clang.cpp:41:18:41:19 | IR only |
5+
| clang.cpp:50:19:50:24 | clang.cpp:51:8:51:17 | AST only |
56
| dispatch.cpp:16:37:16:42 | dispatch.cpp:32:16:32:24 | IR only |
67
| dispatch.cpp:16:37:16:42 | dispatch.cpp:40:15:40:23 | IR only |
78
| dispatch.cpp:22:37:22:42 | dispatch.cpp:31:16:31:24 | IR only |
@@ -41,11 +42,16 @@
4142
| test.cpp:422:7:422:11 | test.cpp:424:8:424:12 | AST only |
4243
| test.cpp:423:20:423:25 | test.cpp:424:8:424:12 | AST only |
4344
| test.cpp:428:7:428:11 | test.cpp:430:8:430:12 | AST only |
45+
| test.cpp:428:7:428:11 | test.cpp:431:8:431:13 | AST only |
4446
| test.cpp:429:20:429:24 | test.cpp:430:8:430:12 | AST only |
47+
| test.cpp:429:20:429:24 | test.cpp:431:8:431:13 | AST only |
4548
| test.cpp:435:7:435:11 | test.cpp:437:8:437:12 | AST only |
4649
| test.cpp:436:18:436:23 | test.cpp:437:8:437:12 | AST only |
4750
| test.cpp:441:7:441:11 | test.cpp:443:8:443:12 | AST only |
51+
| test.cpp:441:7:441:11 | test.cpp:444:8:444:13 | AST only |
4852
| test.cpp:442:18:442:22 | test.cpp:443:8:443:12 | AST only |
53+
| test.cpp:442:18:442:22 | test.cpp:444:8:444:13 | AST only |
54+
| test.cpp:465:8:465:13 | test.cpp:471:8:471:8 | AST only |
4955
| true_upon_entry.cpp:9:11:9:16 | true_upon_entry.cpp:13:8:13:8 | IR only |
5056
| true_upon_entry.cpp:62:11:62:16 | true_upon_entry.cpp:66:8:66:8 | IR only |
5157
| true_upon_entry.cpp:98:11:98:16 | true_upon_entry.cpp:105:8:105:8 | IR only |
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
void sink(void *o);
2+
void *user_input(void);
3+
4+
void local_array() {
5+
void *arr[10] = { 0 };
6+
arr[0] = user_input();
7+
sink(arr[0]); // $ast,ir
8+
sink(arr[1]); // $f+:ast
9+
sink(*arr); // $ast,ir
10+
sink(*&arr[0]); // $ast,ir
11+
}
12+
13+
void local_array_convoluted_assign() {
14+
void *arr[10] = { 0 };
15+
*&arr[0] = user_input();
16+
sink(arr[0]); // $ast,ir
17+
sink(arr[1]); // $f+:ast
18+
}
19+
20+
struct inner {
21+
void *data;
22+
int unrelated;
23+
};
24+
25+
struct middle {
26+
inner arr[10];
27+
inner *ptr;
28+
};
29+
30+
struct outer {
31+
middle nested;
32+
middle *indirect;
33+
};
34+
35+
void nested_array_1(outer o) {
36+
o.nested.arr[1].data = user_input();
37+
sink(o.nested.arr[1].data); // $ast,ir
38+
sink(o.nested.arr[0].data); // $f+:ast
39+
}
40+
41+
void nested_array_2(outer o) {
42+
o.indirect->arr[1].data = user_input();
43+
sink(o.indirect->arr[1].data); // $ast $f-:ir
44+
sink(o.indirect->arr[0].data); // $f+:ast
45+
}
46+
47+
void nested_array_3(outer o) {
48+
o.indirect->ptr[1].data = user_input();
49+
sink(o.indirect->ptr[1].data); // $f-:ast,ir
50+
sink(o.indirect->ptr[0].data);
51+
}

cpp/ql/test/library-tests/dataflow/fields/by_reference.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,11 +109,11 @@ void test_outer_with_ptr(Outer *pouter) {
109109

110110
sink(outer.inner_nested.a); // $ast,ir
111111
sink(outer.inner_ptr->a); // $ast $f-:ir
112-
sink(outer.a); // $f-:ast $f-:ir
112+
sink(outer.a); // $ast $f-:ir
113113

114114
sink(pouter->inner_nested.a); // $ast,ir
115115
sink(pouter->inner_ptr->a); // $ast $f-:ir
116-
sink(pouter->a); // $f-:ast $f-:ir
116+
sink(pouter->a); // $ast $f-:ir
117117
}
118118

119119
void test_outer_with_ref(Outer *pouter) {

cpp/ql/test/library-tests/dataflow/fields/dataflow-consistency.expected

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,12 @@ argHasPostUpdate
3030
| D.cpp:43:24:43:40 | new | ArgumentNode is missing PostUpdateNode. |
3131
| D.cpp:50:24:50:40 | new | ArgumentNode is missing PostUpdateNode. |
3232
| D.cpp:57:25:57:41 | new | ArgumentNode is missing PostUpdateNode. |
33+
| arrays.cpp:7:8:7:13 | access to array | ArgumentNode is missing PostUpdateNode. |
34+
| arrays.cpp:8:8:8:13 | access to array | ArgumentNode is missing PostUpdateNode. |
35+
| arrays.cpp:9:8:9:11 | * ... | ArgumentNode is missing PostUpdateNode. |
36+
| arrays.cpp:10:8:10:15 | * ... | ArgumentNode is missing PostUpdateNode. |
37+
| arrays.cpp:16:8:16:13 | access to array | ArgumentNode is missing PostUpdateNode. |
38+
| arrays.cpp:17:8:17:13 | access to array | ArgumentNode is missing PostUpdateNode. |
3339
| by_reference.cpp:51:8:51:8 | s | ArgumentNode is missing PostUpdateNode. |
3440
| by_reference.cpp:57:8:57:8 | s | ArgumentNode is missing PostUpdateNode. |
3541
| by_reference.cpp:63:8:63:8 | s | ArgumentNode is missing PostUpdateNode. |

0 commit comments

Comments
 (0)