Skip to content

Commit b8f8ed5

Browse files
authored
Merge pull request #1000 from jbj/dataflow-defbyref
C++: Support definition by reference in data flow library
2 parents c26b655 + 7afb489 commit b8f8ed5

File tree

10 files changed

+246
-26
lines changed

10 files changed

+246
-26
lines changed

change-notes/1.20/analysis-cpp.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,10 @@
3636

3737
## Changes to QL libraries
3838

39+
* The `semmle.code.cpp.dataflow.DataFlow` library now supports _definition by reference_ via output parameters of known functions.
40+
* Data flows through `memcpy` and `memmove` by default.
41+
* Custom flow into or out of arguments assigned by reference can be modelled with the new class `DataFlow::DefinitionByReferenceNode`.
42+
* The data flow library adds flow through library functions that are modeled in `semmle.code.cpp.models.interfaces.DataFlow`. Queries can add subclasses of `DataFlowFunction` to specify additional flow.
3943
* There is a new `Namespace.isInline()` predicate, which holds if the namespace was declared as `inline namespace`.
4044
* The `Expr.isConstant()` predicate now also holds for _address constant expressions_, which are addresses that will be constant after the program has been linked. These address constants do not have a result for `Expr.getValue()`.
4145
* There are new `Function.isDeclaredConstexpr()` and `Function.isConstexpr()` predicates. They can be used to tell whether a function was declared as `constexpr`, and whether it actually is `constexpr`.

cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowUtil.qll

Lines changed: 85 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,14 @@
33
*/
44
import cpp
55
private import semmle.code.cpp.dataflow.internal.FlowVar
6+
private import semmle.code.cpp.models.interfaces.DataFlow
67

78
private newtype TNode =
89
TExprNode(Expr e) or
910
TParameterNode(Parameter p) { exists(p.getFunction().getBlock()) } or
11+
TDefinitionByReferenceNode(VariableAccess va, Expr argument) {
12+
definitionByReference(va, argument)
13+
} or
1014
TUninitializedNode(LocalVariable v) {
1115
not v.hasInitializer()
1216
}
@@ -20,13 +24,7 @@ private newtype TNode =
2024
*/
2125
class Node extends TNode {
2226
/** Gets the function to which this node belongs. */
23-
Function getFunction() {
24-
result = this.asExpr().getEnclosingFunction()
25-
or
26-
result = this.asParameter().getFunction()
27-
or
28-
result = this.asUninitialized().getFunction()
29-
}
27+
Function getFunction() { none() } // overridden in subclasses
3028

3129
/**
3230
* INTERNAL: Do not use. Alternative name for `getFunction`.
@@ -36,18 +34,17 @@ class Node extends TNode {
3634
}
3735

3836
/** Gets the type of this node. */
39-
Type getType() {
40-
result = this.asExpr().getType()
41-
or
42-
result = asVariable(this).getType()
43-
}
37+
Type getType() { none() } // overridden in subclasses
4438

4539
/** Gets the expression corresponding to this node, if any. */
4640
Expr asExpr() { result = this.(ExprNode).getExpr() }
4741

4842
/** Gets the parameter corresponding to this node, if any. */
4943
Parameter asParameter() { result = this.(ParameterNode).getParameter() }
5044

45+
/** Gets the argument that defines this `DefinitionByReferenceNode`, if any. */
46+
Expr asDefiningArgument() { result = this.(DefinitionByReferenceNode).getArgument() }
47+
5148
/**
5249
* Gets the uninitialized local variable corresponding to this node, if
5350
* any.
@@ -74,6 +71,8 @@ class Node extends TNode {
7471
class ExprNode extends Node, TExprNode {
7572
Expr expr;
7673
ExprNode() { this = TExprNode(expr) }
74+
override Function getFunction() { result = expr.getEnclosingFunction() }
75+
override Type getType() { result = expr.getType() }
7776
override string toString() { result = expr.toString() }
7877
override Location getLocation() { result = expr.getLocation() }
7978
/** Gets the expression corresponding to this node. */
@@ -87,6 +86,8 @@ class ExprNode extends Node, TExprNode {
8786
class ParameterNode extends Node, TParameterNode {
8887
Parameter param;
8988
ParameterNode() { this = TParameterNode(param) }
89+
override Function getFunction() { result = param.getFunction() }
90+
override Type getType() { result = param.getType() }
9091
override string toString() { result = param.toString() }
9192
override Location getLocation() { result = param.getLocation() }
9293
/** Gets the parameter corresponding to this node. */
@@ -100,13 +101,44 @@ class ParameterNode extends Node, TParameterNode {
100101
}
101102
}
102103

104+
/**
105+
* A node that represents the value of a variable after a function call that
106+
* may have changed the variable because it's passed by reference.
107+
*
108+
* A typical example would be a call `f(&x)`. Firstly, there will be flow into
109+
* `x` from previous definitions of `x`. Secondly, there will be a
110+
* `DefinitionByReferenceNode` to represent the value of `x` after the call has
111+
* returned. This node will have its `getArgument()` equal to `&x`.
112+
*/
113+
class DefinitionByReferenceNode extends Node, TDefinitionByReferenceNode {
114+
VariableAccess va;
115+
Expr argument;
116+
117+
DefinitionByReferenceNode() { this = TDefinitionByReferenceNode(va, argument) }
118+
override Function getFunction() { result = va.getEnclosingFunction() }
119+
override Type getType() { result = va.getType() }
120+
override string toString() { result = "ref arg " + argument.toString() }
121+
override Location getLocation() { result = argument.getLocation() }
122+
/** Gets the argument corresponding to this node. */
123+
Expr getArgument() { result = argument }
124+
/** Gets the parameter through which this value is assigned. */
125+
Parameter getParameter() {
126+
exists(FunctionCall call, int i |
127+
argument = call.getArgument(i) and
128+
result = call.getTarget().getParameter(i)
129+
)
130+
}
131+
}
132+
103133
/**
104134
* The value of an uninitialized local variable, viewed as a node in a data
105135
* flow graph.
106136
*/
107137
class UninitializedNode extends Node, TUninitializedNode {
108138
LocalVariable v;
109139
UninitializedNode() { this = TUninitializedNode(v) }
140+
override Function getFunction() { result = v.getFunction() }
141+
override Type getType() { result = v.getType() }
110142
override string toString() { result = v.toString() }
111143
override Location getLocation() { result = v.getLocation() }
112144
/** Gets the uninitialized local variable corresponding to this node. */
@@ -143,6 +175,14 @@ ExprNode exprNode(Expr e) { result.getExpr() = e }
143175
*/
144176
ParameterNode parameterNode(Parameter p) { result.getParameter() = p }
145177

178+
/**
179+
* Gets the `Node` corresponding to a definition by reference of the variable
180+
* that is passed as `argument` of a call.
181+
*/
182+
DefinitionByReferenceNode definitionByReferenceNodeFromArgument(Expr argument) {
183+
result.getArgument() = argument
184+
}
185+
146186
/**
147187
* Gets the `Node` corresponding to the value of an uninitialized local
148188
* variable `v`.
@@ -151,12 +191,6 @@ UninitializedNode uninitializedNode(LocalVariable v) {
151191
result.getLocalVariable() = v
152192
}
153193

154-
private Variable asVariable(Node node) {
155-
result = node.asParameter()
156-
or
157-
result = node.asUninitialized()
158-
}
159-
160194
/**
161195
* Holds if data flows from `nodeFrom` to `nodeTo` in exactly one local
162196
* (intra-procedural) step.
@@ -170,10 +204,17 @@ predicate localFlowStep(Node nodeFrom, Node nodeTo) {
170204
(
171205
exprToVarStep(nodeFrom.asExpr(), var)
172206
or
173-
varSourceBaseCase(var, asVariable(nodeFrom))
207+
varSourceBaseCase(var, nodeFrom.asParameter())
208+
or
209+
varSourceBaseCase(var, nodeFrom.asUninitialized())
210+
or
211+
var.definedByReference(nodeFrom.asDefiningArgument())
174212
) and
175213
varToExprStep(var, nodeTo.asExpr())
176214
)
215+
or
216+
// Expr -> DefinitionByReferenceNode
217+
exprToDefinitionByReferenceStep(nodeFrom.asExpr(), nodeTo.asDefiningArgument())
177218
}
178219

179220
/**
@@ -232,10 +273,31 @@ private predicate exprToExprStep_nocfg(Expr fromExpr, Expr toExpr) {
232273
fromExpr = op.getOperand()
233274
)
234275
or
235-
toExpr = any(FunctionCall moveCall |
236-
moveCall.getTarget().getNamespace().getName() = "std" and
237-
moveCall.getTarget().getName() = "move" and
238-
fromExpr = moveCall.getArgument(0)
276+
toExpr = any(Call call |
277+
exists(DataFlowFunction f, FunctionInput inModel , FunctionOutput outModel, int iIn |
278+
call.getTarget() = f and
279+
f.hasDataFlow(inModel, outModel) and
280+
outModel.isOutReturnValue() and
281+
inModel.isInParameter(iIn) and
282+
fromExpr = call.getArgument(iIn)
283+
)
284+
)
285+
}
286+
287+
private predicate exprToDefinitionByReferenceStep(Expr exprIn, Expr argOut) {
288+
exists(DataFlowFunction f, Call call, FunctionOutput outModel, int argOutIndex |
289+
call.getTarget() = f and
290+
argOut = call.getArgument(argOutIndex) and
291+
outModel.isOutParameterPointer(argOutIndex) and
292+
exists(int argInIndex, FunctionInput inModel |
293+
f.hasDataFlow(inModel, outModel)
294+
|
295+
inModel.isInParameterPointer(argInIndex) and
296+
call.passesByReference(argInIndex, exprIn)
297+
or
298+
inModel.isInParameter(argInIndex) and
299+
exprIn = call.getArgument(argInIndex)
300+
)
239301
)
240302
}
241303

cpp/ql/src/semmle/code/cpp/dataflow/internal/FlowVar.qll

Lines changed: 50 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,12 @@ cached class FlowVar extends TFlowVar {
5151
*/
5252
cached abstract predicate definedByExpr(Expr e, ControlFlowNode node);
5353

54+
/**
55+
* Holds if this `FlowVar` corresponds to the data written by a call that
56+
* passes a variable as argument `arg`.
57+
*/
58+
cached abstract predicate definedByReference(Expr arg);
59+
5460
/**
5561
* Holds if this `FlowVar` corresponds to the initial value of `v`. The following
5662
* is an exhaustive list of cases where this may happen.
@@ -137,6 +143,8 @@ module FlowVar_internal {
137143
or
138144
assignmentLikeOperation(sbb, v, _)
139145
or
146+
blockVarDefinedByReference(sbb, v, _)
147+
or
140148
blockVarDefinedByVariable(sbb, v)
141149
)
142150
}
@@ -174,6 +182,11 @@ module FlowVar_internal {
174182
else node = def.getDefinition())
175183
}
176184

185+
override predicate definedByReference(Expr arg) {
186+
definitionByReference(v.getAnAccess(), arg) and
187+
arg = def.getDefinition()
188+
}
189+
177190
override predicate definedByInitialValue(LocalScopeVariable param) {
178191
def.definedByParameter(param) and
179192
param = v
@@ -191,6 +204,8 @@ module FlowVar_internal {
191204
this.definedByExpr(_, _)
192205
or
193206
this.definedByInitialValue(_)
207+
or
208+
this.definedByReference(_)
194209
}
195210

196211
/**
@@ -221,7 +236,17 @@ module FlowVar_internal {
221236
BlockVar() { this = TBlockVar(sbb, v) }
222237

223238
override VariableAccess getAnAccess() {
224-
variableAccessInSBB(v, getAReachedBlockVarSBB(this), result)
239+
exists(SubBasicBlock reached |
240+
reached = getAReachedBlockVarSBB(this)
241+
|
242+
variableAccessInSBB(v, reached, result)
243+
or
244+
// Allow flow into a `VariableAccess` that is used as definition by
245+
// reference. This flow is blocked by `getAReachedBlockVarSBB` because
246+
// flow should not propagate past that.
247+
result = reached.getASuccessor().(VariableAccess) and
248+
blockVarDefinedByReference(result, v, _)
249+
)
225250
}
226251

227252
override predicate definedByInitialValue(LocalScopeVariable lsv) {
@@ -237,6 +262,10 @@ module FlowVar_internal {
237262
node = sbb.getANode()
238263
}
239264

265+
override predicate definedByReference(Expr arg) {
266+
blockVarDefinedByReference(sbb, v, arg)
267+
}
268+
240269
override string toString() {
241270
exists(Expr e |
242271
this.definedByExpr(e, _) and
@@ -246,9 +275,15 @@ module FlowVar_internal {
246275
this.definedByInitialValue(_) and
247276
result = "initial value of "+ v
248277
or
278+
exists(Expr arg |
279+
this.definedByReference(arg) and
280+
result = "ref def: "+ arg
281+
)
282+
or
249283
// impossible case
250284
not this.definedByExpr(_, _) and
251285
not this.definedByInitialValue(_) and
286+
not this.definedByReference(_) and
252287
result = "undefined "+ v
253288
}
254289

@@ -373,7 +408,8 @@ module FlowVar_internal {
373408
mid = getAReachedBlockVarSBB(start) and
374409
result = mid.getASuccessor() and
375410
not skipLoop(mid, result, sbbDef, v) and
376-
not assignmentLikeOperation(result, v, _)
411+
not assignmentLikeOperation(result, v, _) and
412+
not blockVarDefinedByReference(result, v, _)
377413
)
378414
}
379415

@@ -481,6 +517,9 @@ module FlowVar_internal {
481517
*/
482518
predicate overwrite(VariableAccess va, ControlFlowNode node) {
483519
va = node.(AssignExpr).getLValue()
520+
or
521+
va = node and
522+
definitionByReference(node, _)
484523
}
485524

486525
/**
@@ -515,6 +554,11 @@ module FlowVar_internal {
515554
)
516555
}
517556

557+
predicate blockVarDefinedByReference(ControlFlowNode node, Variable v, Expr argument) {
558+
node = v.getAnAccess() and
559+
definitionByReference(node, argument)
560+
}
561+
518562
/**
519563
* Holds if `v` is initialized by `init` to have value `assignedExpr`.
520564
*/
@@ -534,8 +578,11 @@ module FlowVar_internal {
534578
class DataFlowSubBasicBlockCutNode extends SubBasicBlockCutNode {
535579
DataFlowSubBasicBlockCutNode() {
536580
exists(Variable v |
537-
not fullySupportedSsaVariable(v) and
581+
not fullySupportedSsaVariable(v)
582+
|
538583
assignmentLikeOperation(this, v, _)
584+
or
585+
blockVarDefinedByReference(this, v, _)
539586
// It is not necessary to cut the basic blocks at `Initializer` nodes
540587
// because the affected variable can have no _other_ value before its
541588
// initializer. It is not necessary to cut basic blocks at procedure

cpp/ql/test/library-tests/dataflow/dataflow-tests/DataflowTestCommon.qll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ class TestAllocationConfig extends DataFlow::Configuration {
1212
or
1313
source.asParameter().getName().matches("source%")
1414
or
15+
source.(DataFlow::DefinitionByReferenceNode).getParameter().getName().matches("ref_source%")
16+
or
1517
// Track uninitialized variables
1618
exists(source.asUninitialized())
1719
}

cpp/ql/test/library-tests/dataflow/dataflow-tests/localFlow.expected

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
| example.c:24:13:24:30 | ... = ... | example.c:24:2:24:30 | ... = ... |
66
| example.c:24:24:24:30 | ... + ... | example.c:24:13:24:30 | ... = ... |
77
| example.c:26:13:26:16 | call to getX | example.c:26:2:26:25 | ... = ... |
8+
| example.c:26:18:26:24 | ref arg & ... | example.c:26:2:26:7 | coords |
89
| test.cpp:6:12:6:17 | call to source | test.cpp:7:8:7:9 | t1 |
910
| test.cpp:6:12:6:17 | call to source | test.cpp:8:8:8:9 | t1 |
1011
| test.cpp:6:12:6:17 | call to source | test.cpp:9:8:9:9 | t1 |
@@ -28,3 +29,17 @@
2829
| test.cpp:24:10:24:11 | t2 | test.cpp:23:23:23:24 | t1 |
2930
| test.cpp:24:10:24:11 | t2 | test.cpp:24:5:24:11 | ... = ... |
3031
| test.cpp:24:10:24:11 | t2 | test.cpp:26:8:26:9 | t1 |
32+
| test.cpp:430:48:430:54 | source1 | test.cpp:432:17:432:23 | source1 |
33+
| test.cpp:431:12:431:13 | 0 | test.cpp:432:11:432:13 | tmp |
34+
| test.cpp:432:10:432:13 | & ... | test.cpp:432:3:432:8 | call to memcpy |
35+
| test.cpp:432:10:432:13 | ref arg & ... | test.cpp:433:8:433:10 | tmp |
36+
| test.cpp:432:17:432:23 | source1 | test.cpp:432:10:432:13 | ref arg & ... |
37+
| test.cpp:436:53:436:59 | source1 | test.cpp:439:17:439:23 | source1 |
38+
| test.cpp:436:66:436:66 | b | test.cpp:441:7:441:7 | b |
39+
| test.cpp:437:12:437:13 | 0 | test.cpp:438:19:438:21 | tmp |
40+
| test.cpp:437:12:437:13 | 0 | test.cpp:439:11:439:13 | tmp |
41+
| test.cpp:439:10:439:13 | & ... | test.cpp:439:3:439:8 | call to memcpy |
42+
| test.cpp:439:10:439:13 | ref arg & ... | test.cpp:439:33:439:35 | tmp |
43+
| test.cpp:439:10:439:13 | ref arg & ... | test.cpp:440:8:440:10 | tmp |
44+
| test.cpp:439:10:439:13 | ref arg & ... | test.cpp:442:10:442:12 | tmp |
45+
| test.cpp:439:17:439:23 | source1 | test.cpp:439:10:439:13 | ref arg & ... |

0 commit comments

Comments
 (0)