Skip to content

Commit b6cf341

Browse files
authored
Merge pull request #1715 from jbj/ast-field-flow
C++: Initial AST-based flow through fields
2 parents f5bc8b5 + fdd8de7 commit b6cf341

File tree

19 files changed

+987
-72
lines changed

19 files changed

+987
-72
lines changed

cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowPrivate.qll

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ private import DataFlowDispatch
55
/** Gets the instance argument of a non-static call. */
66
private Node getInstanceArgument(Call call) {
77
result.asExpr() = call.getQualifier()
8+
or
9+
result.(PreConstructorCallNode).getConstructorCall() = call
810
// This does not include the implicit `this` argument on auto-generated
911
// base class destructor calls as those do not have an AST element.
1012
}
@@ -167,7 +169,15 @@ private class ArrayContent extends Content, TArrayContent {
167169
* value of `node1`.
168170
*/
169171
predicate storeStep(Node node1, Content f, PostUpdateNode node2) {
170-
none() // stub implementation
172+
exists(FieldAccess fa |
173+
exists(Assignment a |
174+
node1.asExpr() = a and
175+
a.getLValue() = fa
176+
) and
177+
not fa.getTarget().isStatic() and
178+
node2.getPreUpdateNode().asExpr() = fa.getQualifier() and
179+
f.(FieldContent).getField() = fa.getTarget()
180+
)
171181
}
172182

173183
/**
@@ -176,7 +186,12 @@ predicate storeStep(Node node1, Content f, PostUpdateNode node2) {
176186
* `node2`.
177187
*/
178188
predicate readStep(Node node1, Content f, Node node2) {
179-
none() // stub implementation
189+
exists(FieldAccess fr |
190+
node1.asExpr() = fr.getQualifier() and
191+
fr.getTarget() = f.(FieldContent).getField() and
192+
fr = node2.asExpr() and
193+
not fr = any(AssignExpr a).getLValue()
194+
)
180195
}
181196

182197
/**

cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowUtil.qll

Lines changed: 166 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,10 @@ private import semmle.code.cpp.models.interfaces.DataFlow
99
cached
1010
private newtype TNode =
1111
TExprNode(Expr e) or
12-
TParameterNode(Parameter p) { exists(p.getFunction().getBlock()) } or
13-
TDefinitionByReferenceNode(VariableAccess va, Expr argument) {
14-
definitionByReference(va, argument)
15-
} or
12+
TPartialDefinitionNode(PartialDefinition pd) or
13+
TPreConstructorCallNode(ConstructorCall call) or
14+
TExplicitParameterNode(Parameter p) { exists(p.getFunction().getBlock()) } or
15+
TInstanceParameterNode(MemberFunction f) { exists(f.getBlock()) and not f.isStatic() } or
1616
TUninitializedNode(LocalVariable v) { not v.hasInitializer() }
1717

1818
/**
@@ -38,11 +38,23 @@ class Node extends TNode {
3838
Expr asExpr() { result = this.(ExprNode).getExpr() }
3939

4040
/** Gets the parameter corresponding to this node, if any. */
41-
Parameter asParameter() { result = this.(ParameterNode).getParameter() }
41+
Parameter asParameter() { result = this.(ExplicitParameterNode).getParameter() }
4242

4343
/** Gets the argument that defines this `DefinitionByReferenceNode`, if any. */
4444
Expr asDefiningArgument() { result = this.(DefinitionByReferenceNode).getArgument() }
4545

46+
/**
47+
* Gets the expression that is partially defined by this node, if any.
48+
*
49+
* Partial definitions are created for field stores (`x.y = taint();` is a partial
50+
* definition of `x`), and for calls that may change the value of an object (so
51+
* `x.set(taint())` is a partial definition of `x`, and `transfer(&x, taint())` is
52+
* a partial definition of `&x`).
53+
*/
54+
Expr asPartialDefinition() {
55+
result = this.(PartialDefinitionNode).getPartialDefinition().getDefinedExpr()
56+
}
57+
4658
/**
4759
* Gets the uninitialized local variable corresponding to this node, if
4860
* any.
@@ -94,14 +106,22 @@ class ExprNode extends Node, TExprNode {
94106
Expr getExpr() { result = expr }
95107
}
96108

109+
abstract class ParameterNode extends Node, TNode {
110+
/**
111+
* Holds if this node is the parameter of `c` at the specified (zero-based)
112+
* position. The implicit `this` parameter is considered to have index `-1`.
113+
*/
114+
abstract predicate isParameterOf(Function f, int i);
115+
}
116+
97117
/**
98118
* The value of a parameter at function entry, viewed as a node in a data
99119
* flow graph.
100120
*/
101-
class ParameterNode extends Node, TParameterNode {
121+
class ExplicitParameterNode extends ParameterNode, TExplicitParameterNode {
102122
Parameter param;
103123

104-
ParameterNode() { this = TParameterNode(param) }
124+
ExplicitParameterNode() { this = TExplicitParameterNode(param) }
105125

106126
override Function getFunction() { result = param.getFunction() }
107127

@@ -114,11 +134,23 @@ class ParameterNode extends Node, TParameterNode {
114134
/** Gets the parameter corresponding to this node. */
115135
Parameter getParameter() { result = param }
116136

117-
/**
118-
* Holds if this node is the parameter of `c` at the specified (zero-based)
119-
* position. The implicit `this` parameter is considered to have index `-1`.
120-
*/
121-
predicate isParameterOf(Function f, int i) { f.getParameter(i) = param }
137+
override predicate isParameterOf(Function f, int i) { f.getParameter(i) = param }
138+
}
139+
140+
class ImplicitParameterNode extends ParameterNode, TInstanceParameterNode {
141+
MemberFunction f;
142+
143+
ImplicitParameterNode() { this = TInstanceParameterNode(f) }
144+
145+
override Function getFunction() { result = f }
146+
147+
override Type getType() { result = f.getDeclaringType() }
148+
149+
override string toString() { result = "`this` parameter in " + f.getName() }
150+
151+
override Location getLocation() { result = f.getLocation() }
152+
153+
override predicate isParameterOf(Function fun, int i) { f = fun and i = -1 }
122154
}
123155

124156
/**
@@ -130,12 +162,18 @@ class ParameterNode extends Node, TParameterNode {
130162
* `DefinitionByReferenceNode` to represent the value of `x` after the call has
131163
* returned. This node will have its `getArgument()` equal to `&x`.
132164
*/
133-
class DefinitionByReferenceNode extends Node, TDefinitionByReferenceNode {
165+
class DefinitionByReferenceNode extends PartialDefinitionNode {
134166
VariableAccess va;
135167

136168
Expr argument;
137169

138-
DefinitionByReferenceNode() { this = TDefinitionByReferenceNode(va, argument) }
170+
DefinitionByReferenceNode() {
171+
exists(DefinitionByReference def |
172+
def = this.getPartialDefinition() and
173+
argument = def.getDefinedExpr() and
174+
va = def.getVariableAccess()
175+
)
176+
}
139177

140178
override Function getFunction() { result = va.getEnclosingFunction() }
141179

@@ -190,13 +228,64 @@ class UninitializedNode extends Node, TUninitializedNode {
190228
* to the value before the update with the exception of `ClassInstanceExpr`,
191229
* which represents the value after the constructor has run.
192230
*/
193-
class PostUpdateNode extends Node {
194-
PostUpdateNode() { none() } // stub implementation
195-
231+
abstract class PostUpdateNode extends Node {
196232
/**
197233
* Gets the node before the state update.
198234
*/
199-
Node getPreUpdateNode() { none() } // stub implementation
235+
abstract Node getPreUpdateNode();
236+
237+
override Function getFunction() { result = getPreUpdateNode().getFunction() }
238+
239+
override Type getType() { result = getPreUpdateNode().getType() }
240+
241+
override Location getLocation() { result = getPreUpdateNode().getLocation() }
242+
}
243+
244+
class PartialDefinitionNode extends PostUpdateNode, TPartialDefinitionNode {
245+
PartialDefinition pd;
246+
247+
PartialDefinitionNode() { this = TPartialDefinitionNode(pd) }
248+
249+
override Node getPreUpdateNode() { result.asExpr() = pd.getDefinedExpr() }
250+
251+
override Location getLocation() { result = pd.getLocation() }
252+
253+
PartialDefinition getPartialDefinition() { result = pd }
254+
255+
override string toString() { result = getPreUpdateNode().toString() + " [post update]" }
256+
}
257+
258+
/**
259+
* A node representing the object that was just constructed and is identified
260+
* with the "return value" of the constructor call.
261+
*/
262+
private class PostConstructorCallNode extends PostUpdateNode, TExprNode {
263+
PostConstructorCallNode() { this = TExprNode(any(ConstructorCall c)) }
264+
265+
override PreConstructorCallNode getPreUpdateNode() {
266+
TExprNode(result.getConstructorCall()) = this
267+
}
268+
269+
// No override of `toString` since these nodes already have a `toString` from
270+
// their overlap with `ExprNode`.
271+
}
272+
273+
/**
274+
* INTERNAL: do not use.
275+
*
276+
* A synthetic data-flow node that plays the role of the qualifier (or
277+
* `this`-argument) to a constructor call.
278+
*/
279+
class PreConstructorCallNode extends Node, TPreConstructorCallNode {
280+
ConstructorCall getConstructorCall() { this = TPreConstructorCallNode(result) }
281+
282+
override Function getFunction() { result = getConstructorCall().getEnclosingFunction() }
283+
284+
override Type getType() { result = getConstructorCall().getType() }
285+
286+
override Location getLocation() { result = getConstructorCall().getLocation() }
287+
288+
override string toString() { result = getConstructorCall().toString() + " [pre constructor call]" }
200289
}
201290

202291
/**
@@ -207,7 +296,7 @@ ExprNode exprNode(Expr e) { result.getExpr() = e }
207296
/**
208297
* Gets the `Node` corresponding to the value of `p` at function entry.
209298
*/
210-
ParameterNode parameterNode(Parameter p) { result.getParameter() = p }
299+
ParameterNode parameterNode(Parameter p) { result.(ExplicitParameterNode).getParameter() = p }
211300

212301
/**
213302
* Gets the `Node` corresponding to a definition by reference of the variable
@@ -223,6 +312,43 @@ DefinitionByReferenceNode definitionByReferenceNodeFromArgument(Expr argument) {
223312
*/
224313
UninitializedNode uninitializedNode(LocalVariable v) { result.getLocalVariable() = v }
225314

315+
private module ThisFlow {
316+
private Node thisAccessNode(ControlFlowNode cfn) {
317+
result.(ImplicitParameterNode).getFunction().getBlock() = cfn or
318+
result.asExpr().(ThisExpr) = cfn
319+
}
320+
321+
private int basicBlockThisIndex(BasicBlock b, Node thisNode) {
322+
thisNode = thisAccessNode(b.getNode(result))
323+
}
324+
325+
private int thisRank(BasicBlock b, Node thisNode) {
326+
thisNode = rank[result](thisAccessNode(_) as node order by basicBlockThisIndex(b, node))
327+
}
328+
329+
private int lastThisRank(BasicBlock b) { result = max(thisRank(b, _)) }
330+
331+
private predicate thisAccessBlockReaches(BasicBlock b1, BasicBlock b2) {
332+
exists(basicBlockThisIndex(b1, _)) and b2 = b1.getASuccessor()
333+
or
334+
exists(BasicBlock mid |
335+
thisAccessBlockReaches(b1, mid) and
336+
b2 = mid.getASuccessor() and
337+
not exists(basicBlockThisIndex(mid, _))
338+
)
339+
}
340+
341+
predicate adjacentThisRefs(Node n1, Node n2) {
342+
exists(BasicBlock b | thisRank(b, n1) + 1 = thisRank(b, n2))
343+
or
344+
exists(BasicBlock b1, BasicBlock b2 |
345+
lastThisRank(b1) = thisRank(b1, n1) and
346+
thisAccessBlockReaches(b1, b2) and
347+
thisRank(b2, n2) = 1
348+
)
349+
}
350+
}
351+
226352
/**
227353
* Holds if data flows from `nodeFrom` to `nodeTo` in exactly one local
228354
* (intra-procedural) step.
@@ -232,6 +358,8 @@ predicate localFlowStep(Node nodeFrom, Node nodeTo) {
232358
// Expr -> Expr
233359
exprToExprStep_nocfg(nodeFrom.asExpr(), nodeTo.asExpr())
234360
or
361+
exprToExprStep_nocfg(nodeFrom.(PostUpdateNode).getPreUpdateNode().asExpr(), nodeTo.asExpr())
362+
or
235363
// Node -> FlowVar -> VariableAccess
236364
exists(FlowVar var |
237365
(
@@ -241,13 +369,19 @@ predicate localFlowStep(Node nodeFrom, Node nodeTo) {
241369
or
242370
varSourceBaseCase(var, nodeFrom.asUninitialized())
243371
or
244-
var.definedByReference(nodeFrom.asDefiningArgument())
372+
var.definedPartiallyAt(nodeFrom.asPartialDefinition())
245373
) and
246374
varToExprStep(var, nodeTo.asExpr())
247375
)
248376
or
249377
// Expr -> DefinitionByReferenceNode
250378
exprToDefinitionByReferenceStep(nodeFrom.asExpr(), nodeTo.asDefiningArgument())
379+
or
380+
// `this` -> adjacent-`this`
381+
ThisFlow::adjacentThisRefs(nodeFrom, nodeTo)
382+
or
383+
// post-update-`this` -> following-`this`-ref
384+
ThisFlow::adjacentThisRefs(nodeFrom.(PostUpdateNode).getPreUpdateNode(), nodeTo)
251385
}
252386

253387
/**
@@ -294,6 +428,18 @@ private predicate exprToExprStep_nocfg(Expr fromExpr, Expr toExpr) {
294428
or
295429
toExpr = any(StmtExpr stmtExpr | fromExpr = stmtExpr.getResultExpr())
296430
or
431+
// The following case is needed to track the qualifier object for flow
432+
// through fields. It gives flow from `T(x)` to `new T(x)`. That's not
433+
// strictly _data_ flow but _taint_ flow because the type of `fromExpr` is
434+
// `T` while the type of `toExpr` is `T*`.
435+
//
436+
// This discrepancy is an artifact of how `new`-expressions are represented
437+
// in the database in a way that slightly varies from what the standard
438+
// specifies. In the C++ standard, there is no constructor call expression
439+
// `T(x)` after `new`. Instead there is a type `T` and an optional
440+
// initializer `(x)`.
441+
toExpr.(NewExpr).getInitializer() = fromExpr
442+
or
297443
toExpr = any(Call call |
298444
exists(DataFlowFunction f, FunctionInput inModel, FunctionOutput outModel, int iIn |
299445
call.getTarget() = f and

0 commit comments

Comments
 (0)