@@ -9,10 +9,10 @@ private import semmle.code.cpp.models.interfaces.DataFlow
99cached
1010private newtype TNode =
1111 TExprNode ( Expr e ) or
12- TParameterNode ( Parameter p ) { exists ( p . getFunction ( ) . getBlock ( ) ) } or
13- TDefinitionByReferenceNode ( VariableAccess va , Expr argument ) {
14- definitionByReference ( va , argument )
15- } or
12+ TPartialDefinitionNode ( PartialDefinition pd ) or
13+ TPreConstructorCallNode ( ConstructorCall call ) or
14+ TExplicitParameterNode ( Parameter p ) { exists ( p . getFunction ( ) . getBlock ( ) ) } or
15+ TInstanceParameterNode ( MemberFunction f ) { exists ( f . getBlock ( ) ) and not f . isStatic ( ) } or
1616 TUninitializedNode ( LocalVariable v ) { not v .hasInitializer ( ) }
1717
1818/**
@@ -38,11 +38,23 @@ class Node extends TNode {
3838 Expr asExpr ( ) { result = this .( ExprNode ) .getExpr ( ) }
3939
4040 /** Gets the parameter corresponding to this node, if any. */
41- Parameter asParameter ( ) { result = this .( ParameterNode ) .getParameter ( ) }
41+ Parameter asParameter ( ) { result = this .( ExplicitParameterNode ) .getParameter ( ) }
4242
4343 /** Gets the argument that defines this `DefinitionByReferenceNode`, if any. */
4444 Expr asDefiningArgument ( ) { result = this .( DefinitionByReferenceNode ) .getArgument ( ) }
4545
46+ /**
47+ * Gets the expression that is partially defined by this node, if any.
48+ *
49+ * Partial definitions are created for field stores (`x.y = taint();` is a partial
50+ * definition of `x`), and for calls that may change the value of an object (so
51+ * `x.set(taint())` is a partial definition of `x`, and `transfer(&x, taint())` is
52+ * a partial definition of `&x`).
53+ */
54+ Expr asPartialDefinition ( ) {
55+ result = this .( PartialDefinitionNode ) .getPartialDefinition ( ) .getDefinedExpr ( )
56+ }
57+
4658 /**
4759 * Gets the uninitialized local variable corresponding to this node, if
4860 * any.
@@ -94,14 +106,22 @@ class ExprNode extends Node, TExprNode {
94106 Expr getExpr ( ) { result = expr }
95107}
96108
109+ abstract class ParameterNode extends Node , TNode {
110+ /**
111+ * Holds if this node is the parameter of `c` at the specified (zero-based)
112+ * position. The implicit `this` parameter is considered to have index `-1`.
113+ */
114+ abstract predicate isParameterOf ( Function f , int i ) ;
115+ }
116+
97117/**
98118 * The value of a parameter at function entry, viewed as a node in a data
99119 * flow graph.
100120 */
101- class ParameterNode extends Node , TParameterNode {
121+ class ExplicitParameterNode extends ParameterNode , TExplicitParameterNode {
102122 Parameter param ;
103123
104- ParameterNode ( ) { this = TParameterNode ( param ) }
124+ ExplicitParameterNode ( ) { this = TExplicitParameterNode ( param ) }
105125
106126 override Function getFunction ( ) { result = param .getFunction ( ) }
107127
@@ -114,11 +134,23 @@ class ParameterNode extends Node, TParameterNode {
114134 /** Gets the parameter corresponding to this node. */
115135 Parameter getParameter ( ) { result = param }
116136
117- /**
118- * Holds if this node is the parameter of `c` at the specified (zero-based)
119- * position. The implicit `this` parameter is considered to have index `-1`.
120- */
121- predicate isParameterOf ( Function f , int i ) { f .getParameter ( i ) = param }
137+ override predicate isParameterOf ( Function f , int i ) { f .getParameter ( i ) = param }
138+ }
139+
140+ class ImplicitParameterNode extends ParameterNode , TInstanceParameterNode {
141+ MemberFunction f ;
142+
143+ ImplicitParameterNode ( ) { this = TInstanceParameterNode ( f ) }
144+
145+ override Function getFunction ( ) { result = f }
146+
147+ override Type getType ( ) { result = f .getDeclaringType ( ) }
148+
149+ override string toString ( ) { result = "`this` parameter in " + f .getName ( ) }
150+
151+ override Location getLocation ( ) { result = f .getLocation ( ) }
152+
153+ override predicate isParameterOf ( Function fun , int i ) { f = fun and i = - 1 }
122154}
123155
124156/**
@@ -130,12 +162,18 @@ class ParameterNode extends Node, TParameterNode {
130162 * `DefinitionByReferenceNode` to represent the value of `x` after the call has
131163 * returned. This node will have its `getArgument()` equal to `&x`.
132164 */
133- class DefinitionByReferenceNode extends Node , TDefinitionByReferenceNode {
165+ class DefinitionByReferenceNode extends PartialDefinitionNode {
134166 VariableAccess va ;
135167
136168 Expr argument ;
137169
138- DefinitionByReferenceNode ( ) { this = TDefinitionByReferenceNode ( va , argument ) }
170+ DefinitionByReferenceNode ( ) {
171+ exists ( DefinitionByReference def |
172+ def = this .getPartialDefinition ( ) and
173+ argument = def .getDefinedExpr ( ) and
174+ va = def .getVariableAccess ( )
175+ )
176+ }
139177
140178 override Function getFunction ( ) { result = va .getEnclosingFunction ( ) }
141179
@@ -190,13 +228,64 @@ class UninitializedNode extends Node, TUninitializedNode {
190228 * to the value before the update with the exception of `ClassInstanceExpr`,
191229 * which represents the value after the constructor has run.
192230 */
193- class PostUpdateNode extends Node {
194- PostUpdateNode ( ) { none ( ) } // stub implementation
195-
231+ abstract class PostUpdateNode extends Node {
196232 /**
197233 * Gets the node before the state update.
198234 */
199- Node getPreUpdateNode ( ) { none ( ) } // stub implementation
235+ abstract Node getPreUpdateNode ( ) ;
236+
237+ override Function getFunction ( ) { result = getPreUpdateNode ( ) .getFunction ( ) }
238+
239+ override Type getType ( ) { result = getPreUpdateNode ( ) .getType ( ) }
240+
241+ override Location getLocation ( ) { result = getPreUpdateNode ( ) .getLocation ( ) }
242+ }
243+
244+ class PartialDefinitionNode extends PostUpdateNode , TPartialDefinitionNode {
245+ PartialDefinition pd ;
246+
247+ PartialDefinitionNode ( ) { this = TPartialDefinitionNode ( pd ) }
248+
249+ override Node getPreUpdateNode ( ) { result .asExpr ( ) = pd .getDefinedExpr ( ) }
250+
251+ override Location getLocation ( ) { result = pd .getLocation ( ) }
252+
253+ PartialDefinition getPartialDefinition ( ) { result = pd }
254+
255+ override string toString ( ) { result = getPreUpdateNode ( ) .toString ( ) + " [post update]" }
256+ }
257+
258+ /**
259+ * A node representing the object that was just constructed and is identified
260+ * with the "return value" of the constructor call.
261+ */
262+ private class PostConstructorCallNode extends PostUpdateNode , TExprNode {
263+ PostConstructorCallNode ( ) { this = TExprNode ( any ( ConstructorCall c ) ) }
264+
265+ override PreConstructorCallNode getPreUpdateNode ( ) {
266+ TExprNode ( result .getConstructorCall ( ) ) = this
267+ }
268+
269+ // No override of `toString` since these nodes already have a `toString` from
270+ // their overlap with `ExprNode`.
271+ }
272+
273+ /**
274+ * INTERNAL: do not use.
275+ *
276+ * A synthetic data-flow node that plays the role of the qualifier (or
277+ * `this`-argument) to a constructor call.
278+ */
279+ class PreConstructorCallNode extends Node , TPreConstructorCallNode {
280+ ConstructorCall getConstructorCall ( ) { this = TPreConstructorCallNode ( result ) }
281+
282+ override Function getFunction ( ) { result = getConstructorCall ( ) .getEnclosingFunction ( ) }
283+
284+ override Type getType ( ) { result = getConstructorCall ( ) .getType ( ) }
285+
286+ override Location getLocation ( ) { result = getConstructorCall ( ) .getLocation ( ) }
287+
288+ override string toString ( ) { result = getConstructorCall ( ) .toString ( ) + " [pre constructor call]" }
200289}
201290
202291/**
@@ -207,7 +296,7 @@ ExprNode exprNode(Expr e) { result.getExpr() = e }
207296/**
208297 * Gets the `Node` corresponding to the value of `p` at function entry.
209298 */
210- ParameterNode parameterNode ( Parameter p ) { result .getParameter ( ) = p }
299+ ParameterNode parameterNode ( Parameter p ) { result .( ExplicitParameterNode ) . getParameter ( ) = p }
211300
212301/**
213302 * Gets the `Node` corresponding to a definition by reference of the variable
@@ -223,6 +312,43 @@ DefinitionByReferenceNode definitionByReferenceNodeFromArgument(Expr argument) {
223312 */
224313UninitializedNode uninitializedNode ( LocalVariable v ) { result .getLocalVariable ( ) = v }
225314
315+ private module ThisFlow {
316+ private Node thisAccessNode ( ControlFlowNode cfn ) {
317+ result .( ImplicitParameterNode ) .getFunction ( ) .getBlock ( ) = cfn or
318+ result .asExpr ( ) .( ThisExpr ) = cfn
319+ }
320+
321+ private int basicBlockThisIndex ( BasicBlock b , Node thisNode ) {
322+ thisNode = thisAccessNode ( b .getNode ( result ) )
323+ }
324+
325+ private int thisRank ( BasicBlock b , Node thisNode ) {
326+ thisNode = rank [ result ] ( thisAccessNode ( _) as node order by basicBlockThisIndex ( b , node ) )
327+ }
328+
329+ private int lastThisRank ( BasicBlock b ) { result = max ( thisRank ( b , _) ) }
330+
331+ private predicate thisAccessBlockReaches ( BasicBlock b1 , BasicBlock b2 ) {
332+ exists ( basicBlockThisIndex ( b1 , _) ) and b2 = b1 .getASuccessor ( )
333+ or
334+ exists ( BasicBlock mid |
335+ thisAccessBlockReaches ( b1 , mid ) and
336+ b2 = mid .getASuccessor ( ) and
337+ not exists ( basicBlockThisIndex ( mid , _) )
338+ )
339+ }
340+
341+ predicate adjacentThisRefs ( Node n1 , Node n2 ) {
342+ exists ( BasicBlock b | thisRank ( b , n1 ) + 1 = thisRank ( b , n2 ) )
343+ or
344+ exists ( BasicBlock b1 , BasicBlock b2 |
345+ lastThisRank ( b1 ) = thisRank ( b1 , n1 ) and
346+ thisAccessBlockReaches ( b1 , b2 ) and
347+ thisRank ( b2 , n2 ) = 1
348+ )
349+ }
350+ }
351+
226352/**
227353 * Holds if data flows from `nodeFrom` to `nodeTo` in exactly one local
228354 * (intra-procedural) step.
@@ -232,6 +358,8 @@ predicate localFlowStep(Node nodeFrom, Node nodeTo) {
232358 // Expr -> Expr
233359 exprToExprStep_nocfg ( nodeFrom .asExpr ( ) , nodeTo .asExpr ( ) )
234360 or
361+ exprToExprStep_nocfg ( nodeFrom .( PostUpdateNode ) .getPreUpdateNode ( ) .asExpr ( ) , nodeTo .asExpr ( ) )
362+ or
235363 // Node -> FlowVar -> VariableAccess
236364 exists ( FlowVar var |
237365 (
@@ -241,13 +369,19 @@ predicate localFlowStep(Node nodeFrom, Node nodeTo) {
241369 or
242370 varSourceBaseCase ( var , nodeFrom .asUninitialized ( ) )
243371 or
244- var .definedByReference ( nodeFrom .asDefiningArgument ( ) )
372+ var .definedPartiallyAt ( nodeFrom .asPartialDefinition ( ) )
245373 ) and
246374 varToExprStep ( var , nodeTo .asExpr ( ) )
247375 )
248376 or
249377 // Expr -> DefinitionByReferenceNode
250378 exprToDefinitionByReferenceStep ( nodeFrom .asExpr ( ) , nodeTo .asDefiningArgument ( ) )
379+ or
380+ // `this` -> adjacent-`this`
381+ ThisFlow:: adjacentThisRefs ( nodeFrom , nodeTo )
382+ or
383+ // post-update-`this` -> following-`this`-ref
384+ ThisFlow:: adjacentThisRefs ( nodeFrom .( PostUpdateNode ) .getPreUpdateNode ( ) , nodeTo )
251385}
252386
253387/**
@@ -294,6 +428,18 @@ private predicate exprToExprStep_nocfg(Expr fromExpr, Expr toExpr) {
294428 or
295429 toExpr = any ( StmtExpr stmtExpr | fromExpr = stmtExpr .getResultExpr ( ) )
296430 or
431+ // The following case is needed to track the qualifier object for flow
432+ // through fields. It gives flow from `T(x)` to `new T(x)`. That's not
433+ // strictly _data_ flow but _taint_ flow because the type of `fromExpr` is
434+ // `T` while the type of `toExpr` is `T*`.
435+ //
436+ // This discrepancy is an artifact of how `new`-expressions are represented
437+ // in the database in a way that slightly varies from what the standard
438+ // specifies. In the C++ standard, there is no constructor call expression
439+ // `T(x)` after `new`. Instead there is a type `T` and an optional
440+ // initializer `(x)`.
441+ toExpr .( NewExpr ) .getInitializer ( ) = fromExpr
442+ or
297443 toExpr = any ( Call call |
298444 exists ( DataFlowFunction f , FunctionInput inModel , FunctionOutput outModel , int iIn |
299445 call .getTarget ( ) = f and
0 commit comments