Skip to content

Commit fc84286

Browse files
authored
Merge pull request #3830 from yoff/SharedDataflow_FieldFlow
Python: Shared dataflow: Field flow
2 parents ea5feb2 + 4621e6d commit fc84286

25 files changed

+1546
-144
lines changed

python/ql/src/experimental/dataflow/internal/DataFlowPrivate.qll

Lines changed: 162 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -16,58 +16,101 @@ class DataFlowCfgNode extends ControlFlowNode {
1616
DataFlowCfgNode() { isExpressionNode(this) }
1717
}
1818

19-
/** A data flow node which should have an associated post-update node. */
20-
abstract class PreUpdateNode extends Node { }
19+
/** A data flow node for which we should synthesize an associated pre-update node. */
20+
abstract class NeedsSyntheticPreUpdateNode extends Node {
21+
/** A label for this kind of node. This will figure in the textual representation of the synthesized pre-update node. */
22+
abstract string label();
23+
}
24+
25+
class SyntheticPreUpdateNode extends Node, TSyntheticPreUpdateNode {
26+
NeedsSyntheticPreUpdateNode post;
27+
28+
SyntheticPreUpdateNode() { this = TSyntheticPreUpdateNode(post) }
29+
30+
/** Gets the node for which this is a synthetic pre-update node. */
31+
Node getPostUpdateNode() { result = post }
32+
33+
override string toString() { result = "[pre " + post.label() + "] " + post.toString() }
34+
35+
override Scope getScope() { result = post.getScope() }
36+
37+
override Location getLocation() { result = post.getLocation() }
38+
}
39+
40+
/** A data flow node for which we should synthesize an associated post-update node. */
41+
abstract class NeedsSyntheticPostUpdateNode extends Node {
42+
/** A label for this kind of node. This will figure in the textual representation of the synthesized post-update node. */
43+
abstract string label();
44+
}
2145

2246
/** An argument might have its value changed as a result of a call. */
23-
class ArgumentPreUpdateNode extends PreUpdateNode, ArgumentNode { }
47+
class ArgumentPreUpdateNode extends NeedsSyntheticPostUpdateNode, ArgumentNode {
48+
// Certain arguments, such as implicit self arguments, are already post-update nodes
49+
// and should not have an extra node synthesized.
50+
ArgumentPreUpdateNode() {
51+
this = any(CallNodeCall c).getArg(_)
52+
or
53+
this = any(SpecialCall c).getArg(_)
54+
or
55+
// Avoid argument 0 of class calls as those have non-synthetic post-update nodes.
56+
exists(ClassCall c, int n | n > 0 | this = c.getArg(n))
57+
}
58+
59+
override string label() { result = "arg" }
60+
}
2461

2562
/** An object might have its value changed after a store. */
26-
class StorePreUpdateNode extends PreUpdateNode, CfgNode {
63+
class StorePreUpdateNode extends NeedsSyntheticPostUpdateNode, CfgNode {
2764
StorePreUpdateNode() {
2865
exists(Attribute a |
2966
node = a.getObject().getAFlowNode() and
3067
a.getCtx() instanceof Store
3168
)
3269
}
70+
71+
override string label() { result = "store" }
3372
}
3473

3574
/** An object might have its state changed after a read of one of its attributes. */
36-
class ReadPreUpdateNode extends PreUpdateNode, CfgNode {
75+
class ReadPreUpdateNode extends NeedsSyntheticPostUpdateNode, CfgNode {
3776
ReadPreUpdateNode() {
3877
exists(Attribute a |
3978
node = a.getObject().getAFlowNode() and
4079
a.getCtx() instanceof Load
4180
)
4281
}
82+
83+
override string label() { result = "read" }
4384
}
4485

45-
/**
46-
* A node associated with an object after an operation that might have
47-
* changed its state.
48-
*
49-
* This can be either the argument to a callable after the callable returns
50-
* (which might have mutated the argument), or the qualifier of a field after
51-
* an update to the field.
52-
*
53-
* Nodes corresponding to AST elements, for example `ExprNode`, usually refer
54-
* to the value before the update.
55-
*/
56-
class PostUpdateNode extends Node, TPostUpdateNode {
57-
PreUpdateNode pre;
86+
/** A post-update node is synthesized for all nodes which satisfy `NeedsSyntheticPostUpdateNode`. */
87+
class SyntheticPostUpdateNode extends PostUpdateNode, TSyntheticPostUpdateNode {
88+
NeedsSyntheticPostUpdateNode pre;
5889

59-
PostUpdateNode() { this = TPostUpdateNode(pre) }
90+
SyntheticPostUpdateNode() { this = TSyntheticPostUpdateNode(pre) }
6091

61-
/** Gets the node before the state update. */
62-
Node getPreUpdateNode() { result = pre }
92+
override Node getPreUpdateNode() { result = pre }
6393

64-
override string toString() { result = "[post] " + pre.toString() }
94+
override string toString() { result = "[post " + pre.label() + "] " + pre.toString() }
6595

6696
override Scope getScope() { result = pre.getScope() }
6797

6898
override Location getLocation() { result = pre.getLocation() }
6999
}
70100

101+
/**
102+
* Calls to constructors are treated as post-update nodes for the synthesized argument
103+
* that is mapped to the `self` parameter. That way, constructor calls represent the value of the
104+
* object after the constructor (currently only `__init__`) has run.
105+
*/
106+
class ObjectCreationNode extends PostUpdateNode, NeedsSyntheticPreUpdateNode, CfgNode {
107+
ObjectCreationNode() { node.(CallNode) = any(ClassCall c).getNode() }
108+
109+
override Node getPreUpdateNode() { result.(SyntheticPreUpdateNode).getPostUpdateNode() = this }
110+
111+
override string label() { result = "objCreate" }
112+
}
113+
71114
class DataFlowExpr = Expr;
72115

73116
/**
@@ -191,16 +234,18 @@ private Node update(Node node) {
191234
//--------
192235
// Global flow
193236
//--------
237+
//
194238
/**
195239
* IPA type for DataFlowCallable.
196-
* A callable is either a callable value or a class.
240+
*
241+
* A callable is either a callable value or a module (for enclosing `ModuleVariableNode`s).
242+
* A module has no calls.
197243
*/
198244
newtype TDataFlowCallable =
199245
TCallableValue(CallableValue callable) or
200-
TClassValue(ClassValue c) or
201246
TModule(Module m)
202247

203-
/** Represents a callable */
248+
/** Represents a callable. */
204249
abstract class DataFlowCallable extends TDataFlowCallable {
205250
/** Gets a textual representation of this element. */
206251
abstract string toString();
@@ -218,6 +263,7 @@ abstract class DataFlowCallable extends TDataFlowCallable {
218263
abstract string getName();
219264
}
220265

266+
/** A class representing a callable value. */
221267
class DataFlowCallableValue extends DataFlowCallable, TCallableValue {
222268
CallableValue callable;
223269

@@ -234,24 +280,6 @@ class DataFlowCallableValue extends DataFlowCallable, TCallableValue {
234280
override string getName() { result = callable.getName() }
235281
}
236282

237-
class DataFlowClassValue extends DataFlowCallable, TClassValue {
238-
ClassValue c;
239-
240-
DataFlowClassValue() { this = TClassValue(c) }
241-
242-
override string toString() { result = c.toString() }
243-
244-
override CallNode getACall() { result = c.getACall() }
245-
246-
override Scope getScope() { result = c.getScope() }
247-
248-
override NameNode getParameter(int n) {
249-
result.getNode() = c.getScope().getInitMethod().getArg(n + 1).asName()
250-
}
251-
252-
override string getName() { result = c.getName() }
253-
}
254-
255283
/** A class representing the scope in which a `ModuleVariableNode` appears. */
256284
class DataFlowModuleScope extends DataFlowCallable, TModule {
257285
Module mod;
@@ -269,10 +297,24 @@ class DataFlowModuleScope extends DataFlowCallable, TModule {
269297
override string getName() { result = mod.getName() }
270298
}
271299

300+
/**
301+
* IPA type for DataFlowCall.
302+
*
303+
* Calls corresponding to `CallNode`s are either to callable values or to classes.
304+
* The latter are directed to the callable corresponding to the `__init__` method of the class.
305+
*
306+
* An `__init__` method can also be called directly, so that the callable can be targeted by
307+
* different types of calls. In that case, the parameter mappings will be different,
308+
* as the class call will synthesize an argument node to be mapped to the `self` parameter.
309+
*
310+
* A call corresponding to a special method call is handled by the corresponding `SpecialMethodCallNode`.
311+
*/
272312
newtype TDataFlowCall =
273-
TCallNode(CallNode call) or
313+
TCallNode(CallNode call) { call = any(CallableValue c).getACall() } or
314+
TClassCall(CallNode call) { call = any(ClassValue c).getACall() } or
274315
TSpecialCall(SpecialMethodCallNode special)
275316

317+
/** Represents a call. */
276318
abstract class DataFlowCall extends TDataFlowCall {
277319
/** Gets a textual representation of this element. */
278320
abstract string toString();
@@ -281,7 +323,7 @@ abstract class DataFlowCall extends TDataFlowCall {
281323
abstract DataFlowCallable getCallable();
282324

283325
/** Gets the specified argument to this call. */
284-
abstract ControlFlowNode getArg(int n);
326+
abstract Node getArg(int n);
285327

286328
/** Gets the control flow node representing this call. */
287329
abstract ControlFlowNode getNode();
@@ -290,7 +332,7 @@ abstract class DataFlowCall extends TDataFlowCall {
290332
abstract DataFlowCallable getEnclosingCallable();
291333
}
292334

293-
/** Represents a call to a callable. */
335+
/** Represents a call to a callable (currently only callable values). */
294336
class CallNodeCall extends DataFlowCall, TCallNode {
295337
CallNode call;
296338
DataFlowCallable callable;
@@ -302,7 +344,7 @@ class CallNodeCall extends DataFlowCall, TCallNode {
302344

303345
override string toString() { result = call.toString() }
304346

305-
override ControlFlowNode getArg(int n) { result = call.getArg(n) }
347+
override Node getArg(int n) { result = TCfgNode(call.getArg(n)) }
306348

307349
override ControlFlowNode getNode() { result = call }
308350

@@ -311,6 +353,36 @@ class CallNodeCall extends DataFlowCall, TCallNode {
311353
override DataFlowCallable getEnclosingCallable() { result.getScope() = call.getNode().getScope() }
312354
}
313355

356+
/** Represents a call to a class. */
357+
class ClassCall extends DataFlowCall, TClassCall {
358+
CallNode call;
359+
ClassValue c;
360+
361+
ClassCall() {
362+
this = TClassCall(call) and
363+
call = c.getACall()
364+
}
365+
366+
override string toString() { result = call.toString() }
367+
368+
override Node getArg(int n) {
369+
n > 0 and result = TCfgNode(call.getArg(n - 1))
370+
or
371+
n = 0 and result = TSyntheticPreUpdateNode(TCfgNode(call))
372+
}
373+
374+
override ControlFlowNode getNode() { result = call }
375+
376+
override DataFlowCallable getCallable() {
377+
exists(CallableValue callable |
378+
result = TCallableValue(callable) and
379+
c.getScope().getInitMethod() = callable.getScope()
380+
)
381+
}
382+
383+
override DataFlowCallable getEnclosingCallable() { result.getScope() = call.getScope() }
384+
}
385+
314386
/** Represents a call to a special method. */
315387
class SpecialCall extends DataFlowCall, TSpecialCall {
316388
SpecialMethodCallNode special;
@@ -319,7 +391,7 @@ class SpecialCall extends DataFlowCall, TSpecialCall {
319391

320392
override string toString() { result = special.toString() }
321393

322-
override ControlFlowNode getArg(int n) { result = special.(SpecialMethod::Potential).getArg(n) }
394+
override Node getArg(int n) { result = TCfgNode(special.(SpecialMethod::Potential).getArg(n)) }
323395

324396
override ControlFlowNode getNode() { result = special }
325397

@@ -333,11 +405,11 @@ class SpecialCall extends DataFlowCall, TSpecialCall {
333405
}
334406

335407
/** A data flow node that represents a call argument. */
336-
class ArgumentNode extends CfgNode {
337-
ArgumentNode() { exists(DataFlowCall call, int pos | node = call.getArg(pos)) }
408+
class ArgumentNode extends Node {
409+
ArgumentNode() { this = any(DataFlowCall c).getArg(_) }
338410

339411
/** Holds if this argument occurs at the given position in the given call. */
340-
predicate argumentOf(DataFlowCall call, int pos) { node = call.getArg(pos) }
412+
predicate argumentOf(DataFlowCall call, int pos) { this = call.getArg(pos) }
341413

342414
/** Gets the call in which this node is an argument. */
343415
final DataFlowCall getCall() { this.argumentOf(result, _) }
@@ -453,6 +525,8 @@ predicate storeStep(Node nodeFrom, Content c, Node nodeTo) {
453525
dictStoreStep(nodeFrom, c, nodeTo)
454526
or
455527
comprehensionStoreStep(nodeFrom, c, nodeTo)
528+
or
529+
attributeStoreStep(nodeFrom, c, nodeTo)
456530
}
457531

458532
/** Data flows from an element of a list to the list. */
@@ -531,6 +605,23 @@ predicate comprehensionStoreStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
531605
c instanceof ListElementContent
532606
}
533607

608+
/**
609+
* Holds if `nodeFrom` flows into an attribute (corresponding to `c`) of `nodeTo` via an attribute assignment.
610+
*
611+
* For example, in
612+
* ```python
613+
* obj.foo = x
614+
* ```
615+
* data flows from `x` to (the post-update node for) `obj` via assignment to `foo`.
616+
*/
617+
predicate attributeStoreStep(CfgNode nodeFrom, AttributeContent c, PostUpdateNode nodeTo) {
618+
exists(AttrNode attr |
619+
nodeFrom.asCfgNode() = attr.(DefinitionNode).getValue() and
620+
attr.getName() = c.getAttribute() and
621+
attr.getObject() = nodeTo.getPreUpdateNode().(CfgNode).getNode()
622+
)
623+
}
624+
534625
/**
535626
* Holds if data can flow from `nodeFrom` to `nodeTo` via a read of content `c`.
536627
*/
@@ -540,6 +631,8 @@ predicate readStep(Node nodeFrom, Content c, Node nodeTo) {
540631
popReadStep(nodeFrom, c, nodeTo)
541632
or
542633
comprehensionReadStep(nodeFrom, c, nodeTo)
634+
or
635+
attributeReadStep(nodeFrom, c, nodeTo)
543636
}
544637

545638
/** Data flows from a sequence to a subscript of the sequence. */
@@ -626,6 +719,24 @@ predicate comprehensionReadStep(CfgNode nodeFrom, Content c, EssaNode nodeTo) {
626719
)
627720
}
628721

722+
/**
723+
* Holds if `nodeTo` is a read of an attribute (corresponding to `c`) of the object in `nodeFrom`.
724+
*
725+
* For example, in
726+
* ```python
727+
* obj.foo
728+
* ```
729+
* data flows from `obj` to `obj.foo` via a read from `foo`.
730+
*/
731+
predicate attributeReadStep(CfgNode nodeFrom, AttributeContent c, CfgNode nodeTo) {
732+
exists(AttrNode attr |
733+
nodeFrom.asCfgNode() = attr.getObject() and
734+
nodeTo.asCfgNode() = attr and
735+
attr.getName() = c.getAttribute() and
736+
attr.isLoad()
737+
)
738+
}
739+
629740
/**
630741
* Holds if values stored inside content `c` are cleared at node `n`. For example,
631742
* any value stored inside `f` is cleared at the pre-update node associated with `x`

0 commit comments

Comments
 (0)