@@ -16,58 +16,101 @@ class DataFlowCfgNode extends ControlFlowNode {
1616 DataFlowCfgNode ( ) { isExpressionNode ( this ) }
1717}
1818
19- /** A data flow node which should have an associated post-update node. */
20- abstract class PreUpdateNode extends Node { }
19+ /** A data flow node for which we should synthesize an associated pre-update node. */
20+ abstract class NeedsSyntheticPreUpdateNode extends Node {
21+ /** Gets a label for this kind of node. The label appears in the textual representation of the synthesized pre-update node. */
22+ abstract string label ( ) ;
23+ }
24+
25+ class SyntheticPreUpdateNode extends Node , TSyntheticPreUpdateNode { // the pre-update node synthesized for a `NeedsSyntheticPreUpdateNode`
26+ NeedsSyntheticPreUpdateNode post ; // the node this pre-update node was synthesized for
27+
28+ SyntheticPreUpdateNode ( ) { this = TSyntheticPreUpdateNode ( post ) }
29+
30+ /** Gets the node for which this is a synthetic pre-update node. */
31+ Node getPostUpdateNode ( ) { result = post }
32+
33+ override string toString ( ) { result = "[pre " + post .label ( ) + "] " + post .toString ( ) } // rendered as "[pre <label>] <post node>"
34+
35+ override Scope getScope ( ) { result = post .getScope ( ) } // shares the scope of `post`
36+
37+ override Location getLocation ( ) { result = post .getLocation ( ) } // shares the location of `post`
38+ }
39+
40+ /** A data flow node for which we should synthesize an associated post-update node. */
41+ abstract class NeedsSyntheticPostUpdateNode extends Node {
42+ /** Gets a label for this kind of node. The label appears in the textual representation of the synthesized post-update node. */
43+ abstract string label ( ) ;
44+ }
2145
2246/** An argument might have its value changed as a result of a call. */
23- class ArgumentPreUpdateNode extends PreUpdateNode , ArgumentNode { }
47+ class ArgumentPreUpdateNode extends NeedsSyntheticPostUpdateNode , ArgumentNode {
48+ // Certain arguments, such as implicit self arguments, are already post-update nodes
49+ // and should not have an extra node synthesized.
50+ ArgumentPreUpdateNode ( ) {
51+ this = any ( CallNodeCall c ) .getArg ( _)
52+ or
53+ this = any ( SpecialCall c ) .getArg ( _)
54+ or
55+ // Avoid argument 0 of class calls, as those have non-synthetic post-update nodes.
56+ exists ( ClassCall c , int n | n > 0 | this = c .getArg ( n ) )
57+ }
58+
59+ override string label ( ) { result = "arg" }
60+ }
2461
2562/** An object might have its value changed after a store. */
26- class StorePreUpdateNode extends PreUpdateNode , CfgNode {
63+ class StorePreUpdateNode extends NeedsSyntheticPostUpdateNode , CfgNode {
2764 StorePreUpdateNode ( ) {
2865 exists ( Attribute a |
2966 node = a .getObject ( ) .getAFlowNode ( ) and // this node is a flow node for the object of attribute expression `a`
3067 a .getCtx ( ) instanceof Store // and `a` occurs in a store context
3168 )
3269 }
70+
71+ override string label ( ) { result = "store" }
3372}
3473
3574/** A node marking the state change of an object after a read. */
36- class ReadPreUpdateNode extends PreUpdateNode , CfgNode {
75+ class ReadPreUpdateNode extends NeedsSyntheticPostUpdateNode , CfgNode {
3776 ReadPreUpdateNode ( ) {
3877 exists ( Attribute a |
3978 node = a .getObject ( ) .getAFlowNode ( ) and // this node is a flow node for the object of attribute expression `a`
4079 a .getCtx ( ) instanceof Load // and `a` occurs in a load context
4180 )
4281 }
82+
83+ override string label ( ) { result = "read" }
4384}
4485
45- /**
46- * A node associated with an object after an operation that might have
47- * changed its state.
48- *
49- * This can be either the argument to a callable after the callable returns
50- * (which might have mutated the argument), or the qualifier of a field after
51- * an update to the field.
52- *
53- * Nodes corresponding to AST elements, for example `ExprNode`, usually refer
54- * to the value before the update.
55- */
56- class PostUpdateNode extends Node , TPostUpdateNode {
57- PreUpdateNode pre ;
86+ /** A post-update node is synthesized for all nodes which satisfy `NeedsSyntheticPostUpdateNode`. */
87+ class SyntheticPostUpdateNode extends PostUpdateNode , TSyntheticPostUpdateNode {
88+ NeedsSyntheticPostUpdateNode pre ;
5889
59- PostUpdateNode ( ) { this = TPostUpdateNode ( pre ) }
90+ SyntheticPostUpdateNode ( ) { this = TSyntheticPostUpdateNode ( pre ) }
6091
61- /** Gets the node before the state update. */
62- Node getPreUpdateNode ( ) { result = pre }
92+ override Node getPreUpdateNode ( ) { result = pre }
6393
64- override string toString ( ) { result = "[post] " + pre .toString ( ) }
94+ override string toString ( ) { result = "[post " + pre . label ( ) + "] " + pre .toString ( ) } // rendered as "[post <label>] <pre node>", the same bracket layout as the "[pre <label>] " prefix of synthetic pre-update nodes
6595
6696 override Scope getScope ( ) { result = pre .getScope ( ) }
6797
6898 override Location getLocation ( ) { result = pre .getLocation ( ) }
6999}
70100
101+ /**
102+ * Calls to constructors are treated as post-update nodes for the synthesized argument
103+ * that is mapped to the `self` parameter. That way, constructor calls represent the value of the
104+ * object after the constructor (currently only `__init__`) has run.
105+ */
106+ class ObjectCreationNode extends PostUpdateNode , NeedsSyntheticPreUpdateNode , CfgNode {
107+ ObjectCreationNode ( ) { node .( CallNode ) = any ( ClassCall c ) .getNode ( ) } // the CFG node of some call to a class
108+
109+ override Node getPreUpdateNode ( ) { result .( SyntheticPreUpdateNode ) .getPostUpdateNode ( ) = this } // the synthetic pre-update node whose post-update node is this call
110+
111+ override string label ( ) { result = "objCreate" }
112+ }
113+
71114class DataFlowExpr = Expr ;
72115
73116/**
@@ -191,16 +234,18 @@ private Node update(Node node) {
191234//--------
192235// Global flow
193236//--------
237+ //
194238/**
195239 * IPA type for DataFlowCallable.
196- * A callable is either a callable value or a class.
240+ *
241+ * A callable is either a callable value or a module (serving as the enclosing scope of `ModuleVariableNode`s).
242+ * A module has no calls.
197243 */
198244newtype TDataFlowCallable =
199245 TCallableValue ( CallableValue callable ) or
200- TClassValue ( ClassValue c ) or
201246 TModule ( Module m )
202247
203- /** Represents a callable */
248+ /** Represents a callable. */
204249abstract class DataFlowCallable extends TDataFlowCallable {
205250 /** Gets a textual representation of this element. */
206251 abstract string toString ( ) ;
@@ -218,6 +263,7 @@ abstract class DataFlowCallable extends TDataFlowCallable {
218263 abstract string getName ( ) ;
219264}
220265
266+ /** A class representing a callable value. */
221267class DataFlowCallableValue extends DataFlowCallable , TCallableValue {
222268 CallableValue callable ;
223269
@@ -234,24 +280,6 @@ class DataFlowCallableValue extends DataFlowCallable, TCallableValue {
234280 override string getName ( ) { result = callable .getName ( ) }
235281}
236282
237- class DataFlowClassValue extends DataFlowCallable , TClassValue {
238- ClassValue c ;
239-
240- DataFlowClassValue ( ) { this = TClassValue ( c ) }
241-
242- override string toString ( ) { result = c .toString ( ) }
243-
244- override CallNode getACall ( ) { result = c .getACall ( ) }
245-
246- override Scope getScope ( ) { result = c .getScope ( ) }
247-
248- override NameNode getParameter ( int n ) {
249- result .getNode ( ) = c .getScope ( ) .getInitMethod ( ) .getArg ( n + 1 ) .asName ( )
250- }
251-
252- override string getName ( ) { result = c .getName ( ) }
253- }
254-
255283/** A class representing the scope in which a `ModuleVariableNode` appears. */
256284class DataFlowModuleScope extends DataFlowCallable , TModule {
257285 Module mod ;
@@ -269,10 +297,24 @@ class DataFlowModuleScope extends DataFlowCallable, TModule {
269297 override string getName ( ) { result = mod .getName ( ) }
270298}
271299
300+ /**
301+ * IPA type for DataFlowCall.
302+ *
303+ * Calls corresponding to `CallNode`s are either to callable values or to classes.
304+ * The latter are directed to the callable corresponding to the `__init__` method of the class.
305+ *
306+ * An `__init__` method can also be called directly, so the same callable can be targeted by
307+ * different kinds of calls. In that case, the parameter mappings will be different,
308+ * as the class call will synthesize an argument node to be mapped to the `self` parameter.
309+ *
310+ * A call corresponding to a special method call is represented by the corresponding `SpecialMethodCallNode`.
311+ */
272312newtype TDataFlowCall =
273- TCallNode ( CallNode call ) or
313+ TCallNode ( CallNode call ) { call = any ( CallableValue c ) .getACall ( ) } or // a call node that is a call to some callable value
314+ TClassCall ( CallNode call ) { call = any ( ClassValue c ) .getACall ( ) } or // a call node that is a call to some class
274315 TSpecialCall ( SpecialMethodCallNode special )
275316
317+ /** Represents a call. */
276318abstract class DataFlowCall extends TDataFlowCall {
277319 /** Gets a textual representation of this element. */
278320 abstract string toString ( ) ;
@@ -281,7 +323,7 @@ abstract class DataFlowCall extends TDataFlowCall {
281323 abstract DataFlowCallable getCallable ( ) ;
282324
283325 /** Get the specified argument to this call. */
284- abstract ControlFlowNode getArg ( int n ) ;
326+ abstract Node getArg ( int n ) ;
285327
286328 /** Get the control flow node representing this call. */
287329 abstract ControlFlowNode getNode ( ) ;
@@ -290,7 +332,7 @@ abstract class DataFlowCall extends TDataFlowCall {
290332 abstract DataFlowCallable getEnclosingCallable ( ) ;
291333}
292334
293- /** Represents a call to a callable. */
335+ /** Represents a call to a callable (currently only callable values) . */
294336class CallNodeCall extends DataFlowCall , TCallNode {
295337 CallNode call ;
296338 DataFlowCallable callable ;
@@ -302,7 +344,7 @@ class CallNodeCall extends DataFlowCall, TCallNode {
302344
303345 override string toString ( ) { result = call .toString ( ) }
304346
305- override ControlFlowNode getArg ( int n ) { result = call .getArg ( n ) }
347+ override Node getArg ( int n ) { result = TCfgNode ( call .getArg ( n ) ) }
306348
307349 override ControlFlowNode getNode ( ) { result = call }
308350
@@ -311,6 +353,36 @@ class CallNodeCall extends DataFlowCall, TCallNode {
311353 override DataFlowCallable getEnclosingCallable ( ) { result .getScope ( ) = call .getNode ( ) .getScope ( ) }
312354}
313355
356+ /** Represents a call to a class. */
357+ class ClassCall extends DataFlowCall , TClassCall {
358+ CallNode call ; // the control flow node of the call
359+ ClassValue c ; // the class being called
360+
361+ ClassCall ( ) {
362+ this = TClassCall ( call ) and
363+ call = c .getACall ( )
364+ }
365+
366+ override string toString ( ) { result = call .toString ( ) }
367+
368+ override Node getArg ( int n ) {
369+ n > 0 and result = TCfgNode ( call .getArg ( n - 1 ) ) // explicit arguments are shifted up by one position
370+ or
371+ n = 0 and result = TSyntheticPreUpdateNode ( TCfgNode ( call ) ) // argument 0 is the synthetic pre-update node of the call itself
372+ }
373+
374+ override ControlFlowNode getNode ( ) { result = call }
375+
376+ override DataFlowCallable getCallable ( ) {
377+ exists ( CallableValue callable |
378+ result = TCallableValue ( callable ) and
379+ c .getScope ( ) .getInitMethod ( ) = callable .getScope ( ) // the callable whose scope is the init method of the class
380+ )
381+ }
382+
383+ override DataFlowCallable getEnclosingCallable ( ) { result .getScope ( ) = call .getScope ( ) }
384+ }
385+
314386/** Represents a call to a special method. */
315387class SpecialCall extends DataFlowCall , TSpecialCall {
316388 SpecialMethodCallNode special ;
@@ -319,7 +391,7 @@ class SpecialCall extends DataFlowCall, TSpecialCall {
319391
320392 override string toString ( ) { result = special .toString ( ) }
321393
322- override ControlFlowNode getArg ( int n ) { result = special .( SpecialMethod:: Potential ) .getArg ( n ) }
394+ override Node getArg ( int n ) { result = TCfgNode ( special .( SpecialMethod:: Potential ) .getArg ( n ) ) }
323395
324396 override ControlFlowNode getNode ( ) { result = special }
325397
@@ -333,11 +405,11 @@ class SpecialCall extends DataFlowCall, TSpecialCall {
333405}
334406
335407/** A data flow node that represents a call argument. */
336- class ArgumentNode extends CfgNode {
337- ArgumentNode ( ) { exists ( DataFlowCall call , int pos | node = call .getArg ( pos ) ) }
408+ class ArgumentNode extends Node {
409+ ArgumentNode ( ) { this = any ( DataFlowCall c ) .getArg ( _ ) }
338410
339411 /** Holds if this argument occurs at the given position in the given call. */
340- predicate argumentOf ( DataFlowCall call , int pos ) { node = call .getArg ( pos ) }
412+ predicate argumentOf ( DataFlowCall call , int pos ) { this = call .getArg ( pos ) }
341413
342414 /** Gets the call in which this node is an argument. */
343415 final DataFlowCall getCall ( ) { this .argumentOf ( result , _) }
@@ -453,6 +525,8 @@ predicate storeStep(Node nodeFrom, Content c, Node nodeTo) {
453525 dictStoreStep ( nodeFrom , c , nodeTo )
454526 or
455527 comprehensionStoreStep ( nodeFrom , c , nodeTo )
528+ or
529+ attributeStoreStep ( nodeFrom , c , nodeTo )
456530}
457531
458532/** Data flows from an element of a list to the list. */
@@ -531,6 +605,23 @@ predicate comprehensionStoreStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
531605 c instanceof ListElementContent
532606}
533607
608+ /**
609+ * Holds if `nodeFrom` flows into an attribute (corresponding to `c`) of `nodeTo` via an attribute assignment.
610+ *
611+ * For example, in
612+ * ```python
613+ * obj.foo = x
614+ * ```
615+ * data flows from `x` to (the post-update node for) `obj` via assignment to `foo`.
616+ */
617+ predicate attributeStoreStep ( CfgNode nodeFrom , AttributeContent c , PostUpdateNode nodeTo ) {
618+ exists ( AttrNode attr |
619+ nodeFrom .asCfgNode ( ) = attr .( DefinitionNode ) .getValue ( ) and // `nodeFrom` is the value of the definition `attr` (`x` in the example)
620+ attr .getName ( ) = c .getAttribute ( ) and // `c` names the attribute (`foo` in the example)
621+ attr .getObject ( ) = nodeTo .getPreUpdateNode ( ) .( CfgNode ) .getNode ( ) // `nodeTo` is a post-update node of the object (`obj` in the example)
622+ )
623+ }
624+
534625/**
535626 * Holds if data can flow from `nodeFrom` to `nodeTo` via a read of content `c`.
536627 */
@@ -540,6 +631,8 @@ predicate readStep(Node nodeFrom, Content c, Node nodeTo) {
540631 popReadStep ( nodeFrom , c , nodeTo )
541632 or
542633 comprehensionReadStep ( nodeFrom , c , nodeTo )
634+ or
635+ attributeReadStep ( nodeFrom , c , nodeTo )
543636}
544637
545638/** Data flows from a sequence to a subscript of the sequence. */
@@ -626,6 +719,24 @@ predicate comprehensionReadStep(CfgNode nodeFrom, Content c, EssaNode nodeTo) {
626719 )
627720}
628721
722+ /**
723+ * Holds if `nodeTo` is a read of an attribute (corresponding to `c`) of the object in `nodeFrom`.
724+ *
725+ * For example, in
726+ * ```python
727+ * obj.foo
728+ * ```
729+ * data flows from `obj` to `obj.foo` via a read from `foo`.
730+ */
731+ predicate attributeReadStep ( CfgNode nodeFrom , AttributeContent c , CfgNode nodeTo ) {
732+ exists ( AttrNode attr |
733+ nodeFrom .asCfgNode ( ) = attr .getObject ( ) and // `nodeFrom` is the object (`obj` in the example)
734+ nodeTo .asCfgNode ( ) = attr and // `nodeTo` is the attribute access itself (`obj.foo`)
735+ attr .getName ( ) = c .getAttribute ( ) and // `c` names the attribute (`foo` in the example)
736+ attr .isLoad ( ) // only loads count as reads
737+ )
738+ }
739+
629740/**
630741 * Holds if values stored inside content `c` are cleared at node `n`. For example,
631742 * any value stored inside `f` is cleared at the pre-update node associated with `x`
0 commit comments