Skip to content

Commit 557db33

Browse files
authored
Merge pull request #4265 from tausbn/python-add-global-flow-steps
Python: Add `ModuleVariableNode` to keep track of global reads and writes
2 parents d3ea20c + 9d7a2d2 commit 557db33

File tree

22 files changed

+464
-139
lines changed

22 files changed

+464
-139
lines changed

python/ql/src/experimental/dataflow/TypeTracker.qll

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,11 @@ class StepSummary extends TStepSummary {
4747
module StepSummary {
4848
cached
4949
predicate step(Node nodeFrom, Node nodeTo, StepSummary summary) {
50-
exists(Node mid | EssaFlow::essaFlowStep*(nodeFrom, mid) and smallstep(mid, nodeTo, summary))
50+
exists(Node mid | typePreservingStep*(nodeFrom, mid) and smallstep(mid, nodeTo, summary))
5151
}
5252

5353
predicate smallstep(Node nodeFrom, Node nodeTo, StepSummary summary) {
54-
EssaFlow::essaFlowStep(nodeFrom, nodeTo) and
54+
typePreservingStep(nodeFrom, nodeTo) and
5555
summary = LevelStep()
5656
or
5757
callStep(nodeFrom, nodeTo) and summary = CallStep()
@@ -68,6 +68,12 @@ module StepSummary {
6868
}
6969
}
7070

71+
/** Holds if it's reasonable to expect the data flow step from `nodeFrom` to `nodeTo` to preserve types. */
72+
private predicate typePreservingStep(Node nodeFrom, Node nodeTo) {
73+
EssaFlow::essaFlowStep(nodeFrom, nodeTo) or
74+
jumpStep(nodeFrom, nodeTo)
75+
}
76+
7177
/** Holds if `nodeFrom` steps to `nodeTo` by being passed as a parameter in a call. */
7278
predicate callStep(ArgumentNode nodeFrom, ParameterNode nodeTo) {
7379
// TODO: Support special methods?
@@ -111,7 +117,7 @@ predicate returnStep(ReturnNode nodeFrom, Node nodeTo) {
111117
predicate basicStoreStep(Node nodeFrom, Node nodeTo, string attr) {
112118
exists(AttributeAssignment a, Node var |
113119
a.getName() = attr and
114-
EssaFlow::essaFlowStep*(nodeTo, var) and
120+
simpleLocalFlowStep*(nodeTo, var) and
115121
var.asVar() = a.getInput() and
116122
nodeFrom.asCfgNode() = a.getValue()
117123
)
@@ -276,7 +282,7 @@ class TypeTracker extends TTypeTracker {
276282
result = this.append(summary)
277283
)
278284
or
279-
EssaFlow::essaFlowStep(nodeFrom, nodeTo) and
285+
typePreservingStep(nodeFrom, nodeTo) and
280286
result = this
281287
}
282288
}

python/ql/src/experimental/dataflow/internal/DataFlowPrivate.qll

Lines changed: 45 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -150,10 +150,28 @@ predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
150150
// If there is ESSA-flow out of a node `node`, we want flow
151151
// both out of `node` and any post-update node of `node`.
152152
exists(Node node |
153-
not node.(EssaNode).getVar() instanceof GlobalSsaVariable and
154-
not nodeTo.(EssaNode).getVar() instanceof GlobalSsaVariable and
155153
EssaFlow::essaFlowStep(node, nodeTo) and
156-
nodeFrom = update(node)
154+
nodeFrom = update(node) and
155+
(
156+
not node instanceof EssaNode or
157+
not nodeTo instanceof EssaNode or
158+
localEssaStep(node, nodeTo)
159+
)
160+
)
161+
}
162+
163+
/**
164+
* Holds if there is an Essa flow step from `nodeFrom` to `nodeTo` that does not switch between
165+
* local and global SSA variables.
166+
*/
167+
private predicate localEssaStep(EssaNode nodeFrom, EssaNode nodeTo) {
168+
EssaFlow::essaFlowStep(nodeFrom, nodeTo) and
169+
(
170+
nodeFrom.getVar() instanceof GlobalSsaVariable and
171+
nodeTo.getVar() instanceof GlobalSsaVariable
172+
or
173+
not nodeFrom.getVar() instanceof GlobalSsaVariable and
174+
not nodeTo.getVar() instanceof GlobalSsaVariable
157175
)
158176
}
159177

@@ -179,7 +197,8 @@ private Node update(Node node) {
179197
*/
180198
newtype TDataFlowCallable =
181199
TCallableValue(CallableValue callable) or
182-
TClassValue(ClassValue c)
200+
TClassValue(ClassValue c) or
201+
TModule(Module m)
183202

184203
/** Represents a callable */
185204
abstract class DataFlowCallable extends TDataFlowCallable {
@@ -233,6 +252,23 @@ class DataFlowClassValue extends DataFlowCallable, TClassValue {
233252
override string getName() { result = c.getName() }
234253
}
235254

255+
/** A class representing the scope in which a `ModuleVariableNode` appears. */
256+
class DataFlowModuleScope extends DataFlowCallable, TModule {
257+
Module mod;
258+
259+
DataFlowModuleScope() { this = TModule(mod) }
260+
261+
override string toString() { result = mod.toString() }
262+
263+
override CallNode getACall() { none() }
264+
265+
override Scope getScope() { result = mod }
266+
267+
override NameNode getParameter(int n) { none() }
268+
269+
override string getName() { result = mod.getName() }
270+
}
271+
236272
newtype TDataFlowCall =
237273
TCallNode(CallNode call) or
238274
TSpecialCall(SpecialMethodCallNode special)
@@ -389,21 +425,11 @@ string ppReprType(DataFlowType t) { none() }
389425
* taken into account.
390426
*/
391427
predicate jumpStep(Node nodeFrom, Node nodeTo) {
392-
// As we have ESSA variables for global variables,
393-
// we include ESSA flow steps involving global variables.
394-
(
395-
nodeFrom.(EssaNode).getVar() instanceof GlobalSsaVariable
396-
or
397-
nodeTo.(EssaNode).getVar() instanceof GlobalSsaVariable
398-
) and
399-
(
400-
EssaFlow::essaFlowStep(nodeFrom, nodeTo)
401-
or
402-
// As jump steps do not respect chronology,
403-
// we add jump steps for each def-use pair.
404-
nodeFrom.asVar() instanceof GlobalSsaVariable and
405-
nodeTo.asCfgNode() = nodeFrom.asVar().getASourceUse()
406-
)
428+
// Module variable read
429+
nodeFrom.(ModuleVariableNode).getARead() = nodeTo
430+
or
431+
// Module variable write
432+
nodeFrom = nodeTo.(ModuleVariableNode).getAWrite()
407433
}
408434

409435
//--------

python/ql/src/experimental/dataflow/internal/DataFlowPublic.qll

Lines changed: 69 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,9 @@ newtype TNode =
2424
/** A node corresponding to a control flow node. */
2525
TCfgNode(DataFlowCfgNode node) or
2626
/** A node representing the value of an object after a state change */
27-
TPostUpdateNode(PreUpdateNode pre)
27+
TPostUpdateNode(PreUpdateNode pre) or
28+
/** A node representing a global (module-level) variable in a specific module */
29+
TModuleVariableNode(Module m, GlobalVariable v) { v.getScope() = m and v.escapes() }
2830

2931
/**
3032
* An element, viewed as a node in a data flow graph. Either an SSA variable
@@ -149,6 +151,72 @@ class ParameterNode extends EssaNode {
149151
override DataFlowCallable getEnclosingCallable() { this.isParameterOf(result, _) }
150152
}
151153

154+
/**
155+
* A data flow node corresponding to a module-level (global) variable that is accessed outside of the module scope.
156+
*
157+
* Global variables may appear twice in the data flow graph, as both `EssaNode`s and
158+
* `ModuleVariableNode`s. The former is used to represent data flow between global variables as it
159+
* occurs during module initialization, and the latter is used to represent data flow via global
160+
* variable reads and writes during run-time.
161+
*
162+
* It is possible for data to flow from assignments made at module initialization time to reads made
163+
* at run-time, but not vice versa. For example, there will be flow from `SOURCE` to `SINK` in the
164+
* following snippet:
165+
*
166+
* ```python
167+
* g = SOURCE
168+
*
169+
* def foo():
170+
* SINK(g)
171+
* ```
172+
* but not the other way round:
173+
*
174+
* ```python
175+
* SINK(g)
176+
*
177+
* def bar():
178+
* global g
179+
* g = SOURCE
180+
* ```
181+
*
182+
* Data flow through `ModuleVariableNode`s is represented as `jumpStep`s, and so any write of a
183+
* global variable can flow to any read of the same variable.
184+
*/
185+
class ModuleVariableNode extends Node, TModuleVariableNode {
186+
Module mod;
187+
GlobalVariable var;
188+
189+
ModuleVariableNode() { this = TModuleVariableNode(mod, var) }
190+
191+
override Scope getScope() { result = mod }
192+
193+
override string toString() {
194+
result = "ModuleVariableNode for " + var.toString() + " in " + mod.toString()
195+
}
196+
197+
/** Gets the module in which this variable appears. */
198+
Module getModule() { result = mod }
199+
200+
/** Gets the global variable corresponding to this node. */
201+
GlobalVariable getVariable() { result = var }
202+
203+
/** Gets a node that reads this variable. */
204+
Node getARead() {
205+
result.asCfgNode() = var.getALoad().getAFlowNode() and
206+
// Ignore reads that happen when the module is imported. These are only executed once.
207+
not result.getScope() = mod
208+
}
209+
210+
/** Gets an `EssaNode` that corresponds to an assignment of this global variable. */
211+
EssaNode getAWrite() {
212+
result.asVar().getDefinition().(EssaNodeDefinition).definedBy(var, any(DefinitionNode defn))
213+
}
214+
215+
override DataFlowCallable getEnclosingCallable() { result.(DataFlowModuleScope).getScope() = mod }
216+
217+
override Location getLocation() { result = mod.getLocation() }
218+
}
219+
152220
/**
153221
* A node that controls whether other nodes are evaluated.
154222
*/

python/ql/test/experimental/dataflow/basic/globalStep.expected

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
22
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
3+
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
4+
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
35
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
46
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
57
| test.py:1:19:1:19 | SSA variable x | test.py:2:3:2:3 | SSA variable y |
@@ -70,6 +72,10 @@
7072
| test.py:6:1:6:1 | GSSA Variable a | test.py:7:19:7:19 | ControlFlowNode for a |
7173
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:6:1:6:1 | GSSA Variable a |
7274
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:6:1:6:1 | GSSA Variable a |
75+
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:5:7:20 | GSSA Variable a |
76+
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:5:7:20 | GSSA Variable a |
77+
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:19:7:19 | ControlFlowNode for a |
78+
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:19:7:19 | ControlFlowNode for a |
7379
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:1:7:1 | GSSA Variable b |
7480
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:1:7:1 | GSSA Variable b |
7581
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:1:19:1:19 | SSA variable x |

python/ql/test/experimental/dataflow/basic/local.expected

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,11 @@
33
| test.py:0:0:0:0 | GSSA Variable b | test.py:0:0:0:0 | GSSA Variable b |
44
| test.py:0:0:0:0 | SSA variable $ | test.py:0:0:0:0 | SSA variable $ |
55
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr |
6+
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
7+
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
68
| test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id |
79
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
10+
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
811
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:1:19:1:19 | ControlFlowNode for x |
912
| test.py:1:19:1:19 | SSA variable x | test.py:1:19:1:19 | SSA variable x |
1013
| test.py:1:19:1:19 | SSA variable x | test.py:2:3:2:3 | SSA variable y |
@@ -31,10 +34,16 @@
3134
| test.py:4:10:4:10 | ControlFlowNode for z | test.py:4:10:4:10 | ControlFlowNode for z |
3235
| test.py:6:1:6:1 | ControlFlowNode for a | test.py:6:1:6:1 | ControlFlowNode for a |
3336
| test.py:6:1:6:1 | GSSA Variable a | test.py:6:1:6:1 | GSSA Variable a |
37+
| test.py:6:1:6:1 | GSSA Variable a | test.py:7:5:7:20 | GSSA Variable a |
38+
| test.py:6:1:6:1 | GSSA Variable a | test.py:7:19:7:19 | ControlFlowNode for a |
39+
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:6:1:6:1 | GSSA Variable a |
3440
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral |
41+
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:5:7:20 | GSSA Variable a |
42+
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:19:7:19 | ControlFlowNode for a |
3543
| test.py:7:1:7:1 | ControlFlowNode for b | test.py:7:1:7:1 | ControlFlowNode for b |
3644
| test.py:7:1:7:1 | GSSA Variable b | test.py:7:1:7:1 | GSSA Variable b |
3745
| test.py:7:5:7:17 | ControlFlowNode for obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
46+
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:1:7:1 | GSSA Variable b |
3847
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
3948
| test.py:7:5:7:20 | GSSA Variable a | test.py:7:5:7:20 | GSSA Variable a |
4049
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:7:19:7:19 | ControlFlowNode for a |
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
1+
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
2+
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
13
| test.py:1:19:1:19 | SSA variable x | test.py:2:7:2:7 | ControlFlowNode for x |
24
| test.py:2:3:2:3 | SSA variable y | test.py:3:7:3:7 | ControlFlowNode for y |
35
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:2:3:2:3 | SSA variable y |
46
| test.py:3:3:3:3 | SSA variable z | test.py:4:10:4:10 | ControlFlowNode for z |
57
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:3:3:3 | SSA variable z |
8+
| test.py:6:1:6:1 | GSSA Variable a | test.py:7:5:7:20 | GSSA Variable a |
9+
| test.py:6:1:6:1 | GSSA Variable a | test.py:7:19:7:19 | ControlFlowNode for a |
10+
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:6:1:6:1 | GSSA Variable a |
11+
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:1:7:1 | GSSA Variable b |

0 commit comments

Comments
 (0)