Skip to content

Commit 52d8f7d

Browse files
authored
Merge pull request #4235 from yoff/SharedDataflow_UseUseFlow
Python: Port use-use implementation from Java
2 parents f716f96 + 92e7a56 commit 52d8f7d

File tree

5 files changed

+182
-15
lines changed

5 files changed

+182
-15
lines changed

python/ql/src/experimental/dataflow/internal/DataFlowPrivate.qll

Lines changed: 41 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
private import python
22
private import DataFlowPublic
33
import semmle.python.SpecialMethods
4+
private import semmle.python.essa.SsaCompute
45

56
//--------
67
// Data flow graph
@@ -31,7 +32,7 @@ class StorePreUpdateNode extends PreUpdateNode, CfgNode {
3132
}
3233
}
3334

34-
/** A node marking the state change of an object after a read */
35+
/** A node marking the state change of an object after a read. */
3536
class ReadPreUpdateNode extends PreUpdateNode, CfgNode {
3637
ReadPreUpdateNode() {
3738
exists(Attribute a |
@@ -97,12 +98,19 @@ module EssaFlow {
9798
contextManager.strictlyDominates(var)
9899
)
99100
or
100-
// Use
101+
// First use after definition
101102
// `y = 42`
102103
// `x = f(y)`
103104
// nodeFrom is `y` on first line, essa var
104105
// nodeTo is `y` on second line, cfg node
105-
nodeFrom.(EssaNode).getVar().getASourceUse() = nodeTo.(CfgNode).getNode()
106+
defToFirstUse(nodeFrom.asVar(), nodeTo.asCfgNode())
107+
or
108+
// Next use after use
109+
// `x = f(y)`
110+
// `z = y + 1`
111+
// nodeFrom is `y` on first line, cfg node
112+
// nodeTo is `y` on second line, cfg node
113+
useToNextUse(nodeFrom.asCfgNode(), nodeTo.asCfgNode())
106114
or
107115
// Refinements
108116
exists(EssaEdgeRefinement r |
@@ -120,6 +128,14 @@ module EssaFlow {
120128
nodeFrom.(EssaNode).getVar() = p.getAnInput()
121129
)
122130
}
131+
132+
predicate useToNextUse(NameNode nodeFrom, NameNode nodeTo) {
133+
AdjacentUses::adjacentUseUseSameVar(nodeFrom, nodeTo)
134+
}
135+
136+
predicate defToFirstUse(EssaVariable var, NameNode nodeTo) {
137+
AdjacentUses::firstUse(var.getDefinition(), nodeTo)
138+
}
123139
}
124140

125141
//--------
@@ -131,18 +147,25 @@ module EssaFlow {
131147
* excludes SSA flow through instance fields.
132148
*/
133149
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
134-
not nodeFrom.(EssaNode).getVar() instanceof GlobalSsaVariable and
135-
not nodeTo.(EssaNode).getVar() instanceof GlobalSsaVariable and
136-
EssaFlow::essaFlowStep(update(nodeFrom), nodeTo)
150+
// If there is ESSA-flow out of a node `node`, we want flow
151+
// both out of `node` and any post-update node of `node`.
152+
exists(Node node |
153+
not node.(EssaNode).getVar() instanceof GlobalSsaVariable and
154+
not nodeTo.(EssaNode).getVar() instanceof GlobalSsaVariable and
155+
EssaFlow::essaFlowStep(node, nodeTo) and
156+
nodeFrom = update(node)
157+
)
137158
}
138159

160+
/**
161+
* Gets `node` itself and, when `node` has one, the post-update node for `node`.
162+
*/
139163
private Node update(Node node) {
140164
exists(PostUpdateNode pun |
141165
node = pun.getPreUpdateNode() and
142166
result = pun
143167
)
144168
or
145-
not exists(PostUpdateNode pun | node = pun.getPreUpdateNode()) and
146169
result = node
147170
}
148171

@@ -365,15 +388,22 @@ string ppReprType(DataFlowType t) { none() }
365388
* another. Additional steps specified by the configuration are *not*
366389
* taken into account.
367390
*/
368-
predicate jumpStep(Node pred, Node succ) {
391+
predicate jumpStep(Node nodeFrom, Node nodeTo) {
369392
// As we have ESSA variables for global variables,
370393
// we include ESSA flow steps involving global variables.
371394
(
372-
pred.(EssaNode).getVar() instanceof GlobalSsaVariable
395+
nodeFrom.(EssaNode).getVar() instanceof GlobalSsaVariable
373396
or
374-
succ.(EssaNode).getVar() instanceof GlobalSsaVariable
397+
nodeTo.(EssaNode).getVar() instanceof GlobalSsaVariable
375398
) and
376-
EssaFlow::essaFlowStep(pred, succ)
399+
(
400+
EssaFlow::essaFlowStep(nodeFrom, nodeTo)
401+
or
402+
// As jump steps do not respect chronology,
403+
// we add jump steps for each def-use pair.
404+
nodeFrom.asVar() instanceof GlobalSsaVariable and
405+
nodeTo.asCfgNode() = nodeFrom.asVar().getASourceUse()
406+
)
377407
}
378408

379409
//--------

python/ql/src/semmle/python/essa/SsaCompute.qll

Lines changed: 137 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ private module SsaComputeImpl {
194194
defUseRank(v, b, rankix, i)
195195
}
196196

197-
/** A `VarAccess` `use` of `v` in `b` at index `i`. */
197+
/** A variable access `use` of `v` in `b` at index `i`. */
198198
cached
199199
predicate variableUse(SsaSourceVariable v, ControlFlowNode use, BasicBlock b, int i) {
200200
(v.getAUse() = use or v.hasRefinement(use, _)) and
@@ -348,11 +348,147 @@ private module SsaComputeImpl {
348348
)
349349
}
350350
}
351+
352+
cached
353+
module AdjacentUsesImpl {
354+
/**
355+
* Holds if `rankix` is the rank of the index `i` at which there is an SSA definition or explicit use of
356+
* `v` in the basic block `b`.
357+
*/
358+
cached
359+
predicate defSourceUseRank(SsaSourceVariable v, BasicBlock b, int rankix, int i) {
360+
i = rank[rankix](int j | variableDefine(v, _, b, j) or variableSourceUse(v, _, b, j))
361+
}
362+
363+
/** Holds if `use` is a source use of `v` at position `j` in `b`, where `i = 2 * j`. */
364+
cached
365+
predicate variableSourceUse(SsaSourceVariable v, ControlFlowNode use, BasicBlock b, int i) {
366+
v.getASourceUse() = use and
367+
exists(int j |
368+
b.getNode(j) = use and
369+
i = 2 * j
370+
)
371+
}
372+
373+
/** Gets the maximum rank index for the given variable and basic block. */
374+
private int lastSourceUseRank(SsaSourceVariable v, BasicBlock b) {
375+
result = max(int rankix | defSourceUseRank(v, b, rankix, _))
376+
}
377+
378+
/** Holds if `v` is defined or used in `b`. */
379+
private predicate varOccursInBlock(SsaSourceVariable v, BasicBlock b) {
380+
defSourceUseRank(v, b, _, _)
381+
}
382+
383+
/** Holds if `v` occurs in `b` or one of `b`'s transitive successors. */
384+
private predicate blockPrecedesVar(SsaSourceVariable v, BasicBlock b) {
385+
varOccursInBlock(v, b.getASuccessor*())
386+
}
387+
388+
/**
389+
* Holds if `b2` is a transitive successor of `b1` and `v` occurs in `b1` and
390+
* in `b2` or one of its transitive successors but not in any block on the path
391+
* between `b1` and `b2`.
392+
*/
393+
private predicate varBlockReaches(SsaSourceVariable v, BasicBlock b1, BasicBlock b2) {
394+
varOccursInBlock(v, b1) and
395+
b2 = b1.getASuccessor() and
396+
blockPrecedesVar(v, b2)
397+
or
398+
exists(BasicBlock mid |
399+
varBlockReaches(v, b1, mid) and
400+
b2 = mid.getASuccessor() and
401+
not varOccursInBlock(v, mid) and
402+
blockPrecedesVar(v, b2)
403+
)
404+
}
405+
406+
/**
407+
* Holds if `b2` is a transitive successor of `b1` and `v` occurs in `b1` and
408+
* `b2` but not in any block on the path between `b1` and `b2`.
409+
*/
410+
private predicate varBlockStep(SsaSourceVariable v, BasicBlock b1, BasicBlock b2) {
411+
varBlockReaches(v, b1, b2) and
412+
varOccursInBlock(v, b2)
413+
}
414+
415+
/**
416+
* Holds if `v` occurs at index `i1` in `b1` and at index `i2` in `b2` and
417+
* there is a path between them without any occurrence of `v`.
418+
*/
419+
cached
420+
predicate adjacentVarRefs(SsaSourceVariable v, BasicBlock b1, int i1, BasicBlock b2, int i2) {
421+
exists(int rankix |
422+
b1 = b2 and
423+
defSourceUseRank(v, b1, rankix, i1) and
424+
defSourceUseRank(v, b2, rankix + 1, i2)
425+
)
426+
or
427+
defSourceUseRank(v, b1, lastSourceUseRank(v, b1), i1) and
428+
varBlockStep(v, b1, b2) and
429+
defSourceUseRank(v, b2, 1, i2)
430+
}
431+
432+
/**
433+
* Holds if `use1` and `use2` form an adjacent use-use-pair of the same SSA
434+
* variable, that is, the value read in `use1` can reach `use2` without passing
435+
* through any other use or any SSA definition of the variable.
436+
*/
437+
cached
438+
predicate adjacentUseUseSameVar(ControlFlowNode use1, ControlFlowNode use2) {
439+
exists(SsaSourceVariable v, BasicBlock b1, int i1, BasicBlock b2, int i2 |
440+
adjacentVarRefs(v, b1, i1, b2, i2) and
441+
variableSourceUse(v, use1, b1, i1) and
442+
variableSourceUse(v, use2, b2, i2)
443+
)
444+
}
445+
446+
/**
447+
* Holds if the value defined at `def` can reach `use` without passing through
448+
* any other uses, but possibly through phi nodes.
449+
*/
450+
cached
451+
predicate firstUse(EssaDefinition def, ControlFlowNode use) {
452+
exists(SsaSourceVariable v, BasicBlock b1, int i1, BasicBlock b2, int i2 |
453+
adjacentVarRefs(v, b1, i1, b2, i2) and
454+
definesAt(def, v, b1, i1) and
455+
variableSourceUse(v, use, b2, i2)
456+
)
457+
or
458+
exists(
459+
SsaSourceVariable v, EssaDefinition redef, BasicBlock b1, int i1, BasicBlock b2, int i2
460+
|
461+
redef instanceof PhiFunction
462+
|
463+
adjacentVarRefs(v, b1, i1, b2, i2) and
464+
definesAt(def, v, b1, i1) and
465+
definesAt(redef, v, b2, i2) and
466+
firstUse(redef, use)
467+
)
468+
}
469+
470+
/**
471+
* Holds if `def` defines `v` at the specified position.
472+
* Phi nodes are placed at index -1.
473+
*/
474+
cached
475+
predicate definesAt(EssaDefinition def, SsaSourceVariable v, BasicBlock b, int i) {
476+
exists(ControlFlowNode defNode |
477+
def.(EssaNodeDefinition).definedBy(v, defNode) and
478+
variableDefine(v, defNode, b, i)
479+
)
480+
or
481+
v = def.(PhiFunction).getSourceVariable() and
482+
b = def.(PhiFunction).getBasicBlock() and
483+
i = -1
484+
}
485+
}
351486
}
352487

353488
import SsaComputeImpl::SsaDefinitionsImpl as SsaDefinitions
354489
import SsaComputeImpl::EssaDefinitionsImpl as EssaDefinitions
355490
import SsaComputeImpl::LivenessImpl as Liveness
491+
import SsaComputeImpl::AdjacentUsesImpl as AdjacentUses
356492

357493
/* This is exported primarily for testing */
358494
/*

python/ql/test/experimental/dataflow/consistency/dataflow-consistency.expected

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ uniqueEnclosingCallable
55
| module.py:2:8:2:13 | ControlFlowNode for Str | Node should have one enclosing callable but has 0. |
66
| module.py:5:1:5:21 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
77
| module.py:5:5:5:18 | GSSA Variable dangerous_func | Node should have one enclosing callable but has 0. |
8+
| module.py:9:9:9:14 | ControlFlowNode for SOURCE | Node should have one enclosing callable but has 0. |
89
| module.py:10:1:10:5 | GSSA Variable safe2 | Node should have one enclosing callable but has 0. |
910
| module.py:10:9:10:14 | ControlFlowNode for Str | Node should have one enclosing callable but has 0. |
1011
| test.py:6:1:6:12 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |

python/ql/test/experimental/dataflow/tainttracking/customSanitizer/TestTaint.expected

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
test_taint
2-
| test.py:22 | fail | test_custom_sanitizer | s |
2+
| test.py:22 | ok | test_custom_sanitizer | s |
33
| test.py:36 | fail | test_custom_sanitizer_guard | s |
44
| test.py:38 | ok | test_custom_sanitizer_guard | s |
55
| test.py:49 | ok | test_escape | s2 |

python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/TestTaint.expected

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@
5050
| test_collections.py:149 | ok | list_index_aug_assign | my_list |
5151
| test_collections.py:152 | fail | list_index_aug_assign | my_list |
5252
| test_collections.py:159 | ok | list_append | my_list |
53-
| test_collections.py:162 | fail | list_append | my_list |
53+
| test_collections.py:162 | ok | list_append | my_list |
5454
| test_collections.py:169 | ok | list_extend | my_list |
5555
| test_collections.py:172 | fail | list_extend | my_list |
5656
| test_collections.py:179 | ok | dict_update_dict | my_dict |
@@ -63,7 +63,7 @@
6363
| test_collections.py:212 | fail | dict_manual_update | my_dict |
6464
| test_collections.py:220 | fail | dict_merge | merged |
6565
| test_collections.py:227 | ok | set_add | my_set |
66-
| test_collections.py:230 | fail | set_add | my_set |
66+
| test_collections.py:230 | ok | set_add | my_set |
6767
| test_json.py:26 | ok | test | json.dumps(..) |
6868
| test_json.py:27 | ok | test | json.loads(..) |
6969
| test_json.py:34 | fail | test | tainted_filelike |

0 commit comments

Comments
 (0)