Skip to content

Commit 634041d

Browse files
authored
Merge pull request #5047 from yoff/python-dataflow-unpacking-unifying-experiments
Python: dataflow, unify iterated unpacking
2 parents bc448fe + a7ca065 commit 634041d

File tree

5 files changed

+248
-61
lines changed

5 files changed

+248
-61
lines changed

python/ql/src/semmle/python/dataflow/new/internal/DataFlowPrivate.qll

Lines changed: 108 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -161,15 +161,6 @@ module EssaFlow {
161161
nodeFrom.(CfgNode).getNode() =
162162
nodeTo.(EssaNode).getVar().getDefinition().(AssignmentDefinition).getValue()
163163
or
164-
// Definition
165-
// `[a, b] = iterable`
166-
// nodeFrom = `iterable`, cfg node
167-
// nodeTo = `TIterableSequence([a, b])`
168-
exists(UnpackingAssignmentDirectTarget target |
169-
nodeFrom.asExpr() = target.getValue() and
170-
nodeTo = TIterableSequenceNode(target)
171-
)
172-
or
173164
// With definition
174165
// `with f(42) as x:`
175166
// nodeFrom is `f(42)`, cfg node
@@ -210,7 +201,7 @@ module EssaFlow {
210201
nodeFrom.asCfgNode() = nodeTo.asCfgNode().(IfExprNode).getAnOperand()
211202
or
212203
// Flow inside an unpacking assignment
213-
unpackingAssignmentFlowStep(nodeFrom, nodeTo)
204+
iterableUnpackingFlowStep(nodeFrom, nodeTo)
214205
or
215206
// Overflow keyword argument
216207
exists(CallNode call, CallableValue callable |
@@ -907,7 +898,7 @@ predicate storeStep(Node nodeFrom, Content c, Node nodeTo) {
907898
or
908899
comprehensionStoreStep(nodeFrom, c, nodeTo)
909900
or
910-
unpackingAssignmentStoreStep(nodeFrom, c, nodeTo)
901+
iterableUnpackingStoreStep(nodeFrom, c, nodeTo)
911902
or
912903
attributeStoreStep(nodeFrom, c, nodeTo)
913904
or
@@ -1041,11 +1032,11 @@ predicate kwOverflowStoreStep(CfgNode nodeFrom, DictionaryElementContent c, Node
10411032
predicate readStep(Node nodeFrom, Content c, Node nodeTo) {
10421033
subscriptReadStep(nodeFrom, c, nodeTo)
10431034
or
1044-
unpackingAssignmentReadStep(nodeFrom, c, nodeTo)
1035+
iterableUnpackingReadStep(nodeFrom, c, nodeTo)
10451036
or
10461037
popReadStep(nodeFrom, c, nodeTo)
10471038
or
1048-
comprehensionReadStep(nodeFrom, c, nodeTo)
1039+
forReadStep(nodeFrom, c, nodeTo)
10491040
or
10501041
attributeReadStep(nodeFrom, c, nodeTo)
10511042
or
@@ -1142,9 +1133,15 @@ predicate subscriptReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
11421133
* also transfer other content, but only tuple content is further read from `sequence` into its elements.
11431134
*
11441135
* The strategy is then via several read-, store-, and flow steps:
1145-
* 1. [Flow] Content is transferred from `iterable` to `TIterableSequence(sequence)` via a
1136+
* 1. a) [Flow] Content is transferred from `iterable` to `TIterableSequence(sequence)` via a
11461137
* flow step. From here, everything happens on the LHS.
11471138
*
1139+
* b) [Read] If the unpacking happens inside a for as in
1140+
* ```python
1141+
* for sequence in iterable
1142+
* ```
1143+
* then content is read from `iterable` to `TIterableSequence(sequence)`.
1144+
*
11481145
* 2. [Flow] Content is transferred from `TIterableSequence(sequence)` to `sequence` via a
11491146
* flow step. (Here only tuple content is relevant.)
11501147
*
@@ -1179,7 +1176,7 @@ predicate subscriptReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
11791176
* Looking at the content propagation to `a`:
11801177
* `["a", SOURCE]`: [ListElementContent]
11811178
*
1182-
* --Step 1-->
1179+
* --Step 1a-->
11831180
*
11841181
* `TIterableSequence((a, b))`: [ListElementContent]
11851182
*
@@ -1205,7 +1202,7 @@ predicate subscriptReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
12051202
*
12061203
* `["a", [SOURCE]]`: [ListElementContent; ListElementContent]
12071204
*
1208-
* --Step 1-->
1205+
* --Step 1a-->
12091206
*
12101207
* `TIterableSequence((a, [b, *c]))`: [ListElementContent; ListElementContent]
12111208
*
@@ -1237,14 +1234,54 @@ predicate subscriptReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
12371234
*
12381235
* `c`: [ListElementContent]
12391236
*/
1240-
module UnpackingAssignment {
1237+
module IterableUnpacking {
1238+
/**
1239+
* The target of a `for`, e.g. `x` in `for x in list` or in `[42 for x in list]`.
1240+
* This class also records the source, which in both above cases is `list`.
1241+
* This class abstracts away the differing representations of comprehensions and
1242+
* for statements.
1243+
*/
1244+
class ForTarget extends ControlFlowNode {
1245+
Expr source;
1246+
1247+
ForTarget() {
1248+
exists(For for |
1249+
source = for.getIter() and
1250+
this.getNode() = for.getTarget() and
1251+
not for = any(Comp comp).getNthInnerLoop(0)
1252+
)
1253+
or
1254+
exists(Comp comp |
1255+
source = comp.getIterable() and
1256+
this.getNode() = comp.getNthInnerLoop(0).getTarget()
1257+
)
1258+
}
1259+
1260+
Expr getSource() { result = source }
1261+
}
1262+
1263+
/** The LHS of an assignment, it also records the assigned value. */
1264+
class AssignmentTarget extends ControlFlowNode {
1265+
Expr value;
1266+
1267+
AssignmentTarget() {
1268+
exists(Assign assign | this.getNode() = assign.getATarget() | value = assign.getValue())
1269+
}
1270+
1271+
Expr getValue() { result = value }
1272+
}
1273+
12411274
/** A direct (or top-level) target of an unpacking assignment. */
12421275
class UnpackingAssignmentDirectTarget extends ControlFlowNode {
12431276
Expr value;
12441277

12451278
UnpackingAssignmentDirectTarget() {
12461279
this instanceof SequenceNode and
1247-
exists(Assign assign | this.getNode() = assign.getATarget() | value = assign.getValue())
1280+
(
1281+
value = this.(AssignmentTarget).getValue()
1282+
or
1283+
value = this.(ForTarget).getSource()
1284+
)
12481285
}
12491286

12501287
Expr getValue() { result = value }
@@ -1268,11 +1305,39 @@ module UnpackingAssignment {
12681305
ControlFlowNode getAnElement() { result = this.getElement(_) }
12691306
}
12701307

1308+
/**
1309+
* Step 1a
1310+
* Data flows from `iterable` to `TIterableSequence(sequence)`
1311+
*/
1312+
predicate iterableUnpackingAssignmentFlowStep(Node nodeFrom, Node nodeTo) {
1313+
exists(AssignmentTarget target |
1314+
nodeFrom.asExpr() = target.getValue() and
1315+
nodeTo = TIterableSequenceNode(target)
1316+
)
1317+
}
1318+
1319+
/**
1320+
* Step 1b
1321+
* Data is read from `iterable` to `TIterableSequence(sequence)`
1322+
*/
1323+
predicate iterableUnpackingForReadStep(CfgNode nodeFrom, Content c, Node nodeTo) {
1324+
exists(ForTarget target |
1325+
nodeFrom.asExpr() = target.getSource() and
1326+
target instanceof SequenceNode and
1327+
nodeTo = TIterableSequenceNode(target)
1328+
) and
1329+
(
1330+
c instanceof ListElementContent
1331+
or
1332+
c instanceof SetElementContent
1333+
)
1334+
}
1335+
12711336
/**
12721337
* Step 2
12731338
* Data flows from `TIterableSequence(sequence)` to `sequence`
12741339
*/
1275-
predicate unpackingAssignmentFlowStep(Node nodeFrom, Node nodeTo) {
1340+
predicate iterableUnpackingTupleFlowStep(Node nodeFrom, Node nodeTo) {
12761341
exists(UnpackingAssignmentSequenceTarget target |
12771342
nodeFrom = TIterableSequenceNode(target) and
12781343
nodeTo.asCfgNode() = target
@@ -1285,7 +1350,7 @@ module UnpackingAssignment {
12851350
* As `sequence` is modeled as a tuple, we will not read tuple content as that would allow
12861351
* crosstalk.
12871352
*/
1288-
predicate unpackingAssignmentConvertingReadStep(Node nodeFrom, Content c, Node nodeTo) {
1353+
predicate iterableUnpackingConvertingReadStep(Node nodeFrom, Content c, Node nodeTo) {
12891354
exists(UnpackingAssignmentSequenceTarget target |
12901355
nodeFrom = TIterableSequenceNode(target) and
12911356
nodeTo = TIterableElementNode(target) and
@@ -1304,7 +1369,7 @@ module UnpackingAssignment {
13041369
* Content type is `TupleElementContent` with indices taken from the syntax.
13051370
* For instance, if `sequence` is `(a, *b, c)`, content is written to index 0, 1, and 2.
13061371
*/
1307-
predicate unpackingAssignmentConvertingStoreStep(Node nodeFrom, Content c, Node nodeTo) {
1372+
predicate iterableUnpackingConvertingStoreStep(Node nodeFrom, Content c, Node nodeTo) {
13081373
exists(UnpackingAssignmentSequenceTarget target |
13091374
nodeFrom = TIterableElementNode(target) and
13101375
nodeTo.asCfgNode() = target and
@@ -1324,7 +1389,7 @@ module UnpackingAssignment {
13241389
*
13251390
* c) If the element is a starred variable, with control-flow node `v`, `toNode` is `TIterableElement(v)`.
13261391
*/
1327-
predicate unpackingAssignmentElementReadStep(Node nodeFrom, Content c, Node nodeTo) {
1392+
predicate iterableUnpackingElementReadStep(Node nodeFrom, Content c, Node nodeTo) {
13281393
exists(
13291394
UnpackingAssignmentSequenceTarget target, int index, ControlFlowNode element, int starIndex
13301395
|
@@ -1366,7 +1431,7 @@ module UnpackingAssignment {
13661431
* Data flows from `TIterableElement(v)` to the essa variable for `v`, with
13671432
* content type `ListElementContent`.
13681433
*/
1369-
predicate unpackingAssignmentStarredElementStoreStep(Node nodeFrom, Content c, Node nodeTo) {
1434+
predicate iterableUnpackingStarredElementStoreStep(Node nodeFrom, Content c, Node nodeTo) {
13701435
exists(ControlFlowNode starred | starred.getNode() instanceof Starred |
13711436
nodeFrom = TIterableElementNode(starred) and
13721437
nodeTo.asVar().getDefinition().(MultiAssignmentDefinition).getDefiningNode() = starred and
@@ -1375,21 +1440,30 @@ module UnpackingAssignment {
13751440
}
13761441

13771442
/** All read steps associated with unpacking assignment. */
1378-
predicate unpackingAssignmentReadStep(Node nodeFrom, Content c, Node nodeTo) {
1379-
unpackingAssignmentElementReadStep(nodeFrom, c, nodeTo)
1443+
predicate iterableUnpackingReadStep(Node nodeFrom, Content c, Node nodeTo) {
1444+
iterableUnpackingForReadStep(nodeFrom, c, nodeTo)
1445+
or
1446+
iterableUnpackingElementReadStep(nodeFrom, c, nodeTo)
13801447
or
1381-
unpackingAssignmentConvertingReadStep(nodeFrom, c, nodeTo)
1448+
iterableUnpackingConvertingReadStep(nodeFrom, c, nodeTo)
13821449
}
13831450

13841451
/** All store steps associated with unpacking assignment. */
1385-
predicate unpackingAssignmentStoreStep(Node nodeFrom, Content c, Node nodeTo) {
1386-
unpackingAssignmentStarredElementStoreStep(nodeFrom, c, nodeTo)
1452+
predicate iterableUnpackingStoreStep(Node nodeFrom, Content c, Node nodeTo) {
1453+
iterableUnpackingStarredElementStoreStep(nodeFrom, c, nodeTo)
1454+
or
1455+
iterableUnpackingConvertingStoreStep(nodeFrom, c, nodeTo)
1456+
}
1457+
1458+
/** All flow steps associated with unpacking assignment. */
1459+
predicate iterableUnpackingFlowStep(Node nodeFrom, Node nodeTo) {
1460+
iterableUnpackingAssignmentFlowStep(nodeFrom, nodeTo)
13871461
or
1388-
unpackingAssignmentConvertingStoreStep(nodeFrom, c, nodeTo)
1462+
iterableUnpackingTupleFlowStep(nodeFrom, nodeTo)
13891463
}
13901464
}
13911465

1392-
import UnpackingAssignment
1466+
import IterableUnpacking
13931467

13941468
/** Data flows from a sequence to a call to `pop` on the sequence. */
13951469
predicate popReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
@@ -1425,25 +1499,10 @@ predicate popReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
14251499
)
14261500
}
14271501

1428-
/** Data flows from a iterated sequence to the variable iterating over the sequence. */
1429-
predicate comprehensionReadStep(CfgNode nodeFrom, Content c, EssaNode nodeTo) {
1430-
// Comprehension
1431-
// `[x+1 for x in l]`
1432-
// nodeFrom is `l`, cfg node
1433-
// nodeTo is `x`, essa var
1434-
// c denotes element of list or set
1435-
exists(Comp comp |
1436-
// outermost for
1437-
nodeFrom.getNode().getNode() = comp.getIterable() and
1438-
nodeTo.getVar().getDefinition().(AssignmentDefinition).getDefiningNode().getNode() =
1439-
comp.getIterationVariable(0).getAStore()
1440-
or
1441-
// an inner for
1442-
exists(int n | n > 0 |
1443-
nodeFrom.getNode().getNode() = comp.getNthInnerLoop(n).getIter() and
1444-
nodeTo.getVar().getDefinition().(AssignmentDefinition).getDefiningNode().getNode() =
1445-
comp.getNthInnerLoop(n).getTarget()
1446-
)
1502+
predicate forReadStep(CfgNode nodeFrom, Content c, Node nodeTo) {
1503+
exists(ForTarget target |
1504+
nodeFrom.asExpr() = target.getSource() and
1505+
nodeTo.asVar().(EssaNodeDefinition).getDefiningNode() = target
14471506
) and
14481507
(
14491508
c instanceof ListElementContent

0 commit comments

Comments
 (0)