Skip to content

Commit 000fa33

Browse files
authored
Merge pull request #4013 from yoff/SharedDataflow_SequenceFlow
Python: Shared dataflow: Content flow
2 parents 92c97b1 + 2608509 commit 000fa33

File tree

5 files changed

+437
-28
lines changed

5 files changed

+437
-28
lines changed

python/ql/src/experimental/dataflow/internal/DataFlowPrivate.qll

Lines changed: 182 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -277,15 +277,193 @@ predicate jumpStep(Node pred, Node succ) {
277277
// Field flow
278278
//--------
279279
/**
280-
* Holds if data can flow from `node1` to `node2` via an assignment to
280+
* Holds if data can flow from `nodeFrom` to `nodeTo` via an assignment to
281281
* content `c`.
282282
*/
283-
predicate storeStep(Node node1, Content c, Node node2) { none() }
283+
predicate storeStep(Node nodeFrom, Content c, Node nodeTo) {
284+
listStoreStep(nodeFrom, c, nodeTo)
285+
or
286+
setStoreStep(nodeFrom, c, nodeTo)
287+
or
288+
tupleStoreStep(nodeFrom, c, nodeTo)
289+
or
290+
dictStoreStep(nodeFrom, c, nodeTo)
291+
or
292+
comprehensionStoreStep(nodeFrom, c, nodeTo)
293+
}
294+
295+
/** Data flows from an element of a list to the list. */
296+
predicate listStoreStep(CfgNode nodeFrom, ListElementContent c, CfgNode nodeTo) {
297+
// List
298+
// `[..., 42, ...]`
299+
// nodeFrom is `42`, cfg node
300+
// nodeTo is the list, `[..., 42, ...]`, cfg node
301+
// c denotes element of list
302+
nodeTo.getNode().(ListNode).getAnElement() = nodeFrom.getNode()
303+
}
304+
305+
/**
 * Data flows from an element of a set to the set.
 *
 * Holds if `nodeFrom` is an element expression of the set literal `nodeTo`.
 * The stored content `c` is `SetElementContent`, so that set literals are
 * tracked with set content rather than list content.
 */
predicate setStoreStep(CfgNode nodeFrom, SetElementContent c, CfgNode nodeTo) {
  // Set
  //   `{..., 42, ...}`
  //   nodeFrom is `42`, cfg node
  //   nodeTo is the set, `{..., 42, ...}`, cfg node
  //   c denotes element of set
  nodeTo.getNode().(SetNode).getAnElement() = nodeFrom.getNode()
}
314+
315+
/** Data flows from an element of a tuple to the tuple at a specific index. */
316+
predicate tupleStoreStep(CfgNode nodeFrom, TupleElementContent c, CfgNode nodeTo) {
317+
// Tuple
318+
// `(..., 42, ...)`
319+
// nodeFrom is `42`, cfg node
320+
// nodeTo is the tuple, `(..., 42, ...)`, cfg node
321+
// c denotes element of tuple and index of nodeFrom
322+
exists(int n |
323+
nodeTo.getNode().(TupleNode).getElement(n) = nodeFrom.getNode() and
324+
c.getIndex() = n
325+
)
326+
}
327+
328+
/** Data flows from an element of a dictionary to the dictionary at a specific key. */
329+
predicate dictStoreStep(CfgNode nodeFrom, DictionaryElementContent c, CfgNode nodeTo) {
330+
// Dictionary
331+
// `{..., "key": 42, ...}`
332+
// nodeFrom is `42`, cfg node
333+
// nodeTo is the dict, `{..., "key": 42, ...}`, cfg node
334+
// c denotes element of dictionary and the key `"key"`
335+
exists(KeyValuePair item |
336+
item = nodeTo.getNode().(DictNode).getNode().(Dict).getAnItem() and
337+
nodeFrom.getNode().getNode() = item.getValue() and
338+
c.getKey() = item.getKey().(StrConst).getS()
339+
)
340+
}
341+
342+
/** Data flows from an element expression in a comprehension to the comprehension. */
343+
predicate comprehensionStoreStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
344+
// Comprehension
345+
// `[x+1 for x in l]`
346+
// nodeFrom is `x+1`, cfg node
347+
// nodeTo is `[x+1 for x in l]`, cfg node
348+
// c denotes list or set or dictionary without index
349+
//
350+
// List
351+
nodeTo.getNode().getNode().(ListComp).getElt() = nodeFrom.getNode().getNode() and
352+
c instanceof ListElementContent
353+
or
354+
// Set
355+
nodeTo.getNode().getNode().(SetComp).getElt() = nodeFrom.getNode().getNode() and
356+
c instanceof SetElementContent
357+
or
358+
// Dictionary
359+
nodeTo.getNode().getNode().(DictComp).getElt() = nodeFrom.getNode().getNode() and
360+
c instanceof DictionaryElementAnyContent
361+
}
284362

285363
/**
286-
* Holds if data can flow from `node1` to `node2` via a read of content `c`.
364+
* Holds if data can flow from `nodeFrom` to `nodeTo` via a read of content `c`.
287365
*/
288-
predicate readStep(Node node1, Content c, Node node2) { none() }
366+
predicate readStep(Node nodeFrom, Content c, Node nodeTo) {
367+
subscriptReadStep(nodeFrom, c, nodeTo)
368+
or
369+
popReadStep(nodeFrom, c, nodeTo)
370+
or
371+
comprehensionReadStep(nodeFrom, c, nodeTo)
372+
}
373+
374+
/** Data flows from a sequence to a subscript of the sequence. */
375+
predicate subscriptReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
376+
// Subscript
377+
// `l[3]`
378+
// nodeFrom is `l`, cfg node
379+
// nodeTo is `l[3]`, cfg node
380+
// c is compatible with 3
381+
nodeFrom.getNode() = nodeTo.getNode().(SubscriptNode).getObject() and
382+
(
383+
c instanceof ListElementContent
384+
or
385+
c instanceof SetElementContent
386+
or
387+
c instanceof DictionaryElementAnyContent
388+
or
389+
c.(TupleElementContent).getIndex() =
390+
nodeTo.getNode().(SubscriptNode).getIndex().getNode().(IntegerLiteral).getValue()
391+
or
392+
c.(DictionaryElementContent).getKey() =
393+
nodeTo.getNode().(SubscriptNode).getIndex().getNode().(StrConst).getS()
394+
)
395+
}
396+
397+
/** Data flows from a sequence to a call to `pop` on the sequence. */
398+
predicate popReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
399+
// set.pop or list.pop
400+
// `s.pop()`
401+
// nodeFrom is `s`, cfg node
402+
// nodeTo is `s.pop()`, cfg node
403+
// c denotes element of list or set
404+
exists(CallNode call, AttrNode a |
405+
call.getFunction() = a and
406+
a.getName() = "pop" and // Should match appropriate call since we tracked a sequence here.
407+
not exists(call.getAnArg()) and
408+
nodeFrom.getNode() = a.getObject() and
409+
nodeTo.getNode() = call and
410+
(
411+
c instanceof ListElementContent
412+
or
413+
c instanceof SetElementContent
414+
)
415+
)
416+
or
417+
// dict.pop
418+
// `d.pop("key")`
419+
// nodeFrom is `d`, cfg node
420+
// nodeTo is `d.pop("key")`, cfg node
421+
// c denotes the key `"key"`
422+
exists(CallNode call, AttrNode a |
423+
call.getFunction() = a and
424+
a.getName() = "pop" and // Should match appropriate call since we tracked a dictionary here.
425+
nodeFrom.getNode() = a.getObject() and
426+
nodeTo.getNode() = call and
427+
c.(DictionaryElementContent).getKey() = call.getArg(0).getNode().(StrConst).getS()
428+
)
429+
}
430+
431+
/** Data flows from an iterated sequence to the variable iterating over the sequence. */
432+
predicate comprehensionReadStep(CfgNode nodeFrom, Content c, EssaNode nodeTo) {
433+
// Comprehension
434+
// `[x+1 for x in l]`
435+
// nodeFrom is `l`, cfg node
436+
// nodeTo is `x`, essa var
437+
// c denotes element of list or set
438+
exists(For f, Comp comp |
439+
f = getCompFor(comp) and
440+
nodeFrom.getNode().getNode() = getCompIter(comp) and
441+
nodeTo.getVar().getDefinition().(AssignmentDefinition).getDefiningNode().getNode() =
442+
f.getTarget() and
443+
(
444+
c instanceof ListElementContent
445+
or
446+
c instanceof SetElementContent
447+
)
448+
)
449+
}
450+
451+
/** This seems to compensate for extractor shortcomings */
452+
For getCompFor(Comp c) {
453+
c.contains(result) and
454+
c.getFunction() = result.getScope()
455+
}
456+
457+
/** This seems to compensate for extractor shortcomings */
458+
AstNode getCompIter(Comp c) {
459+
c.contains(result) and
460+
c.getScope() = result.getScope() and
461+
not result = c.getFunction() and
462+
not exists(AstNode between |
463+
c.contains(between) and
464+
between.contains(result)
465+
)
466+
}
289467

290468
/**
291469
* Holds if values stored inside content `c` are cleared at node `n`. For example,

python/ql/src/experimental/dataflow/internal/DataFlowPublic.qll

Lines changed: 66 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,15 @@ class Node extends TNode {
3333
/** Gets the scope of this node. */
3434
Scope getScope() { none() }
3535

36+
private DataFlowCallable getCallableScope(Scope s) {
37+
result.getScope() = s
38+
or
39+
not exists(DataFlowCallable c | c.getScope() = s) and
40+
result = getCallableScope(s.getEnclosingScope())
41+
}
42+
3643
/** Gets the enclosing callable of this node. */
37-
DataFlowCallable getEnclosingCallable() { result.getScope() = this.getScope() }
44+
DataFlowCallable getEnclosingCallable() { result = getCallableScope(this.getScope()) }
3845

3946
/** Gets the location of this node */
4047
Location getLocation() { none() }
@@ -138,6 +145,62 @@ class BarrierGuard extends Expr {
138145
/**
139146
* A reference contained in an object. This is either a field or a property.
140147
*/
141-
class Content extends string {
142-
Content() { this = "Content" }
148+
newtype TContent =
149+
/** An element of a list. */
150+
TListElementContent() or
151+
/** An element of a set. */
152+
TSetElementContent() or
153+
/** An element of a tuple at a specific index. */
154+
TTupleElementContent(int index) { exists(any(TupleNode tn).getElement(index)) } or
155+
/** An element of a dictionary under a specific key. */
156+
TDictionaryElementContent(string key) {
157+
key = any(KeyValuePair kvp).getKey().(StrConst).getS()
158+
or
159+
key = any(Keyword kw).getArg()
160+
} or
161+
/** An element of a dictionary at any key. */
162+
TDictionaryElementAnyContent()
163+
164+
class Content extends TContent {
165+
/** Gets a textual representation of this element. */
166+
string toString() { result = "Content" }
167+
}
168+
169+
class ListElementContent extends TListElementContent, Content {
170+
/** Gets a textual representation of this element. */
171+
override string toString() { result = "List element" }
172+
}
173+
174+
class SetElementContent extends TSetElementContent, Content {
175+
/** Gets a textual representation of this element. */
176+
override string toString() { result = "Set element" }
177+
}
178+
179+
class TupleElementContent extends TTupleElementContent, Content {
180+
int index;
181+
182+
TupleElementContent() { this = TTupleElementContent(index) }
183+
184+
/** Gets the index for this tuple element */
185+
int getIndex() { result = index }
186+
187+
/** Gets a textual representation of this element. */
188+
override string toString() { result = "Tuple element at index " + index.toString() }
189+
}
190+
191+
class DictionaryElementContent extends TDictionaryElementContent, Content {
192+
string key;
193+
194+
DictionaryElementContent() { this = TDictionaryElementContent(key) }
195+
196+
/** Gets the key for this dictionary element. */
197+
string getKey() { result = key }
198+
199+
/** Gets a textual representation of this element. */
200+
override string toString() { result = "Dictionary element at key " + key }
201+
}
202+
203+
class DictionaryElementAnyContent extends TDictionaryElementAnyContent, Content {
204+
/** Gets a textual representation of this element. */
205+
override string toString() { result = "Any dictionary element" }
143206
}

python/ql/test/experimental/dataflow/consistency/dataflow-consistency.expected

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,8 @@ uniquePostUpdate
8888
postIsInSameCallable
8989
reverseRead
9090
storeIsPostUpdate
91+
| test.py:152:9:152:16 | ControlFlowNode for List | Store targets should be PostUpdateNodes. |
92+
| test.py:153:9:153:24 | ControlFlowNode for Dict | Store targets should be PostUpdateNodes. |
9193
argHasPostUpdate
9294
| test.py:25:10:25:10 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
9395
| test.py:29:10:29:10 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |

0 commit comments

Comments
 (0)