Skip to content

Commit 83ba8c9

Browse files
committed
Python: Add LocalSourceNode and flowsTo
This fixes the major performance problem with type tracking on some (pathological) databases. The interface could probably be improved a bit. In particular, I'm thinking that we might want to have `DataFlow::exprNode` return a `LocalSourceNode` so that a cast isn't necessary in order to use `flowsTo`. I have added two `cached` annotations. The one on `flowsTo` is crucial, as performance regresses without it. The one on `simpleLocalFlowStep` may not be needed, but Java has a similar annotation, and to me it makes sense to have this relation cached.
1 parent 104ff5d commit 83ba8c9

File tree

7 files changed

+28
-13
lines changed

7 files changed

+28
-13
lines changed

python/ql/src/semmle/python/Concepts.qll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,7 @@ module HTTP {
336336
/** Gets the URL pattern for this route, if it can be statically determined. */
337337
string getUrlPattern() {
338338
exists(StrConst str |
339-
DataFlow::localFlow(DataFlow::exprNode(str), this.getUrlPatternArg()) and
339+
DataFlow::exprNode(str).(DataFlow::LocalSourceNode).flowsTo(this.getUrlPatternArg()) and
340340
result = str.getText()
341341
)
342342
}
@@ -403,7 +403,9 @@ module HTTP {
403403
/** Gets the mimetype of this HTTP response, if it can be statically determined. */
404404
string getMimetype() {
405405
exists(StrConst str |
406-
DataFlow::localFlow(DataFlow::exprNode(str), this.getMimetypeOrContentTypeArg()) and
406+
DataFlow::exprNode(str)
407+
.(DataFlow::LocalSourceNode)
408+
.flowsTo(this.getMimetypeOrContentTypeArg()) and
407409
result = str.getText().splitAt(";", 0)
408410
)
409411
or

python/ql/src/semmle/python/dataflow/new/TypeTracker.qll

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ class StepSummary extends TStepSummary {
4646

4747
module StepSummary {
4848
cached
49-
predicate step(Node nodeFrom, Node nodeTo, StepSummary summary) {
49+
predicate step(LocalSourceNode nodeFrom, Node nodeTo, StepSummary summary) {
5050
exists(Node mid | typePreservingStep*(nodeFrom, mid) and smallstep(mid, nodeTo, summary))
5151
}
5252

@@ -70,9 +70,8 @@ module StepSummary {
7070

7171
/** Holds if it's reasonable to expect the data flow step from `nodeFrom` to `nodeTo` to preserve types. */
7272
private predicate typePreservingStep(Node nodeFrom, Node nodeTo) {
73-
EssaFlow::essaFlowStep(nodeFrom, nodeTo) or
74-
jumpStep(nodeFrom, nodeTo) or
75-
nodeFrom = nodeTo.(PostUpdateNode).getPreUpdateNode()
73+
simpleLocalFlowStep(nodeFrom, nodeTo) or
74+
jumpStep(nodeFrom, nodeTo)
7675
}
7776

7877
/** Holds if `nodeFrom` steps to `nodeTo` by being passed as a parameter in a call. */
@@ -115,11 +114,11 @@ predicate returnStep(ReturnNode nodeFrom, Node nodeTo) {
115114
* function. This means we will track the fact that `x.attr` can have the type of `y` into the
116115
* assignment to `z` inside `bar`, even though this attribute write happens _after_ `bar` is called.
117116
*/
118-
predicate basicStoreStep(Node nodeFrom, Node nodeTo, string attr) {
117+
predicate basicStoreStep(Node nodeFrom, LocalSourceNode nodeTo, string attr) {
119118
exists(AttrWrite a |
120119
a.mayHaveAttributeName(attr) and
121120
nodeFrom = a.getValue() and
122-
simpleLocalFlowStep*(nodeTo, a.getObject())
121+
nodeTo.flowsTo(a.getObject())
123122
)
124123
}
125124

python/ql/src/semmle/python/dataflow/new/internal/Attributes.qll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@ abstract class AttrRef extends Node {
3030
predicate mayHaveAttributeName(string attrName) {
3131
attrName = this.getAttributeName()
3232
or
33-
exists(Node nodeFrom |
34-
localFlow(nodeFrom, this.getAttributeNameExpr()) and
33+
exists(LocalSourceNode nodeFrom |
34+
nodeFrom.flowsTo(this.getAttributeNameExpr()) and
3535
attrName = nodeFrom.asExpr().(StrConst).getText()
3636
)
3737
}

python/ql/src/semmle/python/dataflow/new/internal/DataFlowPrivate.qll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,7 @@ module EssaFlow {
181181
* data flow. It is a strict subset of the `localFlowStep` predicate, as it
182182
* excludes SSA flow through instance fields.
183183
*/
184+
cached
184185
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
185186
// If there is ESSA-flow out of a node `node`, we want flow
186187
// both out of `node` and any post-update node of `node`.

python/ql/src/semmle/python/dataflow/new/internal/DataFlowPublic.qll

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,19 @@ class BarrierGuard extends GuardNode {
351351
}
352352
}
353353

354+
/**
355+
* A data flow node that is a source of local flow. This includes things like
356+
* - Expressions
357+
* - Function parameters
358+
*/
359+
class LocalSourceNode extends Node {
360+
LocalSourceNode() { not simpleLocalFlowStep(_, this) }
361+
362+
/** Holds if this `LocalSourceNode` can flow to `nodeTo` in one or more local flow steps. */
363+
cached
364+
predicate flowsTo(Node nodeTo) { simpleLocalFlowStep*(this, nodeTo) }
365+
}
366+
354367
/**
355368
* A reference contained in an object. This is either a field or a property.
356369
*/

python/ql/src/semmle/python/frameworks/Django.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1641,7 +1641,7 @@ private module Django {
16411641

16421642
DjangoRouteRegex() {
16431643
this instanceof StrConst and
1644-
DataFlow::localFlow(DataFlow::exprNode(this), rePathCall.getUrlPatternArg())
1644+
DataFlow::exprNode(this).(DataFlow::LocalSourceNode).flowsTo(rePathCall.getUrlPatternArg())
16451645
}
16461646

16471647
DjangoRegexRouteSetup getRouteSetup() { result = rePathCall }

python/ql/src/semmle/python/frameworks/Flask.qll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -319,9 +319,9 @@ private module FlaskModel {
319319
}
320320

321321
override Function getARouteHandler() {
322-
exists(DataFlow::Node view_func_arg, DataFlow::Node func_src |
322+
exists(DataFlow::Node view_func_arg, DataFlow::LocalSourceNode func_src |
323323
view_func_arg.asCfgNode() in [node.getArg(2), node.getArgByName("view_func")] and
324-
DataFlow::localFlow(func_src, view_func_arg) and
324+
func_src.flowsTo(view_func_arg) and
325325
func_src.asExpr().(CallableExpr) = result.getDefinition()
326326
)
327327
}

0 commit comments

Comments
 (0)