Skip to content

Commit a152833

Browse files
committed
Merge branch 'python-add-source-nodes' of https://github.com/tausbn/codeql into tausbn-python-add-source-nodes
2 parents e86db3c + a9149b7 commit a152833

File tree

11 files changed

+99
-43
lines changed

11 files changed

+99
-43
lines changed

python/ql/src/semmle/python/Concepts.qll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,7 @@ module HTTP {
336336
/** Gets the URL pattern for this route, if it can be statically determined. */
337337
string getUrlPattern() {
338338
exists(StrConst str |
339-
DataFlow::localFlow(DataFlow::exprNode(str), this.getUrlPatternArg()) and
339+
DataFlow::exprNode(str).(DataFlow::LocalSourceNode).flowsTo(this.getUrlPatternArg()) and
340340
result = str.getText()
341341
)
342342
}
@@ -403,7 +403,9 @@ module HTTP {
403403
/** Gets the mimetype of this HTTP response, if it can be statically determined. */
404404
string getMimetype() {
405405
exists(StrConst str |
406-
DataFlow::localFlow(DataFlow::exprNode(str), this.getMimetypeOrContentTypeArg()) and
406+
DataFlow::exprNode(str)
407+
.(DataFlow::LocalSourceNode)
408+
.flowsTo(this.getMimetypeOrContentTypeArg()) and
407409
result = str.getText().splitAt(";", 0)
408410
)
409411
or

python/ql/src/semmle/python/Exprs.qll

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -584,18 +584,40 @@ class Slice extends Slice_ {
584584
}
585585
}
586586

587+
/**
588+
* Returns all string prefixes in the database that are explicitly marked as Unicode strings.
589+
*
590+
* Helper predicate for `StrConst::isUnicode`.
591+
*/
592+
pragma[nomagic]
593+
private string unicode_prefix() {
594+
result = any(Str_ s).getPrefix() and
595+
result.charAt(_) in ["u", "U"]
596+
}
597+
598+
/**
599+
* Returns all string prefixes in the database that are _not_ explicitly marked as bytestrings.
600+
*
601+
* Helper predicate for `StrConst::isUnicode`.
602+
*/
603+
pragma[nomagic]
604+
private string non_byte_prefix() {
605+
result = any(Str_ s).getPrefix() and
606+
not result.charAt(_) in ["b", "B"]
607+
}
608+
587609
/** A string constant. */
588610
class StrConst extends Str_, ImmutableLiteral {
589611
/* syntax: "hello" */
590612
predicate isUnicode() {
591-
this.getPrefix().charAt(_) = "u"
592-
or
593-
this.getPrefix().charAt(_) = "U"
594-
or
595-
not this.getPrefix().charAt(_) = "b" and major_version() = 3
613+
this.getPrefix() = unicode_prefix()
596614
or
597-
not this.getPrefix().charAt(_) = "b" and
598-
this.getEnclosingModule().hasFromFuture("unicode_literals")
615+
this.getPrefix() = non_byte_prefix() and
616+
(
617+
major_version() = 3
618+
or
619+
this.getEnclosingModule().hasFromFuture("unicode_literals")
620+
)
599621
}
600622

601623
deprecated override string strValue() { result = this.getS() }

python/ql/src/semmle/python/dataflow/new/TypeTracker.qll

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ module StepSummary {
5151
* heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
5252
*/
5353
cached
54-
predicate step(Node nodeFrom, Node nodeTo, StepSummary summary) {
54+
predicate step(LocalSourceNode nodeFrom, Node nodeTo, StepSummary summary) {
5555
exists(Node mid | typePreservingStep*(nodeFrom, mid) and smallstep(mid, nodeTo, summary))
5656
}
5757

@@ -82,9 +82,8 @@ module StepSummary {
8282

8383
/** Holds if it's reasonable to expect the data flow step from `nodeFrom` to `nodeTo` to preserve types. */
8484
private predicate typePreservingStep(Node nodeFrom, Node nodeTo) {
85-
EssaFlow::essaFlowStep(nodeFrom, nodeTo) or
86-
jumpStep(nodeFrom, nodeTo) or
87-
nodeFrom = nodeTo.(PostUpdateNode).getPreUpdateNode()
85+
simpleLocalFlowStep(nodeFrom, nodeTo) or
86+
jumpStep(nodeFrom, nodeTo)
8887
}
8988

9089
/**
@@ -142,11 +141,11 @@ predicate returnStep(ReturnNode nodeFrom, Node nodeTo) {
142141
* function. This means we will track the fact that `x.attr` can have the type of `y` into the
143142
* assignment to `z` inside `bar`, even though this attribute write happens _after_ `bar` is called.
144143
*/
145-
predicate basicStoreStep(Node nodeFrom, Node nodeTo, string attr) {
144+
predicate basicStoreStep(Node nodeFrom, LocalSourceNode nodeTo, string attr) {
146145
exists(AttrWrite a |
147146
a.mayHaveAttributeName(attr) and
148147
nodeFrom = a.getValue() and
149-
simpleLocalFlowStep*(nodeTo, a.getObject())
148+
nodeTo.flowsTo(a.getObject())
150149
)
151150
}
152151

python/ql/src/semmle/python/dataflow/new/internal/Attributes.qll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@ abstract class AttrRef extends Node {
3030
predicate mayHaveAttributeName(string attrName) {
3131
attrName = this.getAttributeName()
3232
or
33-
exists(Node nodeFrom |
34-
localFlow(nodeFrom, this.getAttributeNameExpr()) and
33+
exists(LocalSourceNode nodeFrom |
34+
nodeFrom.flowsTo(this.getAttributeNameExpr()) and
3535
attrName = nodeFrom.asExpr().(StrConst).getText()
3636
)
3737
}

python/ql/src/semmle/python/dataflow/new/internal/DataFlowPrivate.qll

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,7 @@ module EssaFlow {
186186
* data flow. It is a strict subset of the `localFlowStep` predicate, as it
187187
* excludes SSA flow through instance fields.
188188
*/
189+
cached
189190
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
190191
// If there is ESSA-flow out of a node `node`, we want flow
191192
// both out of `node` and any post-update node of `node`.
@@ -219,12 +220,9 @@ private predicate localEssaStep(EssaNode nodeFrom, EssaNode nodeTo) {
219220
* Holds if `result` is either `node`, or the post-update node for `node`.
220221
*/
221222
private Node update(Node node) {
222-
exists(PostUpdateNode pun |
223-
node = pun.getPreUpdateNode() and
224-
result = pun
225-
)
226-
or
227223
result = node
224+
or
225+
result.(PostUpdateNode).getPreUpdateNode() = node
228226
}
229227

230228
// TODO: Make modules for these headings

python/ql/src/semmle/python/dataflow/new/internal/DataFlowPublic.qll

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,19 @@ class BarrierGuard extends GuardNode {
355355
}
356356
}
357357

358+
/**
359+
* A data flow node that is a source of local flow. This includes things like
360+
* - Expressions
361+
* - Function parameters
362+
*/
363+
class LocalSourceNode extends Node {
364+
LocalSourceNode() { not simpleLocalFlowStep(_, this) }
365+
366+
/** Holds if this `LocalSourceNode` can flow to `nodeTo` in zero or more local flow steps. */
367+
cached
368+
predicate flowsTo(Node nodeTo) { simpleLocalFlowStep*(this, nodeTo) }
369+
}
370+
358371
/**
359372
* Algebraic datatype for tracking data content associated with values.
360373
* Content can be collection elements or object attributes.

python/ql/src/semmle/python/frameworks/Django.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1641,7 +1641,7 @@ private module Django {
16411641

16421642
DjangoRouteRegex() {
16431643
this instanceof StrConst and
1644-
DataFlow::localFlow(DataFlow::exprNode(this), rePathCall.getUrlPatternArg())
1644+
DataFlow::exprNode(this).(DataFlow::LocalSourceNode).flowsTo(rePathCall.getUrlPatternArg())
16451645
}
16461646

16471647
DjangoRegexRouteSetup getRouteSetup() { result = rePathCall }

python/ql/src/semmle/python/frameworks/Flask.qll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -319,9 +319,9 @@ private module FlaskModel {
319319
}
320320

321321
override Function getARouteHandler() {
322-
exists(DataFlow::Node view_func_arg, DataFlow::Node func_src |
322+
exists(DataFlow::Node view_func_arg, DataFlow::LocalSourceNode func_src |
323323
view_func_arg.asCfgNode() in [node.getArg(2), node.getArgByName("view_func")] and
324-
DataFlow::localFlow(func_src, view_func_arg) and
324+
func_src.flowsTo(view_func_arg) and
325325
func_src.asExpr().(CallableExpr) = result.getDefinition()
326326
)
327327
}

python/ql/src/semmle/python/objects/TObject.qll

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -229,23 +229,32 @@ predicate class_method(
229229
PointsToInternal::pointsTo(instantiation.getArg(0), context, function, _)
230230
}
231231

232+
/**
233+
* Holds if the literal corresponding to the control flow node `n` has class `cls`.
234+
*
235+
* Helper predicate for `literal_instantiation`. Prevents a bad join with
236+
* `PointsToContext::appliesTo` from occurring.
237+
*/
238+
pragma[nomagic]
239+
private predicate literal_node_class(ControlFlowNode n, ClassObjectInternal cls) {
240+
n instanceof ListNode and cls = ObjectInternal::builtin("list")
241+
or
242+
n instanceof DictNode and cls = ObjectInternal::builtin("dict")
243+
or
244+
n instanceof SetNode and cls = ObjectInternal::builtin("set")
245+
or
246+
n.getNode() instanceof ImaginaryLiteral and cls = ObjectInternal::builtin("complex")
247+
or
248+
n.getNode() instanceof ListComp and cls = ObjectInternal::builtin("list")
249+
or
250+
n.getNode() instanceof SetComp and cls = ObjectInternal::builtin("set")
251+
or
252+
n.getNode() instanceof DictComp and cls = ObjectInternal::builtin("dict")
253+
}
254+
232255
predicate literal_instantiation(ControlFlowNode n, ClassObjectInternal cls, PointsToContext context) {
233256
context.appliesTo(n) and
234-
(
235-
n instanceof ListNode and cls = ObjectInternal::builtin("list")
236-
or
237-
n instanceof DictNode and cls = ObjectInternal::builtin("dict")
238-
or
239-
n instanceof SetNode and cls = ObjectInternal::builtin("set")
240-
or
241-
n.getNode() instanceof ImaginaryLiteral and cls = ObjectInternal::builtin("complex")
242-
or
243-
n.getNode() instanceof ListComp and cls = ObjectInternal::builtin("list")
244-
or
245-
n.getNode() instanceof SetComp and cls = ObjectInternal::builtin("set")
246-
or
247-
n.getNode() instanceof DictComp and cls = ObjectInternal::builtin("dict")
248-
)
257+
literal_node_class(n, cls)
249258
}
250259

251260
predicate super_instantiation(

python/ql/src/semmle/python/regex.qll

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,19 @@ private predicate re_module_function(string name, int flags) {
1919
name = "subn" and flags = 4
2020
}
2121

22+
/**
23+
* Gets the names and corresponding values of attributes of the `re` module that are likely to be
24+
* methods taking regular expressions as arguments.
25+
*
26+
* This is a helper predicate that fixes a bad join order, and should not be inlined without checking
27+
* that this is safe.
28+
*/
29+
pragma[nomagic]
30+
private Value relevant_re_attr(string name) {
31+
result = Module::named("re").attr(name) and
32+
name != "escape"
33+
}
34+
2235
/**
2336
* Holds if `s` is used as a regex with the `re` module, with the regex-mode `mode` (if known).
2437
* If regex mode is not known, `mode` will be `"None"`.
@@ -28,8 +41,7 @@ predicate used_as_regex(Expr s, string mode) {
2841
/* Call to re.xxx(regex, ... [mode]) */
2942
exists(CallNode call, string name |
3043
call.getArg(0).pointsTo(_, _, s.getAFlowNode()) and
31-
call.getFunction().pointsTo(Module::named("re").attr(name)) and
32-
not name = "escape"
44+
call.getFunction().pointsTo(relevant_re_attr(name))
3345
|
3446
mode = "None"
3547
or

0 commit comments

Comments
 (0)