Skip to content

Commit b1946c6

Browse files
authored
Merge pull request #4127 from RasmusWL/python-tainttracking-fstring
Python: Handle f-strings in (current) taint tracking
2 parents f60abd8 + 13148b4 commit b1946c6

File tree

6 files changed

+96
-6
lines changed

6 files changed

+96
-6
lines changed

change-notes/1.25/analysis-python.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,4 @@ The following changes in version 1.25 affect Python analysis in all applications
2020
## Changes to libraries
2121

2222
* Importing `semmle.python.web.HttpRequest` will no longer import `UntrustedStringKind` transitively. `UntrustedStringKind` is the most commonly used non-abstract subclass of `ExternalStringKind`. If not imported (by one mean or another), taint-tracking queries that concern `ExternalStringKind` will not produce any results. Please ensure such queries contain an explicit import (`import semmle.python.security.strings.Untrusted`).
23+
* Added support for tainted f-strings.

python/ql/src/semmle/python/security/strings/Basic.qll

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@ abstract class StringKind extends TaintKind {
2727
os_path_join(fromnode, tonode) or
2828
str_format(fromnode, tonode) or
2929
encode_decode(fromnode, tonode) or
30-
to_str(fromnode, tonode)
30+
to_str(fromnode, tonode) or
31+
f_string(fromnode, tonode)
3132
)
3233
or
3334
result = this and copy_call(fromnode, tonode)
@@ -61,13 +62,13 @@ private class StringEqualitySanitizer extends Sanitizer {
6162
}
6263
}
6364

64-
/* tonode = ....format(fromnode) */
65+
/** tonode = ....format(fromnode) */
6566
private predicate str_format(ControlFlowNode fromnode, CallNode tonode) {
6667
tonode.getFunction().(AttrNode).getName() = "format" and
6768
tonode.getAnArg() = fromnode
6869
}
6970

70-
/* tonode = codec.[en|de]code(fromnode)*/
71+
/** tonode = codec.[en|de]code(fromnode) */
7172
private predicate encode_decode(ControlFlowNode fromnode, CallNode tonode) {
7273
exists(FunctionObject func, string name |
7374
not func.getFunction().isMethod() and
@@ -81,7 +82,7 @@ private predicate encode_decode(ControlFlowNode fromnode, CallNode tonode) {
8182
)
8283
}
8384

84-
/* tonode = str(fromnode)*/
85+
/** tonode = str(fromnode) */
8586
private predicate to_str(ControlFlowNode fromnode, CallNode tonode) {
8687
tonode.getAnArg() = fromnode and
8788
(
@@ -91,7 +92,7 @@ private predicate to_str(ControlFlowNode fromnode, CallNode tonode) {
9192
)
9293
}
9394

94-
/* tonode = fromnode[:] */
95+
/** tonode = fromnode[:] */
9596
private predicate slice(ControlFlowNode fromnode, SubscriptNode tonode) {
9697
exists(Slice all |
9798
all = tonode.getIndex().getNode() and
@@ -101,12 +102,17 @@ private predicate slice(ControlFlowNode fromnode, SubscriptNode tonode) {
101102
)
102103
}
103104

104-
/* tonode = os.path.join(..., fromnode, ...) */
105+
/** tonode = os.path.join(..., fromnode, ...) */
105106
private predicate os_path_join(ControlFlowNode fromnode, CallNode tonode) {
106107
tonode = Value::named("os.path.join").getACall() and
107108
tonode.getAnArg() = fromnode
108109
}
109110

111+
/** tonode = f"... {fromnode} ..." */
112+
private predicate f_string(ControlFlowNode fromnode, ControlFlowNode tonode) {
113+
tonode.getNode().(Fstring).getAValue() = fromnode.getNode()
114+
}
115+
110116
/**
111117
* A kind of "taint", representing a dictionary mapping str->"taint"
112118
*
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
import python
2+
import semmle.python.dataflow.TaintTracking
3+
import semmle.python.security.strings.Untrusted
4+
import semmle.python.security.Exceptions
5+
6+
class SimpleSource extends TaintSource {
7+
SimpleSource() { this.(NameNode).getId() = "TAINTED_STRING" }
8+
9+
override predicate isSourceOf(TaintKind kind) { kind instanceof ExternalStringKind }
10+
11+
override string toString() { result = "taint source" }
12+
}
13+
14+
class ListSource extends TaintSource {
15+
ListSource() { this.(NameNode).getId() = "TAINTED_LIST" }
16+
17+
override predicate isSourceOf(TaintKind kind) { kind instanceof ExternalStringSequenceKind }
18+
19+
override string toString() { result = "list taint source" }
20+
}
21+
22+
class DictSource extends TaintSource {
23+
DictSource() { this.(NameNode).getId() = "TAINTED_DICT" }
24+
25+
override predicate isSourceOf(TaintKind kind) { kind instanceof ExternalStringDictKind }
26+
27+
override string toString() { result = "dict taint source" }
28+
}
29+
30+
class ExceptionInfoSource extends TaintSource {
31+
ExceptionInfoSource() { this.(NameNode).getId() = "TAINTED_EXCEPTION_INFO" }
32+
33+
override predicate isSourceOf(TaintKind kind) { kind instanceof ExceptionInfo }
34+
35+
override string toString() { result = "Exception info source" }
36+
}
37+
38+
class ExternalFileObjectSource extends TaintSource {
39+
ExternalFileObjectSource() { this.(NameNode).getId() = "TAINTED_FILE" }
40+
41+
override predicate isSourceOf(TaintKind kind) { kind instanceof ExternalFileObject }
42+
43+
override string toString() { result = "Tainted file source" }
44+
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
| test.py:4 | ok | fstring | Fstring | externally controlled string |
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import python
2+
import semmle.python.security.TaintTracking
3+
import semmle.python.web.HttpRequest
4+
import semmle.python.security.strings.Untrusted
5+
import Taint
6+
7+
from
8+
Call call, Expr arg, boolean expected_taint, boolean has_taint, string test_res,
9+
string taint_string
10+
where
11+
call.getLocation().getFile().getShortName() = "test.py" and
12+
(
13+
call.getFunc().(Name).getId() = "ensure_tainted" and
14+
expected_taint = true
15+
or
16+
call.getFunc().(Name).getId() = "ensure_not_tainted" and
17+
expected_taint = false
18+
) and
19+
arg = call.getAnArg() and
20+
(
21+
not exists(TaintedNode tainted | tainted.getAstNode() = arg) and
22+
taint_string = "<NO TAINT>" and
23+
has_taint = false
24+
or
25+
exists(TaintedNode tainted | tainted.getAstNode() = arg |
26+
taint_string = tainted.getTaintKind().toString()
27+
) and
28+
has_taint = true
29+
) and
30+
if expected_taint = has_taint then test_res = "ok " else test_res = "fail"
31+
// if expected_taint = has_taint then test_res = "✓" else test_res = "✕"
32+
select arg.getLocation().toString(), test_res, call.getScope().(Function).getName(), arg.toString(),
33+
taint_string
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
def fstring():
2+
tainted_string = TAINTED_STRING
3+
ensure_tainted(
4+
f"foo {tainted_string} bar"
5+
)

0 commit comments

Comments
 (0)