Skip to content

Commit 551ae42

Browse files
committed
Merge branch 'main' of github.com:github/codeql into SharedDataflow_NestedComprehensions
2 parents d67f57a + 000fa33 commit 551ae42

File tree

10 files changed

+490
-7
lines changed

10 files changed

+490
-7
lines changed

python/ql/src/experimental/dataflow/internal/TaintTrackingPrivate.qll

Lines changed: 105 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,6 @@ private import experimental.dataflow.DataFlow
33
private import experimental.dataflow.internal.DataFlowPrivate
44
private import experimental.dataflow.internal.TaintTrackingPublic
55

6-
/**
7-
* Holds if taint can flow in one local step from `nodeFrom` to `nodeTo` excluding
8-
* local data flow steps. That is, `nodeFrom` and `nodeTo` are likely to represent
9-
* different objects.
10-
*/
11-
predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { none() }
12-
136
/**
147
* Holds if `node` should be a barrier in all global taint flow configurations
158
* but not in local taint.
@@ -25,3 +18,108 @@ predicate defaultAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nod
2518
or
2619
any(AdditionalTaintStep a).step(nodeFrom, nodeTo)
2720
}
21+
22+
/**
23+
* Holds if taint can flow in one local step from `nodeFrom` to `nodeTo` excluding
24+
* local data flow steps. That is, `nodeFrom` and `nodeTo` are likely to represent
25+
* different objects.
26+
*/
27+
predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
28+
concatStep(nodeFrom, nodeTo)
29+
or
30+
subscriptStep(nodeFrom, nodeTo)
31+
or
32+
stringManipulation(nodeFrom, nodeTo)
33+
}
34+
35+
/**
36+
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to concatenation.
37+
*
38+
* Note that since we cannot easily distinguish interesting types (like string, list, tuple),
39+
* we consider any `+` operation to propagate taint. After consulting with the JS team, this
40+
* doesn't sound like it is a big problem in practice.
41+
*/
42+
predicate concatStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
43+
exists(BinaryExprNode add | add = nodeTo.getNode() |
44+
add.getOp() instanceof Add and add.getAnOperand() = nodeFrom.getNode()
45+
)
46+
}
47+
48+
/**
49+
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to subscripting.
50+
*/
51+
predicate subscriptStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
52+
nodeTo.getNode().(SubscriptNode).getObject() = nodeFrom.getNode()
53+
}
54+
55+
/**
56+
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to string
57+
* manipulation.
58+
*
59+
* Note that since we cannot easily distinguish when something is a string, this can
60+
* also make taint flow on `<non string>.replace(foo, bar)`.
61+
*/
62+
predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
63+
// transforming something tainted into a string will make the string tainted
64+
exists(CallNode call | call = nodeTo.getNode() |
65+
call.getFunction().(NameNode).getId() in ["str", "bytes", "unicode"] and
66+
(
67+
nodeFrom.getNode() = call.getArg(0)
68+
or
69+
nodeFrom.getNode() = call.getArgByName("object")
70+
)
71+
)
72+
or
73+
// String methods. Note that this doesn't recognize `meth = "foo".upper; meth()`
74+
exists(CallNode call, string method_name, ControlFlowNode object |
75+
call = nodeTo.getNode() and
76+
object = call.getFunction().(AttrNode).getObject(method_name)
77+
|
78+
nodeFrom.getNode() = object and
79+
method_name in ["capitalize", "casefold", "center", "expandtabs", "format", "format_map",
80+
"join", "ljust", "lstrip", "lower", "replace", "rjust", "rstrip", "strip", "swapcase",
81+
"title", "upper", "zfill", "encode", "decode"]
82+
or
83+
method_name = "replace" and
84+
nodeFrom.getNode() = call.getArg(1)
85+
or
86+
method_name = "format" and
87+
nodeFrom.getNode() = call.getAnArg()
88+
or
89+
// str -> List[str]
90+
// TODO: check if these should be handled differently in regards to content
91+
nodeFrom.getNode() = object and
92+
method_name in ["partition", "rpartition", "rsplit", "split", "splitlines"]
93+
or
94+
// List[str] -> str
95+
// TODO: check if these should be handled differently in regards to content
96+
method_name = "join" and
97+
nodeFrom.getNode() = call.getArg(0)
98+
or
99+
// Mapping[str, Any] -> str
100+
method_name = "format_map" and
101+
nodeFrom.getNode() = call.getArg(0)
102+
)
103+
or
104+
// % formatting
105+
exists(BinaryExprNode fmt | fmt = nodeTo.getNode() |
106+
fmt.getOp() instanceof Mod and
107+
(
108+
fmt.getLeft() = nodeFrom.getNode()
109+
or
110+
fmt.getRight() = nodeFrom.getNode()
111+
)
112+
)
113+
or
114+
// string multiplication -- `"foo" * 10`
115+
exists(BinaryExprNode mult | mult = nodeTo.getNode() |
116+
mult.getOp() instanceof Mult and
117+
mult.getLeft() = nodeFrom.getNode()
118+
)
119+
or
120+
// f-strings
121+
nodeTo.getNode().getNode().(Fstring).getAValue() = nodeFrom.getNode().getNode()
122+
// TODO: Handle encode/decode from base64/quopri
123+
// TODO: Handle os.path.join
124+
// TODO: Handle functions in https://docs.python.org/3/library/binascii.html
125+
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
semmle-extractor-options: --max-import-depth=1
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
import python
2+
import experimental.dataflow.TaintTracking
3+
import experimental.dataflow.DataFlow
4+
5+
class TestTaintTrackingConfiguration extends TaintTracking::Configuration {
6+
TestTaintTrackingConfiguration() { this = "TestTaintTrackingConfiguration" }
7+
8+
override predicate isSource(DataFlow::Node source) {
9+
source.(DataFlow::CfgNode).getNode().(NameNode).getId() in ["TAINTED_STRING", "TAINTED_BYTES"]
10+
}
11+
12+
override predicate isSink(DataFlow::Node sink) {
13+
exists(CallNode call |
14+
call.getFunction().(NameNode).getId() in ["ensure_tainted", "ensure_not_tainted"] and
15+
sink.(DataFlow::CfgNode).getNode() = call.getAnArg()
16+
)
17+
}
18+
}
19+
20+
private string repr(Expr e) {
21+
not e instanceof Num and
22+
not e instanceof StrConst and
23+
not e instanceof Subscript and
24+
not e instanceof Call and
25+
not e instanceof Attribute and
26+
result = e.toString()
27+
or
28+
result = e.(Num).getN()
29+
or
30+
result =
31+
e.(StrConst).getPrefix() + e.(StrConst).getText() +
32+
e.(StrConst).getPrefix().regexpReplaceAll("[a-zA-Z]+", "")
33+
or
34+
result = repr(e.(Subscript).getObject()) + "[" + repr(e.(Subscript).getIndex()) + "]"
35+
or
36+
(
37+
if exists(e.(Call).getAnArg()) or exists(e.(Call).getANamedArg())
38+
then result = repr(e.(Call).getFunc()) + "(..)"
39+
else result = repr(e.(Call).getFunc()) + "()"
40+
)
41+
or
42+
result = repr(e.(Attribute).getObject()) + "." + e.(Attribute).getName()
43+
}
44+
45+
query predicate test_taint(string arg_location, string test_res, string function_name, string repr) {
46+
exists(Call call, Expr arg, boolean expected_taint, boolean has_taint |
47+
call.getLocation().getFile().getShortName() = "test.py" and
48+
(
49+
call.getFunc().(Name).getId() = "ensure_tainted" and
50+
expected_taint = true
51+
or
52+
call.getFunc().(Name).getId() = "ensure_not_tainted" and
53+
expected_taint = false
54+
) and
55+
arg = call.getAnArg() and
56+
(
57+
// TODO: Replace with `hasFlowToExpr` once that is working
58+
if
59+
exists(TaintTracking::Configuration c |
60+
c.hasFlowTo(any(DataFlow::Node n | n.(DataFlow::CfgNode).getNode() = arg.getAFlowNode()))
61+
)
62+
then has_taint = true
63+
else has_taint = false
64+
) and
65+
(if expected_taint = has_taint then test_res = "ok " else test_res = "fail") and
66+
// select
67+
arg_location = arg.getLocation().toString() and
68+
test_res = test_res and
69+
function_name = call.getScope().(Function).getName() and
70+
repr = repr(arg)
71+
)
72+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
| test.py:26 | ok | str_methods | ts.casefold() |
2+
| test.py:28 | ok | str_methods | ts.format_map(..) |
3+
| test.py:29 | fail | str_methods | "{unsafe}".format_map(..) |
4+
| test.py:40 | fail | binary_decode_encode | base64.a85encode(..) |
5+
| test.py:41 | fail | binary_decode_encode | base64.a85decode(..) |
6+
| test.py:44 | fail | binary_decode_encode | base64.b85encode(..) |
7+
| test.py:45 | fail | binary_decode_encode | base64.b85decode(..) |
8+
| test.py:48 | fail | binary_decode_encode | base64.encodebytes(..) |
9+
| test.py:49 | fail | binary_decode_encode | base64.decodebytes(..) |
10+
| test.py:57 | ok | f_strings | Fstring |
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
import experimental.dataflow.tainttracking.TestTaintLib
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
semmle-extractor-options: --max-import-depth=1 --lang=3
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
# Python 3 specific taint tracking for string
2+
3+
TAINTED_STRING = "TAINTED_STRING"
4+
TAINTED_BYTES = b"TAINTED_BYTES"
5+
6+
7+
def ensure_tainted(*args):
8+
print("- ensure_tainted")
9+
for i, arg in enumerate(args):
10+
print("arg {}: {!r}".format(i, arg))
11+
12+
13+
def ensure_not_tainted(*args):
14+
print("- ensure_not_tainted")
15+
for i, arg in enumerate(args):
16+
print("arg {}: {!r}".format(i, arg))
17+
18+
19+
# Actual tests
20+
21+
def str_methods():
22+
print("\n# str_methods")
23+
ts = TAINTED_STRING
24+
tb = TAINTED_BYTES
25+
ensure_tainted(
26+
ts.casefold(),
27+
28+
ts.format_map({}),
29+
"{unsafe}".format_map({"unsafe": ts}),
30+
)
31+
32+
33+
def binary_decode_encode():
34+
print("\n#percent_fmt")
35+
tb = TAINTED_BYTES
36+
import base64
37+
38+
ensure_tainted(
39+
# New in Python 3.4
40+
base64.a85encode(tb),
41+
base64.a85decode(base64.a85encode(tb)),
42+
43+
# New in Python 3.4
44+
base64.b85encode(tb),
45+
base64.b85decode(base64.b85encode(tb)),
46+
47+
# New in Python 3.1
48+
base64.encodebytes(tb),
49+
base64.decodebytes(base64.encodebytes(tb)),
50+
)
51+
52+
53+
def f_strings():
54+
print("\n#f_strings")
55+
ts = TAINTED_STRING
56+
57+
ensure_tainted(f"foo {ts} bar")
58+
59+
60+
# Make tests runable
61+
62+
str_methods()
63+
binary_decode_encode()
64+
f_strings()
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
| test.py:32 | ok | str_operations | ts |
2+
| test.py:33 | ok | str_operations | BinaryExpr |
3+
| test.py:34 | ok | str_operations | BinaryExpr |
4+
| test.py:35 | ok | str_operations | BinaryExpr |
5+
| test.py:36 | ok | str_operations | ts[Slice] |
6+
| test.py:37 | ok | str_operations | ts[Slice] |
7+
| test.py:38 | ok | str_operations | ts[Slice] |
8+
| test.py:39 | ok | str_operations | ts[0] |
9+
| test.py:40 | ok | str_operations | str(..) |
10+
| test.py:41 | ok | str_operations | bytes(..) |
11+
| test.py:42 | ok | str_operations | unicode(..) |
12+
| test.py:51 | ok | str_methods | ts.capitalize() |
13+
| test.py:52 | ok | str_methods | ts.center(..) |
14+
| test.py:53 | ok | str_methods | ts.expandtabs() |
15+
| test.py:55 | ok | str_methods | ts.format() |
16+
| test.py:56 | ok | str_methods | "{}".format(..) |
17+
| test.py:57 | ok | str_methods | "{unsafe}".format(..) |
18+
| test.py:59 | ok | str_methods | ts.join(..) |
19+
| test.py:60 | fail | str_methods | "".join(..) |
20+
| test.py:62 | ok | str_methods | ts.ljust(..) |
21+
| test.py:63 | ok | str_methods | ts.lstrip() |
22+
| test.py:64 | ok | str_methods | ts.lower() |
23+
| test.py:66 | ok | str_methods | ts.replace(..) |
24+
| test.py:67 | ok | str_methods | "safe".replace(..) |
25+
| test.py:69 | ok | str_methods | ts.rjust(..) |
26+
| test.py:70 | ok | str_methods | ts.rstrip() |
27+
| test.py:71 | ok | str_methods | ts.strip() |
28+
| test.py:72 | ok | str_methods | ts.swapcase() |
29+
| test.py:73 | ok | str_methods | ts.title() |
30+
| test.py:74 | ok | str_methods | ts.upper() |
31+
| test.py:75 | ok | str_methods | ts.zfill(..) |
32+
| test.py:77 | ok | str_methods | ts.encode(..) |
33+
| test.py:78 | ok | str_methods | ts.encode(..).decode(..) |
34+
| test.py:80 | ok | str_methods | tb.decode(..) |
35+
| test.py:81 | ok | str_methods | tb.decode(..).encode(..) |
36+
| test.py:84 | ok | str_methods | ts.partition(..) |
37+
| test.py:85 | ok | str_methods | ts.rpartition(..) |
38+
| test.py:86 | ok | str_methods | ts.rsplit(..) |
39+
| test.py:87 | ok | str_methods | ts.split(..) |
40+
| test.py:88 | ok | str_methods | ts.splitlines() |
41+
| test.py:93 | ok | str_methods | "safe".replace(..) |
42+
| test.py:95 | fail | str_methods | ts.join(..) |
43+
| test.py:96 | fail | str_methods | ts.join(..) |
44+
| test.py:106 | fail | non_syntactic | meth() |
45+
| test.py:107 | fail | non_syntactic | _str(..) |
46+
| test.py:116 | ok | percent_fmt | BinaryExpr |
47+
| test.py:117 | ok | percent_fmt | BinaryExpr |
48+
| test.py:118 | fail | percent_fmt | BinaryExpr |
49+
| test.py:128 | fail | binary_decode_encode | base64.b64encode(..) |
50+
| test.py:129 | fail | binary_decode_encode | base64.b64decode(..) |
51+
| test.py:131 | fail | binary_decode_encode | base64.standard_b64encode(..) |
52+
| test.py:132 | fail | binary_decode_encode | base64.standard_b64decode(..) |
53+
| test.py:134 | fail | binary_decode_encode | base64.urlsafe_b64encode(..) |
54+
| test.py:135 | fail | binary_decode_encode | base64.urlsafe_b64decode(..) |
55+
| test.py:137 | fail | binary_decode_encode | base64.b32encode(..) |
56+
| test.py:138 | fail | binary_decode_encode | base64.b32decode(..) |
57+
| test.py:140 | fail | binary_decode_encode | base64.b16encode(..) |
58+
| test.py:141 | fail | binary_decode_encode | base64.b16decode(..) |
59+
| test.py:156 | fail | binary_decode_encode | base64.encodestring(..) |
60+
| test.py:157 | fail | binary_decode_encode | base64.decodestring(..) |
61+
| test.py:162 | fail | binary_decode_encode | quopri.encodestring(..) |
62+
| test.py:163 | fail | binary_decode_encode | quopri.decodestring(..) |
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
import experimental.dataflow.tainttracking.TestTaintLib

0 commit comments

Comments
 (0)