Skip to content

Commit d694777

Browse files
authored
Merge pull request #4369 from RasmusWL/python-ospathjoin-taintstep
Python: Add taint-step for os.path.join
2 parents b1c826e + 1595fed commit d694777

File tree

7 files changed

+142
-6
lines changed

7 files changed

+142
-6
lines changed

python/ql/src/experimental/dataflow/internal/TaintTrackingPrivate.qll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeT
101101
nodeFrom.getNode() = object and
102102
method_name in ["partition", "rpartition", "rsplit", "split", "splitlines"]
103103
or
104-
// List[str] -> str
104+
// Iterable[str] -> str
105105
// TODO: check if these should be handled differently in regards to content
106106
method_name = "join" and
107107
nodeFrom.getNode() = call.getArg(0)
@@ -130,7 +130,6 @@ predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeT
130130
// f-strings
131131
nodeTo.asExpr().(Fstring).getAValue() = nodeFrom.asExpr()
132132
// TODO: Handle encode/decode from base64/quopri
133-
// TODO: Handle os.path.join
134133
// TODO: Handle functions in https://docs.python.org/3/library/binascii.html
135134
}
136135

python/ql/src/experimental/semmle/python/frameworks/Stdlib.qll

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,11 @@
55

66
private import python
77
private import experimental.dataflow.DataFlow
8+
private import experimental.dataflow.TaintTracking
89
private import experimental.dataflow.RemoteFlowSources
910
private import experimental.semmle.python.Concepts
1011

12+
/** Provides models for the Python standard library. */
1113
private module Stdlib {
1214
/** Gets a reference to the `os` module. */
1315
DataFlow::Node os(DataFlow::TypeTracker t) {
@@ -20,6 +22,7 @@ private module Stdlib {
2022
/** Gets a reference to the `os` module. */
2123
DataFlow::Node os() { result = os(DataFlow::TypeTracker::end()) }
2224

25+
/** Provides models for the `os` module. */
2326
module os {
2427
/** Gets a reference to the `os.system` function. */
2528
DataFlow::Node system(DataFlow::TypeTracker t) {
@@ -48,6 +51,41 @@ private module Stdlib {
4851

4952
/** Gets a reference to the `os.popen` function. */
5053
DataFlow::Node popen() { result = os::popen(DataFlow::TypeTracker::end()) }
54+
55+
/** Gets a reference to the `os.path` module. */
56+
private DataFlow::Node path(DataFlow::TypeTracker t) {
57+
t.start() and
58+
(
59+
result = DataFlow::importMember("os", "path")
60+
or
61+
result = DataFlow::importModule("os.path")
62+
)
63+
or
64+
t.startInAttr("path") and
65+
result = os()
66+
or
67+
exists(DataFlow::TypeTracker t2 | result = path(t2).track(t2, t))
68+
}
69+
70+
/** Gets a reference to the `os.path` module. */
71+
DataFlow::Node path() { result = path(DataFlow::TypeTracker::end()) }
72+
73+
/** Provides models for the `os.path` module */
74+
module path {
75+
/** Gets a reference to the `os.path.join` function. */
76+
private DataFlow::Node join(DataFlow::TypeTracker t) {
77+
t.start() and
78+
result = DataFlow::importMember("os.path", "join")
79+
or
80+
t.startInAttr("join") and
81+
result = os::path()
82+
or
83+
exists(DataFlow::TypeTracker t2 | result = join(t2).track(t2, t))
84+
}
85+
86+
/** Gets a reference to the `os.join` module. */
87+
DataFlow::Node join() { result = join(DataFlow::TypeTracker::end()) }
88+
}
5189
}
5290

5391
/**
@@ -73,4 +111,16 @@ private module Stdlib {
73111
result.asCfgNode() = this.asCfgNode().(CallNode).getArg(0)
74112
}
75113
}
114+
115+
/** An additional taint step for calls to `os.path.join` */
116+
private class OsPathJoinCallAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
117+
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
118+
exists(CallNode call |
119+
nodeTo.asCfgNode() = call and
120+
call.getFunction() = os::path::join().asCfgNode() and
121+
call.getAnArg() = nodeFrom.asCfgNode()
122+
)
123+
// TODO: Handle pathlib (like we do for os.path.join)
124+
}
125+
}
76126
}

python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep-py3/TestTaint.expected

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,18 @@
11
| test_collections.py:16 | ok | test_access | tainted_list.copy() |
22
| test_collections.py:24 | ok | list_clear | tainted_list |
33
| test_collections.py:27 | fail | list_clear | tainted_list |
4+
| test_pathlib.py:26 | fail | test_basic | tainted_path |
5+
| test_pathlib.py:28 | fail | test_basic | tainted_pure_path |
6+
| test_pathlib.py:29 | fail | test_basic | tainted_pure_posix_path |
7+
| test_pathlib.py:30 | fail | test_basic | tainted_pure_windows_path |
8+
| test_pathlib.py:32 | fail | test_basic | BinaryExpr |
9+
| test_pathlib.py:33 | fail | test_basic | BinaryExpr |
10+
| test_pathlib.py:35 | fail | test_basic | tainted_path.joinpath(..) |
11+
| test_pathlib.py:36 | fail | test_basic | pathlib.Path(..).joinpath(..) |
12+
| test_pathlib.py:37 | fail | test_basic | pathlib.Path(..).joinpath(..) |
13+
| test_pathlib.py:39 | fail | test_basic | str(..) |
14+
| test_pathlib.py:49 | fail | test_basic | tainted_posix_path |
15+
| test_pathlib.py:55 | fail | test_basic | tainted_windows_path |
416
| test_string.py:17 | ok | str_methods | ts.casefold() |
517
| test_string.py:19 | ok | str_methods | ts.format_map(..) |
618
| test_string.py:20 | ok | str_methods | "{unsafe}".format_map(..) |
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# Add taintlib to PATH so it can be imported during runtime without any hassle
2+
import sys; import os; sys.path.append(os.path.dirname(os.path.dirname((__file__))))
3+
from taintlib import *
4+
5+
# This has no runtime impact, but allows autocomplete to work
6+
from typing import Iterable, TYPE_CHECKING
7+
if TYPE_CHECKING:
8+
from ..taintlib import *
9+
10+
# Actual tests
11+
12+
import pathlib
13+
# pathlib was added in 3.4
14+
15+
def test_basic():
16+
print("\n# test_basic")
17+
ts = TAINTED_STRING
18+
19+
tainted_path = pathlib.Path(ts)
20+
21+
tainted_pure_path = pathlib.PurePath(ts)
22+
tainted_pure_posix_path = pathlib.PurePosixPath(ts)
23+
tainted_pure_windows_path = pathlib.PureWindowsPath(ts)
24+
25+
ensure_tainted(
26+
tainted_path,
27+
28+
tainted_pure_path,
29+
tainted_pure_posix_path,
30+
tainted_pure_windows_path,
31+
32+
pathlib.Path("foo") / ts,
33+
ts / pathlib.Path("foo"),
34+
35+
tainted_path.joinpath("foo", "bar"),
36+
pathlib.Path("foo").joinpath(tainted_path, "bar"),
37+
pathlib.Path("foo").joinpath("bar", tainted_path),
38+
39+
str(tainted_path),
40+
41+
# TODO: Tainted methods and attributes
42+
# https://docs.python.org/3.8/library/pathlib.html#methods-and-properties
43+
)
44+
45+
if os.name == "posix":
46+
tainted_posix_path = pathlib.PosixPath(ts)
47+
48+
ensure_tainted(
49+
tainted_posix_path,
50+
)
51+
52+
if os.name == "nt":
53+
tainted_windows_path = pathlib.WindowsPath(ts)
54+
ensure_tainted(
55+
tainted_windows_path,
56+
)
57+
58+
# Make tests runable
59+
60+
test_basic()

python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep-py3/test_string.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def str_methods():
2222

2323

2424
def binary_decode_encode():
25-
print("\n#percent_fmt")
25+
print("\n# binary_decode_encode")
2626
tb = TAINTED_BYTES
2727
import base64
2828

@@ -42,7 +42,7 @@ def binary_decode_encode():
4242

4343

4444
def f_strings():
45-
print("\n#f_strings")
45+
print("\n# f_strings")
4646
ts = TAINTED_STRING
4747

4848
ensure_tainted(f"foo {ts} bar")

python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/TestTaint.expected

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,9 @@
137137
| test_string.py:143 | fail | binary_decode_encode | base64.decodestring(..) |
138138
| test_string.py:148 | fail | binary_decode_encode | quopri.encodestring(..) |
139139
| test_string.py:149 | fail | binary_decode_encode | quopri.decodestring(..) |
140+
| test_string.py:158 | ok | test_os_path_join | os.path.join(..) |
141+
| test_string.py:159 | ok | test_os_path_join | os.path.join(..) |
142+
| test_string.py:160 | ok | test_os_path_join | os.path.join(..) |
140143
| test_unpacking.py:16 | ok | unpacking | a |
141144
| test_unpacking.py:16 | ok | unpacking | b |
142145
| test_unpacking.py:16 | ok | unpacking | c |

python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_string.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ def non_syntactic():
107107

108108

109109
def percent_fmt():
110-
print("\n#percent_fmt")
110+
print("\n# percent_fmt")
111111
ts = TAINTED_STRING
112112
tainted_fmt = ts + " %s %s"
113113
ensure_tainted(
@@ -118,7 +118,7 @@ def percent_fmt():
118118

119119

120120
def binary_decode_encode():
121-
print("\n#percent_fmt")
121+
print("\n# binary_decode_encode")
122122
tb = TAINTED_BYTES
123123
import base64
124124

@@ -150,10 +150,22 @@ def binary_decode_encode():
150150
)
151151

152152

153+
def test_os_path_join():
154+
import os
155+
print("\n# test_os_path_join")
156+
ts = TAINTED_STRING
157+
ensure_tainted(
158+
os.path.join(ts, "foo", "bar"),
159+
os.path.join(ts),
160+
os.path.join("foo", "bar", ts),
161+
)
162+
163+
153164
# Make tests runable
154165

155166
str_operations()
156167
str_methods()
157168
non_syntactic()
158169
percent_fmt()
159170
binary_decode_encode()
171+
test_os_path_join()

0 commit comments

Comments
 (0)