Skip to content

Commit 4deb43f

Browse files
authored
Merge pull request #4323 from RasmusWL/python-new-command-injection-query
Approved by tausbn
2 parents 7b1dbb4 + 66815c9 commit 4deb43f

File tree

34 files changed

+1201
-5
lines changed

34 files changed

+1201
-5
lines changed
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/**
2+
* @name Uncontrolled command line
3+
* @description Using externally controlled strings in a command line may allow a malicious
4+
* user to change the meaning of the command.
5+
* @kind path-problem
6+
* @problem.severity error
7+
* @sub-severity high
8+
* @precision high
9+
* @id py/command-line-injection
10+
* @tags correctness
11+
* security
12+
* external/owasp/owasp-a1
13+
* external/cwe/cwe-078
14+
* external/cwe/cwe-088
15+
*/
16+
17+
import python
18+
import experimental.dataflow.DataFlow
19+
import experimental.dataflow.TaintTracking
20+
import experimental.semmle.python.Concepts
21+
import experimental.dataflow.RemoteFlowSources
22+
import DataFlow::PathGraph
23+
24+
class CommandInjectionConfiguration extends TaintTracking::Configuration {
25+
CommandInjectionConfiguration() { this = "CommandInjectionConfiguration" }
26+
27+
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
28+
29+
override predicate isSink(DataFlow::Node sink) {
30+
sink = any(SystemCommandExecution e).getCommand()
31+
}
32+
}
33+
34+
from CommandInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
35+
where config.hasFlowPath(source, sink)
36+
select sink.getNode(), source, sink, "This command depends on $@.", source.getNode(),
37+
"a user-provided value"

python/ql/src/experimental/dataflow/TypeTracker.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ private newtype TTypeTracker = MkTypeTracker(Boolean hasCall, OptionalAttributeN
154154
* t.start() and
155155
* result = < source of myType >
156156
* or
157-
* exists (TypeTracker t2 |
157+
* exists (DataFlow::TypeTracker t2 |
158158
* result = myType(t2).track(t2, t)
159159
* )
160160
* }

python/ql/src/experimental/dataflow/internal/DataFlowPrivate.qll

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,11 @@ predicate compatibleTypes(DataFlowType t1, DataFlowType t2) { any() }
411411
/**
412412
* Gets the type of `node`.
413413
*/
414-
DataFlowType getNodeType(Node node) { result = TAnyFlow() }
414+
DataFlowType getNodeType(Node node) {
415+
result = TAnyFlow() and
416+
// Suppress unused variable warning
417+
node = node
418+
}
415419

416420
/** Gets a string representation of a type returned by `getErasedRepr`. */
417421
string ppReprType(DataFlowType t) { none() }
@@ -458,7 +462,9 @@ predicate listStoreStep(CfgNode nodeFrom, ListElementContent c, CfgNode nodeTo)
458462
// nodeFrom is `42`, cfg node
459463
// nodeTo is the list, `[..., 42, ...]`, cfg node
460464
// c denotes element of list
461-
nodeTo.getNode().(ListNode).getAnElement() = nodeFrom.getNode()
465+
nodeTo.getNode().(ListNode).getAnElement() = nodeFrom.getNode() and
466+
// Suppress unused variable warning
467+
c = c
462468
}
463469

464470
/** Data flows from an element of a set to the set. */
@@ -468,7 +474,9 @@ predicate setStoreStep(CfgNode nodeFrom, ListElementContent c, CfgNode nodeTo) {
468474
// nodeFrom is `42`, cfg node
469475
// nodeTo is the set, `{..., 42, ...}`, cfg node
470476
// c denotes element of list
471-
nodeTo.getNode().(SetNode).getAnElement() = nodeFrom.getNode()
477+
nodeTo.getNode().(SetNode).getAnElement() = nodeFrom.getNode() and
478+
// Suppress unused variable warning
479+
c = c
472480
}
473481

474482
/** Data flows from an element of a tuple to the tuple at a specific index. */

python/ql/src/experimental/dataflow/internal/DataFlowUtil.qll

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,52 @@ predicate localFlowStep(Node nodeFrom, Node nodeTo) { simpleLocalFlowStep(nodeFr
1616
* (intra-procedural) steps.
1717
*/
1818
predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) }
19+
20+
/**
21+
* Gets an EssaNode that holds the module imported by `name`.
22+
* Note that for the statement `import pkg.mod`, the new variable introduced is `pkg` that is a
23+
* reference to the module `pkg`.
24+
*
25+
* This predicate handles (with optional `... as <new-name>`):
26+
* 1. `import <name>`
27+
* 2. `from <package> import <module>` when `<name> = <package> + "." + <module>`
28+
* 3. `from <module> import <member>` when `<name> = <module> + "." + <member>`
29+
*
30+
* Note:
31+
* While it is technically possible that `import mypkg.foo` and `from mypkg import foo` can give different values,
32+
* it's highly unlikely that this will be a problem in production level code.
33+
* Example: If `mypkg/__init__.py` contains `foo = 42`, then `from mypkg import foo` will not import the module
34+
* `mypkg/foo.py` but the variable `foo` containing `42` -- however, `import mypkg.foo` will always cause `mypkg.foo`
35+
* to refer to the module.
36+
*
37+
* Also see `DataFlow::importMember`
38+
*/
39+
EssaNode importModule(string name) {
40+
exists(Variable var, Import imp, Alias alias |
41+
alias = imp.getAName() and
42+
alias.getAsname() = var.getAStore() and
43+
(
44+
name = alias.getValue().(ImportMember).getImportedModuleName()
45+
or
46+
name = alias.getValue().(ImportExpr).getImportedModuleName()
47+
) and
48+
result.getVar().(AssignmentDefinition).getSourceVariable() = var
49+
)
50+
}
51+
52+
/**
53+
* Gets a EssaNode that holds the value imported by using fully qualified name in
54+
*`from <moduleName> import <memberName>`.
55+
*
56+
* Also see `DataFlow::importModule`.
57+
*/
58+
EssaNode importMember(string moduleName, string memberName) {
59+
exists(Variable var, Import imp, Alias alias, ImportMember member |
60+
alias = imp.getAName() and
61+
member = alias.getValue() and
62+
moduleName = member.getModule().(ImportExpr).getImportedModuleName() and
63+
memberName = member.getName() and
64+
alias.getAsname() = var.getAStore() and
65+
result.getVar().(AssignmentDefinition).getSourceVariable() = var
66+
)
67+
}

python/ql/src/experimental/semmle/python/frameworks/Flask.qll

Lines changed: 136 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,141 @@
55
private import python
66
private import experimental.dataflow.DataFlow
77
private import experimental.dataflow.RemoteFlowSources
8+
private import experimental.dataflow.TaintTracking
89
private import experimental.semmle.python.Concepts
10+
private import experimental.semmle.python.frameworks.Werkzeug
911

10-
private module Flask { }
12+
// for old improved impl see
13+
// https://github.com/github/codeql/blob/9f95212e103c68d0c1dfa4b6f30fb5d53954ccef/python/ql/src/semmle/python/web/flask/Request.qll
14+
private module Flask {
15+
/** Gets a reference to the `flask` module. */
16+
DataFlow::Node flask(DataFlow::TypeTracker t) {
17+
t.start() and
18+
result = DataFlow::importModule("flask")
19+
or
20+
exists(DataFlow::TypeTracker t2 | result = flask(t2).track(t2, t))
21+
}
22+
23+
/** Gets a reference to the `flask` module. */
24+
DataFlow::Node flask() { result = flask(DataFlow::TypeTracker::end()) }
25+
26+
module flask {
27+
/** Gets a reference to the `flask.request` object. */
28+
DataFlow::Node request(DataFlow::TypeTracker t) {
29+
t.start() and
30+
result = DataFlow::importMember("flask", "request")
31+
or
32+
t.startInAttr("request") and
33+
result = flask()
34+
or
35+
exists(DataFlow::TypeTracker t2 | result = flask::request(t2).track(t2, t))
36+
}
37+
38+
/** Gets a reference to the `flask.request` object. */
39+
DataFlow::Node request() { result = flask::request(DataFlow::TypeTracker::end()) }
40+
}
41+
42+
// TODO: Do we even need this class? :|
43+
/**
44+
* A source of remote flow from a flask request.
45+
*
46+
* See https://flask.palletsprojects.com/en/1.1.x/api/#flask.Request
47+
*/
48+
private class RequestSource extends RemoteFlowSource::Range {
49+
RequestSource() { this = flask::request() }
50+
51+
override string getSourceType() { result = "flask.request" }
52+
}
53+
54+
private module FlaskRequestTracking {
55+
private DataFlow::Node tainted_methods(string attr_name, DataFlow::TypeTracker t) {
56+
attr_name in ["get_data", "get_json"] and
57+
t.startInAttr(attr_name) and
58+
result = flask::request()
59+
or
60+
exists(DataFlow::TypeTracker t2 | result = tainted_methods(attr_name, t2).track(t2, t))
61+
}
62+
63+
DataFlow::Node tainted_methods(string attr_name) {
64+
result = tainted_methods(attr_name, DataFlow::TypeTracker::end())
65+
}
66+
}
67+
68+
/**
69+
* A source of remote flow from attributes from a flask request.
70+
*
71+
* See https://flask.palletsprojects.com/en/1.1.x/api/#flask.Request
72+
*/
73+
private class RequestInputAccess extends RemoteFlowSource::Range {
74+
string attr_name;
75+
76+
RequestInputAccess() {
77+
// attributes
78+
exists(AttrNode attr |
79+
this.asCfgNode() = attr and attr.getObject(attr_name) = flask::request().asCfgNode()
80+
|
81+
attr_name in ["path",
82+
// str
83+
"full_path", "base_url", "url", "access_control_request_method", "content_encoding",
84+
"content_md5", "content_type", "data", "method", "mimetype", "origin", "query_string",
85+
"referrer", "remote_addr", "remote_user", "user_agent",
86+
// dict
87+
"environ", "cookies", "mimetype_params", "view_args",
88+
// json
89+
"json",
90+
// List[str]
91+
"access_route",
92+
// file-like
93+
"stream", "input_stream",
94+
// MultiDict[str, str]
95+
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.MultiDict
96+
"args", "values", "form",
97+
// MultiDict[str, FileStorage]
98+
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.FileStorage
99+
// TODO: FileStorage needs extra taint steps
100+
"files",
101+
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.HeaderSet
102+
"access_control_request_headers", "pragma",
103+
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.Accept
104+
// TODO: Kinda badly modeled for now -- has type List[Tuple[value, quality]], and some extra methods
105+
"accept_charsets", "accept_encodings", "accept_languages", "accept_mimetypes",
106+
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.Authorization
107+
// TODO: dict subclass with extra attributes like `username` and `password`
108+
"authorization",
109+
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.RequestCacheControl
110+
// TODO: has attributes like `no_cache`, and `to_header` method (actually, many of these models do)
111+
"cache_control",
112+
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.Headers
113+
// TODO: dict-like with wsgiref.headers.Header compatibility methods
114+
"headers"]
115+
)
116+
or
117+
// methods (needs special handling to track bound-methods -- see `FlaskRequestMethodCallsAdditionalTaintStep` below)
118+
this = FlaskRequestTracking::tainted_methods(attr_name)
119+
}
120+
121+
override string getSourceType() { result = "flask.request input" }
122+
}
123+
124+
private class FlaskRequestMethodCallsAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
125+
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
126+
// NOTE: `request -> request.tainted_method` part is handled as part of RequestInputAccess
127+
// tainted_method -> tainted_method()
128+
nodeFrom = FlaskRequestTracking::tainted_methods(_) and
129+
nodeTo.asCfgNode().(CallNode).getFunction() = nodeFrom.asCfgNode()
130+
}
131+
}
132+
133+
private class RequestInputMultiDict extends RequestInputAccess,
134+
Werkzeug::Datastructures::MultiDict {
135+
RequestInputMultiDict() { attr_name in ["args", "values", "form", "files"] }
136+
}
137+
138+
private class RequestInputFiles extends RequestInputMultiDict {
139+
RequestInputFiles() { attr_name = "files" }
140+
}
141+
// TODO: Somehow specify that elements of `RequestInputFiles` are
142+
// Werkzeug::Datastructures::FileStorage and should have those additional taint steps
143+
// AND that the 0-indexed argument to its' save method is a sink for path-injection.
144+
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.FileStorage.save
145+
}

python/ql/src/experimental/semmle/python/frameworks/Stdlib.qll

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,70 @@ private import python
77
private import experimental.dataflow.DataFlow
88
private import experimental.dataflow.RemoteFlowSources
99
private import experimental.semmle.python.Concepts
10+
11+
private module Stdlib {
12+
/** Gets a reference to the `os` module. */
13+
DataFlow::Node os(DataFlow::TypeTracker t) {
14+
t.start() and
15+
result = DataFlow::importModule("os")
16+
or
17+
exists(DataFlow::TypeTracker t2 | result = os(t2).track(t2, t))
18+
}
19+
20+
/** Gets a reference to the `os` module. */
21+
DataFlow::Node os() { result = os(DataFlow::TypeTracker::end()) }
22+
23+
module os {
24+
/** Gets a reference to the `os.system` function. */
25+
DataFlow::Node system(DataFlow::TypeTracker t) {
26+
t.start() and
27+
result = DataFlow::importMember("os", "system")
28+
or
29+
t.startInAttr("system") and
30+
result = os()
31+
or
32+
exists(DataFlow::TypeTracker t2 | result = os::system(t2).track(t2, t))
33+
}
34+
35+
/** Gets a reference to the `os.system` function. */
36+
DataFlow::Node system() { result = os::system(DataFlow::TypeTracker::end()) }
37+
38+
/** Gets a reference to the `os.popen` function. */
39+
DataFlow::Node popen(DataFlow::TypeTracker t) {
40+
t.start() and
41+
result = DataFlow::importMember("os", "popen")
42+
or
43+
t.startInAttr("popen") and
44+
result = os()
45+
or
46+
exists(DataFlow::TypeTracker t2 | result = os::popen(t2).track(t2, t))
47+
}
48+
49+
/** Gets a reference to the `os.popen` function. */
50+
DataFlow::Node popen() { result = os::popen(DataFlow::TypeTracker::end()) }
51+
}
52+
53+
/**
54+
* A call to `os.system`.
55+
* See https://docs.python.org/3/library/os.html#os.system
56+
*/
57+
private class OsSystemCall extends SystemCommandExecution::Range {
58+
OsSystemCall() { this.asCfgNode().(CallNode).getFunction() = os::system().asCfgNode() }
59+
60+
override DataFlow::Node getCommand() {
61+
result.asCfgNode() = this.asCfgNode().(CallNode).getArg(0)
62+
}
63+
}
64+
65+
/**
66+
* A call to `os.popen`
67+
* See https://docs.python.org/3/library/os.html#os.popen
68+
*/
69+
private class OsPopenCall extends SystemCommandExecution::Range {
70+
OsPopenCall() { this.asCfgNode().(CallNode).getFunction() = os::popen().asCfgNode() }
71+
72+
override DataFlow::Node getCommand() {
73+
result.asCfgNode() = this.asCfgNode().(CallNode).getArg(0)
74+
}
75+
}
76+
}

0 commit comments

Comments
 (0)