Skip to content

Commit 39acc9a

Browse files
authored
Merge pull request #4735 from RasmusWL/python-untrusted-flow
Python: Untrusted data used in external APIs
2 parents 9dd6439 + a08e1db commit 39acc9a

16 files changed

+446
-18
lines changed
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
from flask import Flask, request, make_response
2+
app = Flask(__name__)
3+
4+
@app.route("/xss")
5+
def xss():
6+
username = request.args.get("username")
7+
return make_response("Hello {}".format(username))
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
import base64
2+
import pickle
3+
4+
from flask import Flask, request, make_response
5+
app = Flask(__name__)
6+
7+
@app.route("/example")
8+
def profile():
9+
raw_data = request.args.get("data").encode('utf-8')
10+
data = base64.decodebytes(raw_data)
11+
obj = pickle.loads(data)
12+
...
Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
/**
2+
* Definitions for reasoning about untrusted data used in APIs defined outside the
3+
* database.
4+
*/
5+
6+
import python
7+
import semmle.python.dataflow.new.DataFlow
8+
import semmle.python.dataflow.new.TaintTracking
9+
import semmle.python.Concepts
10+
import semmle.python.dataflow.new.RemoteFlowSources
11+
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
12+
private import semmle.python.dataflow.new.internal.TaintTrackingPrivate as TaintTrackingPrivate
13+
private import semmle.python.types.Builtins
14+
private import semmle.python.objects.ObjectInternal
15+
16+
// IMPLEMENTATION NOTES:
17+
//
18+
// This query uses *both* the new data-flow library, and points-to. Why? To get this
19+
// finished quickly, so it can provide value for our field team and ourselves.
20+
//
21+
// In the long run, it should not need to use points-to for anything. Possibly this can
22+
// even be helpful in figuring out what we need from TypeTrackers and the new data-flow
23+
// library to be fully operational.
24+
//
25+
// At least it will allow us to provide a baseline comparison against a solution that
26+
// doesn't use points-to at all
27+
//
28+
// There is a few dirty things we do here:
29+
// 1. DataFlowPrivate: since `DataFlowCall` and `DataFlowCallable` are not exposed
30+
// publicly, but we really want access to them.
31+
// 2. points-to: we kinda need to do this since this is what powers `DataFlowCall` and
32+
// `DataFlowCallable`
33+
// 3. ObjectInternal: to provide better names for built-in functions and methods. If we
34+
// really wanted to polish our points-to implementation, we could move this
35+
// functionality into `BuiltinFunctionValue` and `BuiltinMethodValue`, but will
36+
// probably require some more work: for this query, it's totally ok to use
37+
// `builtins.open` for the code `open(f)`, but well, it requires a bit of thinking to
38+
// figure out if that is desireable in general. I simply skipped a corner here!
39+
// 4. TaintTrackingPrivate: Nothing else gives us access to `defaultAdditionalTaintStep` :(
40+
/**
41+
* A callable that is considered a "safe" external API from a security perspective.
42+
*/
43+
class SafeExternalAPI extends Unit {
44+
abstract DataFlowPrivate::DataFlowCallable getSafeCallable();
45+
}
46+
47+
/** The default set of "safe" external APIs. */
48+
private class DefaultSafeExternalAPI extends SafeExternalAPI {
49+
override DataFlowPrivate::DataFlowCallable getSafeCallable() {
50+
exists(CallableValue cv | cv = result.getCallableValue() |
51+
cv = Value::named(["len", "isinstance", "getattr", "hasattr"])
52+
or
53+
exists(ClassValue cls, string attr |
54+
cls = Value::named("dict") and attr in ["__getitem__", "__setitem__"]
55+
|
56+
cls.lookup(attr) = cv
57+
)
58+
)
59+
}
60+
}
61+
62+
/** A node representing data being passed to an external API through a call. */
63+
class ExternalAPIDataNode extends DataFlow::Node {
64+
DataFlowPrivate::DataFlowCall call;
65+
DataFlowPrivate::DataFlowCallable callable;
66+
int i;
67+
68+
ExternalAPIDataNode() {
69+
exists(call.getLocation().getFile().getRelativePath()) and
70+
callable = call.getCallable() and
71+
not any(SafeExternalAPI safe).getSafeCallable() = callable and
72+
exists(Value cv | cv = callable.getCallableValue() |
73+
cv.isAbsent()
74+
or
75+
cv.isBuiltin()
76+
or
77+
cv.(CallableValue).getScope().getLocation().getFile().inStdlib()
78+
or
79+
not exists(cv.(CallableValue).getScope().getLocation().getFile().getRelativePath())
80+
) and
81+
// TODO: this ignores some complexity of keyword arguments (especially keyword-only args)
82+
this = call.getArg(i) and
83+
// Not already modeled as a taint step
84+
not exists(DataFlow::Node next | TaintTrackingPrivate::defaultAdditionalTaintStep(this, next)) and
85+
// for `list.append(x)`, we have a additional taint step from x -> [post] list.
86+
// Since we have modeled this explicitly, I don't see any cases where we would want to report this.
87+
not exists(DataFlow::Node prev, DataFlow::PostUpdateNode post |
88+
post.getPreUpdateNode() = this and
89+
TaintTrackingPrivate::defaultAdditionalTaintStep(prev, post)
90+
)
91+
}
92+
93+
/** Gets the index for the parameter that will receive this untrusted data */
94+
int getIndex() { result = i }
95+
96+
/** Gets the callable to which this argument is passed. */
97+
DataFlowPrivate::DataFlowCallable getCallable() { result = callable }
98+
}
99+
100+
/** A configuration for tracking flow from `RemoteFlowSource`s to `ExternalAPIDataNode`s. */
101+
class UntrustedDataToExternalAPIConfig extends TaintTracking::Configuration {
102+
UntrustedDataToExternalAPIConfig() { this = "UntrustedDataToExternalAPIConfig" }
103+
104+
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
105+
106+
override predicate isSink(DataFlow::Node sink) { sink instanceof ExternalAPIDataNode }
107+
}
108+
109+
/** A node representing untrusted data being passed to an external API. */
110+
class UntrustedExternalAPIDataNode extends ExternalAPIDataNode {
111+
UntrustedExternalAPIDataNode() { any(UntrustedDataToExternalAPIConfig c).hasFlow(_, this) }
112+
113+
/** Gets a source of untrusted data which is passed to this external API data node. */
114+
DataFlow::Node getAnUntrustedSource() {
115+
any(UntrustedDataToExternalAPIConfig c).hasFlow(result, this)
116+
}
117+
}
118+
119+
private newtype TExternalAPI =
120+
TExternalAPIParameter(DataFlowPrivate::DataFlowCallable callable, int index) {
121+
exists(UntrustedExternalAPIDataNode n |
122+
callable = n.getCallable() and
123+
index = n.getIndex()
124+
)
125+
}
126+
127+
/** An external API which is used with untrusted data. */
128+
class ExternalAPIUsedWithUntrustedData extends TExternalAPI {
129+
/** Gets a possibly untrusted use of this external API. */
130+
UntrustedExternalAPIDataNode getUntrustedDataNode() {
131+
this = TExternalAPIParameter(result.getCallable(), result.getIndex())
132+
}
133+
134+
/** Gets the number of untrusted sources used with this external API. */
135+
int getNumberOfUntrustedSources() {
136+
result = count(getUntrustedDataNode().getAnUntrustedSource())
137+
}
138+
139+
/** Gets a textual representation of this element. */
140+
string toString() {
141+
exists(
142+
DataFlowPrivate::DataFlowCallable callable, int index, string callableString,
143+
string indexString
144+
|
145+
this = TExternalAPIParameter(callable, index) and
146+
indexString = "param " + index and
147+
exists(CallableValue cv | cv = callable.getCallableValue() |
148+
callableString =
149+
cv.getScope().getEnclosingModule().getName() + "." + cv.getScope().getQualifiedName()
150+
or
151+
not exists(cv.getScope()) and
152+
(
153+
cv instanceof BuiltinFunctionValue and
154+
callableString = pretty_builtin_function_value(cv)
155+
or
156+
cv instanceof BuiltinMethodValue and
157+
callableString = pretty_builtin_method_value(cv)
158+
or
159+
not cv instanceof BuiltinFunctionValue and
160+
not cv instanceof BuiltinMethodValue and
161+
callableString = cv.toString()
162+
)
163+
) and
164+
result = callableString + " [" + indexString + "]"
165+
)
166+
}
167+
}
168+
169+
/** Gets the fully qualified name for the `BuiltinFunctionValue` bfv. */
170+
private string pretty_builtin_function_value(BuiltinFunctionValue bfv) {
171+
exists(Builtin b | b = bfv.(BuiltinFunctionObjectInternal).getBuiltin() |
172+
result = prefix_with_module_if_found(b)
173+
)
174+
}
175+
176+
/** Gets the fully qualified name for the `BuiltinMethodValue` bmv. */
177+
private string pretty_builtin_method_value(BuiltinMethodValue bmv) {
178+
exists(Builtin b | b = bmv.(BuiltinMethodObjectInternal).getBuiltin() |
179+
exists(Builtin cls | cls.isClass() and cls.getMember(b.getName()) = b |
180+
result = prefix_with_module_if_found(cls) + "." + b.getName()
181+
)
182+
or
183+
not exists(Builtin cls | cls.isClass() and cls.getMember(b.getName()) = b) and
184+
result = b.getName()
185+
)
186+
}
187+
188+
/** Helper predicate that tries to adds module qualifier to `b`. Will succeed even if module not found. */
189+
private string prefix_with_module_if_found(Builtin b) {
190+
exists(Builtin mod | mod.isModule() and mod.getMember(b.getName()) = b |
191+
result = mod.getName() + "." + b.getName()
192+
)
193+
or
194+
not exists(Builtin mod | mod.isModule() and mod.getMember(b.getName()) = b) and
195+
result = b.getName()
196+
}
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
<!DOCTYPE qhelp PUBLIC
2+
"-//Semmle//qhelp//EN"
3+
"qhelp.dtd">
4+
<qhelp>
5+
<overview>
6+
<p>Using unsanitized untrusted data in an external API can cause a variety of security
7+
issues. This query reports external APIs that are used with untrusted data, along
8+
with how frequently the API is used, and how many unique sources of untrusted data flow
9+
to this API. This query is designed primarily to help identify which APIs may be
10+
relevant for security analysis of this application.</p>
11+
12+
<p>An external API is defined as a call to a method that is not defined in the source
13+
code, and is not modeled as a taint step in the default taint library. External APIs may
14+
be from the Python standard library or dependencies. The query will report the fully qualified name,
15+
along with <code>[param x]</code>, where <code>x</code> indicates the position of
16+
the parameter receiving the untrusted data. Note that for methods and
17+
<code>classmethod</code>s, parameter 0 represents the class instance or class itself
18+
respectively.</p>
19+
20+
<p>Note that an excepted sink might not be included in the results, if it also defines a
21+
taint step. This is the case for <code>pickle.loads</code> which is a sink for the
22+
Unsafe Deserialization query, but is also a taint step for other queries.</p>
23+
24+
<p>Note: Compared to the Java version of this query, we currently do not give special
25+
care to methods that are overridden in the source code.</p>
26+
27+
<p>Note: Currently this query will only report results for external packages that are extracted.</p>
28+
29+
</overview>
30+
<recommendation>
31+
32+
<p>For each result:</p>
33+
34+
<ul>
35+
<li>If the result highlights a known sink, no action is required.</li>
36+
<li>If the result highlights an unknown sink for a problem, then add modeling for the sink to the relevant query.</li>
37+
<li>If the result represents a call to an external API which transfers taint, add the appropriate modeling, and
38+
re-run the query to determine what new results have appeared due to this additional modeling.</li>
39+
</ul>
40+
41+
<p>Otherwise, the result is likely uninteresting. Custom versions of this query can extend the <code>SafeExternalAPI</code>
42+
class and specify <code>getSafeCallable</code> to exclude known safe external APIs from future analysis.</p>
43+
44+
</recommendation>
45+
<example>
46+
47+
<p>If the query were to return the API <code>flask.make_response [param 0]</code>
48+
then we should first consider whether this a security relevant sink. In this case, this is making a HTTP response, so we should
49+
consider whether this is an XSS sink. If it is, we should confirm that it is handled by the XSS query.</p>
50+
51+
<p>If the query were to return the API <code>base64.decodebytes [param 0]</code>, then this should be
52+
reviewed as a possible taint step, because tainted data would flow from the 0th argument to the result of the call.</p>
53+
54+
<p>Note that both examples are correctly handled by the standard taint tracking library and XSS query.</p>
55+
</example>
56+
<references>
57+
58+
</references>
59+
</qhelp>
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
/**
2+
* @name Frequency counts for external APIs that are used with untrusted data
3+
* @description This reports the external APIs that are used with untrusted data, along with how
4+
* frequently the API is called, and how many unique sources of untrusted data flow
5+
* to it.
6+
* @id python/count-untrusted-data-external-api
7+
* @kind table
8+
* @tags security external/cwe/cwe-20
9+
*/
10+
11+
import python
12+
import ExternalAPIs
13+
14+
from ExternalAPIUsedWithUntrustedData externalAPI
15+
select externalAPI, count(externalAPI.getUntrustedDataNode()) as numberOfUses,
16+
externalAPI.getNumberOfUntrustedSources() as numberOfUntrustedSources order by
17+
numberOfUntrustedSources desc
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
<!DOCTYPE qhelp PUBLIC
2+
"-//Semmle//qhelp//EN"
3+
"qhelp.dtd">
4+
<qhelp>
5+
<overview>
6+
<p>Using unsanitized untrusted data in an external API can cause a variety of security
7+
issues. This query reports external APIs that use untrusted data. The results are not
8+
filtered so that you can audit all examples. The query provides data for security
9+
reviews of the application and you can also use it to identify external APIs that should
10+
be modeled as either taint steps, or sinks for specific problems.</p>
11+
12+
<p>An external API is defined as a call to a method that is not defined in the source
13+
code, and is not modeled as a taint step in the default taint library. External APIs may
14+
be from the Python standard library or dependencies. The query will report the fully qualified name,
15+
along with <code>[param x]</code>, where <code>x</code> indicates the position of
16+
the parameter receiving the untrusted data. Note that for methods and
17+
<code>classmethod</code>s, parameter 0 represents the class instance or class itself
18+
respectively.</p>
19+
20+
<p>Note that an excepted sink might not be included in the results, if it also defines a
21+
taint step. This is the case for <code>pickle.loads</code> which is a sink for the
22+
Unsafe Deserialization query, but is also a taint step for other queries.</p>
23+
24+
<p>Note: Compared to the Java version of this query, we currently do not give special
25+
care to methods that are overridden in the source code.</p>
26+
27+
<p>Note: Currently this query will only report results for external packages that are extracted.</p>
28+
29+
</overview>
30+
<recommendation>
31+
32+
<p>For each result:</p>
33+
34+
<ul>
35+
<li>If the result highlights a known sink, confirm that the result is reported by the relevant query, or
36+
that the result is a false positive because this data is sanitized.</li>
37+
<li>If the result highlights an unknown sink for a problem, then add modeling for the sink to the relevant query,
38+
and confirm that the result is either found, or is safe due to appropriate sanitization.</li>
39+
<li>If the result represents a call to an external API that transfers taint, add the appropriate modeling, and
40+
re-run the query to determine what new results have appeared due to this additional modeling.</li>
41+
</ul>
42+
43+
<p>Otherwise, the result is likely uninteresting. Custom versions of this query can extend the <code>SafeExternalAPI</code>
44+
class and specify <code>getSafeCallable</code> to exclude known safe external APIs from future analysis.</p>
45+
46+
</recommendation>
47+
<example>
48+
49+
<p>In this first example, a request parameter is read from the Flask <code>request</code> and then ultimately used in a call to the
50+
<code>flask.make_response</code> external API:</p>
51+
52+
<sample src="ExternalAPISinkExample.py" />
53+
54+
<p>This is an XSS sink. The XSS query should therefore be reviewed to confirm that this sink is appropriately modeled,
55+
and if it is, to confirm that the query reports this particular result, or that the result is a false positive due to
56+
some existing sanitization.</p>
57+
58+
<p>In this second example, again a request parameter is read from the Flask <code>request</code>.</p>
59+
60+
<sample src="ExternalAPITaintStepExample.py" />
61+
62+
<p>If the query reported the call to <code>base64.decodebytes</code> on line 10, this
63+
would suggest that this external API is not currently modeled as a taint step in the
64+
taint tracking library. The next step would be to model this as a taint step, then
65+
re-run the query to determine what additional results might be found. In this example,
66+
the result of the Base64 decoding is pickled, which can result in remote code execution due
67+
to unsafe deserialization.</p>
68+
69+
<p>Note that both examples are correctly handled by the standard taint tracking library and Unsafe Deserialization query.</p>
70+
</example>
71+
<references>
72+
73+
</references>
74+
</qhelp>
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
/**
2+
* @name Untrusted data passed to external API
3+
* @description Data provided remotely is used in this external API without sanitization, which could be a security risk.
4+
* @id python/untrusted-data-to-external-api
5+
* @kind path-problem
6+
* @precision low
7+
* @problem.severity error
8+
* @tags security external/cwe/cwe-20
9+
*/
10+
11+
import python
12+
import ExternalAPIs
13+
import DataFlow::PathGraph
14+
15+
from
16+
UntrustedDataToExternalAPIConfig config, DataFlow::PathNode source, DataFlow::PathNode sink,
17+
ExternalAPIUsedWithUntrustedData externalAPI
18+
where
19+
sink.getNode() = externalAPI.getUntrustedDataNode() and
20+
config.hasFlowPath(source, sink)
21+
select sink.getNode(), source, sink,
22+
"Call to " + externalAPI.toString() + " with untrusted data from $@.", source.getNode(),
23+
source.toString()

0 commit comments

Comments
 (0)