Skip to content

Commit 5142bfa

Browse files
authored
Merge pull request #4453 from yoff/python-port-unsafe-deserialization
Python: port unsafe deserialization
2 parents 58baec5 + 89f5352 commit 5142bfa

File tree

18 files changed

+449
-2
lines changed

18 files changed

+449
-2
lines changed
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
/**
2+
* @name Deserializing untrusted input
3+
* @description Deserializing user-controlled data may allow attackers to execute arbitrary code.
4+
* @kind path-problem
5+
* @id py/unsafe-deserialization
6+
* @problem.severity error
7+
* @sub-severity high
8+
* @precision high
9+
* @tags external/cwe/cwe-502
10+
* security
11+
* serialization
12+
*/
13+
14+
import python
15+
import experimental.dataflow.DataFlow
16+
import experimental.dataflow.TaintTracking
17+
import experimental.semmle.python.Concepts
18+
import experimental.dataflow.RemoteFlowSources
19+
import DataFlow::PathGraph
20+
21+
class UnsafeDeserializationConfiguration extends TaintTracking::Configuration {
22+
UnsafeDeserializationConfiguration() { this = "UnsafeDeserializationConfiguration" }
23+
24+
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
25+
26+
override predicate isSink(DataFlow::Node sink) {
27+
exists(Decoding d |
28+
d.mayExecuteInput() and
29+
sink = d.getAnInput()
30+
)
31+
}
32+
}
33+
34+
from UnsafeDeserializationConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
35+
where config.hasFlowPath(source, sink)
36+
select sink.getNode(), source, sink, "Deserializing of $@.", source.getNode(), "untrusted input"

python/ql/src/experimental/semmle/python/Concepts.qll

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,63 @@ module SystemCommandExecution {
4040
}
4141
}
4242

43+
/**
44+
* A data-flow node that decodes data from a binary or textual format. This
45+
* is intended to include deserialization, unmarshalling, decoding, unpickling,
46+
* decompressing, decrypting, parsing etc.
47+
*
48+
* Doing so should normally preserve taint, but it can also be a problem
49+
* in itself, e.g. if it allows code execution or could result in denial-of-service.
50+
*
51+
* Extend this class to refine existing API models. If you want to model new APIs,
52+
* extend `Decoding::Range` instead.
53+
*/
54+
class Decoding extends DataFlow::Node {
55+
Decoding::Range range;
56+
57+
Decoding() { this = range }
58+
59+
/** Holds if this call may execute code embedded in its input. */
60+
predicate mayExecuteInput() { range.mayExecuteInput() }
61+
62+
/** Gets an input that is decoded by this function. */
63+
DataFlow::Node getAnInput() { result = range.getAnInput() }
64+
65+
/** Gets the output that contains the decoded data produced by this function. */
66+
DataFlow::Node getOutput() { result = range.getOutput() }
67+
68+
/** Gets an identifier for the format this function decodes from, such as "JSON". */
69+
string getFormat() { result = range.getFormat() }
70+
}
71+
72+
/** Provides a class for modeling new decoding mechanisms. */
73+
module Decoding {
74+
/**
75+
* A data-flow node that decodes data from a binary or textual format. This
76+
* is intended to include deserialization, unmarshalling, decoding, unpickling,
77+
* decompressing, decrypting, parsing etc.
78+
*
79+
* Doing so should normally preserve taint, but it can also be a problem
80+
* in itself, e.g. if it allows code execution or could result in denial-of-service.
81+
*
82+
* Extend this class to model new APIs. If you want to refine existing API models,
83+
* extend `Decoding` instead.
84+
*/
85+
abstract class Range extends DataFlow::Node {
86+
/** Holds if this call may execute code embedded in its input. */
87+
abstract predicate mayExecuteInput();
88+
89+
/** Gets an input that is decoded by this function. */
90+
abstract DataFlow::Node getAnInput();
91+
92+
/** Gets the output that contains the decoded data produced by this function. */
93+
abstract DataFlow::Node getOutput();
94+
95+
/** Gets an identifier for the format this function decodes from, such as "JSON". */
96+
abstract string getFormat();
97+
}
98+
}
99+
43100
/**
44101
* A data-flow node that dynamically executes Python code.
45102
*

python/ql/src/experimental/semmle/python/Frameworks.qll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
* Helper file that imports all framework modeling.
33
*/
44

5+
private import experimental.semmle.python.frameworks.Dill
56
private import experimental.semmle.python.frameworks.Django
67
private import experimental.semmle.python.frameworks.Flask
78
private import experimental.semmle.python.frameworks.Invoke
89
private import experimental.semmle.python.frameworks.Stdlib
10+
private import experimental.semmle.python.frameworks.Yaml
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
/**
2+
* Provides classes modeling security-relevant aspects of the 'dill' package.
3+
* See https://pypi.org/project/dill/.
4+
*/
5+
6+
private import python
7+
private import experimental.dataflow.DataFlow
8+
private import experimental.dataflow.RemoteFlowSources
9+
private import experimental.semmle.python.Concepts
10+
11+
private module Dill {
12+
/** Gets a reference to the `dill` module. */
13+
private DataFlow::Node dill(DataFlow::TypeTracker t) {
14+
t.start() and
15+
result = DataFlow::importNode("dill")
16+
or
17+
exists(DataFlow::TypeTracker t2 | result = dill(t2).track(t2, t))
18+
}
19+
20+
/** Gets a reference to the `dill` module. */
21+
DataFlow::Node dill() { result = dill(DataFlow::TypeTracker::end()) }
22+
23+
/** Provides models for the `dill` module. */
24+
module dill {
25+
/** Gets a reference to the `dill.loads` function. */
26+
private DataFlow::Node loads(DataFlow::TypeTracker t) {
27+
t.start() and
28+
result = DataFlow::importNode("dill.loads")
29+
or
30+
t.startInAttr("loads") and
31+
result = dill()
32+
or
33+
exists(DataFlow::TypeTracker t2 | result = loads(t2).track(t2, t))
34+
}
35+
36+
/** Gets a reference to the `dill.loads` function. */
37+
DataFlow::Node loads() { result = loads(DataFlow::TypeTracker::end()) }
38+
}
39+
}
40+
41+
/**
42+
* A call to `dill.loads`
43+
* See https://pypi.org/project/dill/ (which currently refers you
44+
* to https://docs.python.org/3/library/pickle.html#pickle.loads)
45+
*/
46+
private class DillLoadsCall extends Decoding::Range, DataFlow::CfgNode {
47+
override CallNode node;
48+
49+
DillLoadsCall() { node.getFunction() = Dill::dill::loads().asCfgNode() }
50+
51+
override predicate mayExecuteInput() { any() }
52+
53+
override DataFlow::Node getAnInput() { result.asCfgNode() = node.getArg(0) }
54+
55+
override DataFlow::Node getOutput() { result = this }
56+
57+
override string getFormat() { result = "dill" }
58+
}

python/ql/src/experimental/semmle/python/frameworks/Flask.qll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ private import experimental.semmle.python.frameworks.Werkzeug
1818
*/
1919
private module Flask {
2020
/** Gets a reference to the `flask` module. */
21-
DataFlow::Node flask(DataFlow::TypeTracker t) {
21+
private DataFlow::Node flask(DataFlow::TypeTracker t) {
2222
t.start() and
2323
result = DataFlow::importNode("flask")
2424
or
@@ -31,7 +31,7 @@ private module Flask {
3131
/** Provides models for the `flask` module. */
3232
module flask {
3333
/** Gets a reference to the `flask.request` object. */
34-
DataFlow::Node request(DataFlow::TypeTracker t) {
34+
private DataFlow::Node request(DataFlow::TypeTracker t) {
3535
t.start() and
3636
result = DataFlow::importNode("flask.request")
3737
or

python/ql/src/experimental/semmle/python/frameworks/Stdlib.qll

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,106 @@ private module Stdlib {
329329
}
330330
}
331331

332+
// ---------------------------------------------------------------------------
333+
// marshal
334+
// ---------------------------------------------------------------------------
335+
/** Gets a reference to the `marshal` module. */
336+
private DataFlow::Node marshal(DataFlow::TypeTracker t) {
337+
t.start() and
338+
result = DataFlow::importNode("marshal")
339+
or
340+
exists(DataFlow::TypeTracker t2 | result = marshal(t2).track(t2, t))
341+
}
342+
343+
/** Gets a reference to the `marshal` module. */
344+
DataFlow::Node marshal() { result = marshal(DataFlow::TypeTracker::end()) }
345+
346+
/** Provides models for the `marshal` module. */
347+
module marshal {
348+
/** Gets a reference to the `marshal.loads` function. */
349+
private DataFlow::Node loads(DataFlow::TypeTracker t) {
350+
t.start() and
351+
result = DataFlow::importNode("marshal.loads")
352+
or
353+
t.startInAttr("loads") and
354+
result = marshal()
355+
or
356+
exists(DataFlow::TypeTracker t2 | result = loads(t2).track(t2, t))
357+
}
358+
359+
/** Gets a reference to the `marshal.loads` function. */
360+
DataFlow::Node loads() { result = loads(DataFlow::TypeTracker::end()) }
361+
}
362+
363+
/**
364+
* A call to `marshal.loads`
365+
* See https://docs.python.org/3/library/marshal.html#marshal.loads
366+
*/
367+
private class MarshalLoadsCall extends Decoding::Range, DataFlow::CfgNode {
368+
override CallNode node;
369+
370+
MarshalLoadsCall() { node.getFunction() = marshal::loads().asCfgNode() }
371+
372+
override predicate mayExecuteInput() { any() }
373+
374+
override DataFlow::Node getAnInput() { result.asCfgNode() = node.getArg(0) }
375+
376+
override DataFlow::Node getOutput() { result = this }
377+
378+
override string getFormat() { result = "marshal" }
379+
}
380+
381+
// ---------------------------------------------------------------------------
382+
// pickle
383+
// ---------------------------------------------------------------------------
384+
private string pickleModuleName() { result in ["pickle", "cPickle", "_pickle"] }
385+
386+
/** Gets a reference to the `pickle` module. */
387+
private DataFlow::Node pickle(DataFlow::TypeTracker t) {
388+
t.start() and
389+
result = DataFlow::importNode(pickleModuleName())
390+
or
391+
exists(DataFlow::TypeTracker t2 | result = pickle(t2).track(t2, t))
392+
}
393+
394+
/** Gets a reference to the `pickle` module. */
395+
DataFlow::Node pickle() { result = pickle(DataFlow::TypeTracker::end()) }
396+
397+
/** Provides models for the `pickle` module. */
398+
module pickle {
399+
/** Gets a reference to the `pickle.loads` function. */
400+
private DataFlow::Node loads(DataFlow::TypeTracker t) {
401+
t.start() and
402+
result = DataFlow::importNode(pickleModuleName() + ".loads")
403+
or
404+
t.startInAttr("loads") and
405+
result = pickle()
406+
or
407+
exists(DataFlow::TypeTracker t2 | result = loads(t2).track(t2, t))
408+
}
409+
410+
/** Gets a reference to the `pickle.loads` function. */
411+
DataFlow::Node loads() { result = loads(DataFlow::TypeTracker::end()) }
412+
}
413+
414+
/**
415+
* A call to `pickle.loads`
416+
* See https://docs.python.org/3/library/pickle.html#pickle.loads
417+
*/
418+
private class PickleLoadsCall extends Decoding::Range, DataFlow::CfgNode {
419+
override CallNode node;
420+
421+
PickleLoadsCall() { node.getFunction() = pickle::loads().asCfgNode() }
422+
423+
override predicate mayExecuteInput() { any() }
424+
425+
override DataFlow::Node getAnInput() { result.asCfgNode() = node.getArg(0) }
426+
427+
override DataFlow::Node getOutput() { result = this }
428+
429+
override string getFormat() { result = "pickle" }
430+
}
431+
332432
// ---------------------------------------------------------------------------
333433
// popen2
334434
// ---------------------------------------------------------------------------
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
/**
2+
* Provides classes modeling security-relevant aspects of the PyYAML package
3+
* https://pyyaml.org/wiki/PyYAMLDocumentation (obtained via `import yaml`).
4+
*/
5+
6+
private import python
7+
private import experimental.dataflow.DataFlow
8+
private import experimental.dataflow.RemoteFlowSources
9+
private import experimental.semmle.python.Concepts
10+
11+
private module Yaml {
12+
/** Gets a reference to the `yaml` module. */
13+
private DataFlow::Node yaml(DataFlow::TypeTracker t) {
14+
t.start() and
15+
result = DataFlow::importNode("yaml")
16+
or
17+
exists(DataFlow::TypeTracker t2 | result = yaml(t2).track(t2, t))
18+
}
19+
20+
/** Gets a reference to the `yaml` module. */
21+
DataFlow::Node yaml() { result = yaml(DataFlow::TypeTracker::end()) }
22+
23+
/** Provides models for the `yaml` module. */
24+
module yaml {
25+
/**
26+
* Gets a reference to the attribute `attr_name` of the `yaml` module.
27+
* WARNING: Only holds for a few predefined attributes.
28+
*
29+
* For example, using `attr_name = "load"` will get all uses of `yaml.load`.
30+
*/
31+
private DataFlow::Node yaml_attr(DataFlow::TypeTracker t, string attr_name) {
32+
attr_name in ["load", "SafeLoader", "BaseLoader"] and
33+
(
34+
t.start() and
35+
result = DataFlow::importNode("yaml." + attr_name)
36+
or
37+
t.startInAttr(attr_name) and
38+
result = yaml()
39+
)
40+
or
41+
// Due to bad performance when using normal setup with `yaml_attr(t2, attr_name).track(t2, t)`
42+
// we have inlined that code and forced a join
43+
exists(DataFlow::TypeTracker t2 |
44+
exists(DataFlow::StepSummary summary |
45+
yaml_attr_first_join(t2, attr_name, result, summary) and
46+
t = t2.append(summary)
47+
)
48+
)
49+
}
50+
51+
pragma[nomagic]
52+
private predicate yaml_attr_first_join(
53+
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
54+
) {
55+
DataFlow::StepSummary::step(yaml_attr(t2, attr_name), res, summary)
56+
}
57+
58+
/**
59+
* Gets a reference to the attribute `attr_name` of the `yaml` module.
60+
* WARNING: Only holds for a few predefined attributes.
61+
*
62+
* For example, using `attr_name = "load"` will get all uses of `yaml.load`.
63+
*/
64+
DataFlow::Node yaml_attr(string attr_name) {
65+
result = yaml_attr(DataFlow::TypeTracker::end(), attr_name)
66+
}
67+
}
68+
}
69+
70+
/**
71+
* A call to `yaml.load`
72+
* See https://pyyaml.org/wiki/PyYAMLDocumentation (you will have to scroll down).
73+
*/
74+
private class YamlLoadCall extends Decoding::Range, DataFlow::CfgNode {
75+
override CallNode node;
76+
77+
YamlLoadCall() { node.getFunction() = Yaml::yaml::yaml_attr("load").asCfgNode() }
78+
79+
/**
80+
* This function was thought safe from the 5.1 release in 2017, when the default loader was changed to `FullLoader`.
81+
* In 2020 new exploits were found, meaning it's not safe. The Current plan is to change the default to `SafeLoader` in release 6.0
82+
* (as explained in https://github.com/yaml/pyyaml/issues/420#issuecomment-696752389).
83+
* Until 6.0 is released, we will mark `yaml.load` as possibly leading to arbitrary code execution.
84+
* See https://github.com/yaml/pyyaml/wiki/PyYAML-yaml.load(input)-Deprecation for more details.
85+
*/
86+
override predicate mayExecuteInput() {
87+
// If the `Loader` is not set to either `SafeLoader` or `BaseLoader` or not set at all,
88+
// then the default loader will be used, which is not safe.
89+
not node.getArgByName("Loader") =
90+
Yaml::yaml::yaml_attr(["SafeLoader", "BaseLoader"]).asCfgNode()
91+
}
92+
93+
override DataFlow::Node getAnInput() { result.asCfgNode() = node.getArg(0) }
94+
95+
override DataFlow::Node getOutput() { result = this }
96+
97+
override string getFormat() { result = "YAML" }
98+
}

python/ql/test/experimental/library-tests/frameworks/dill/ConceptsTest.expected

Whitespace-only changes.
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
import python
2+
import experimental.meta.ConceptsTest

0 commit comments

Comments
 (0)