|
1 | 1 | /** |
2 | | - * Provides classes modeling security-relevant aspects of the PyYAML package |
3 | | - * https://pyyaml.org/wiki/PyYAMLDocumentation (obtained via `import yaml`). |
| 2 | + * Provides classes modeling security-relevant aspects of the PyYAML package (obtained |
| 3 | + * via `import yaml`). |
| 4 | + * |
| 5 | + * See |
| 6 | + * - https://pyyaml.org/wiki/PyYAMLDocumentation |
| 7 | + * - https://pyyaml.docsforge.com/master/documentation/ |
4 | 8 | */ |
5 | 9 |
|
6 | 10 | private import python |
7 | 11 | private import semmle.python.dataflow.new.DataFlow |
8 | 12 | private import semmle.python.dataflow.new.RemoteFlowSources |
9 | 13 | private import semmle.python.Concepts |
| 14 | +private import semmle.python.ApiGraphs |
10 | 15 |
|
| 16 | +/** |
| 17 | + * Provides classes modeling security-relevant aspects of the PyYAML package (obtained |
| 18 | + * via `import yaml`). |
| 19 | + * |
| 20 | + * See |
| 21 | + * - https://pyyaml.org/wiki/PyYAMLDocumentation |
| 22 | + * - https://pyyaml.docsforge.com/master/documentation/ |
| 23 | + */ |
11 | 24 | private module Yaml { |
12 | | - /** Gets a reference to the `yaml` module. */ |
13 | | - private DataFlow::Node yaml(DataFlow::TypeTracker t) { |
14 | | - t.start() and |
15 | | - result = DataFlow::importNode("yaml") |
16 | | - or |
17 | | - exists(DataFlow::TypeTracker t2 | result = yaml(t2).track(t2, t)) |
18 | | - } |
19 | | - |
20 | | - /** Gets a reference to the `yaml` module. */ |
21 | | - DataFlow::Node yaml() { result = yaml(DataFlow::TypeTracker::end()) } |
| 25 | + /** |
| 26 | + * A call to any of the loading functions in `yaml` (`load`, `load_all`, `full_load`, |
| 27 | + * `full_load_all`, `unsafe_load`, `unsafe_load_all`, `safe_load`, `safe_load_all`). |
| 28 | + * |
| 29 | + * See https://pyyaml.org/wiki/PyYAMLDocumentation (you will have to scroll down). |
| 30 | + */ |
| 31 | + private class YamlLoadCall extends Decoding::Range, DataFlow::CallCfgNode { |
| 32 | + override CallNode node; |
| 33 | + string func_name; |
22 | 34 |
|
23 | | - /** Provides models for the `yaml` module. */ |
24 | | - module yaml { |
25 | | - /** |
26 | | - * Gets a reference to the attribute `attr_name` of the `yaml` module. |
27 | | - * WARNING: Only holds for a few predefined attributes. |
28 | | - * |
29 | | - * For example, using `attr_name = "load"` will get all uses of `yaml.load`. |
30 | | - */ |
31 | | - private DataFlow::Node yaml_attr(DataFlow::TypeTracker t, string attr_name) { |
32 | | - attr_name in [ |
33 | | - // functions |
| 35 | + YamlLoadCall() { |
| 36 | + func_name in [ |
34 | 37 | "load", "load_all", "full_load", "full_load_all", "unsafe_load", "unsafe_load_all", |
35 | | - "safe_load", "safe_load_all", |
36 | | - // Classes |
37 | | - "SafeLoader", "BaseLoader" |
| 38 | + "safe_load", "safe_load_all" |
38 | 39 | ] and |
39 | | - ( |
40 | | - t.start() and |
41 | | - result = DataFlow::importNode("yaml." + attr_name) |
42 | | - or |
43 | | - t.startInAttr(attr_name) and |
44 | | - result = yaml() |
45 | | - ) |
46 | | - or |
47 | | - // Due to bad performance when using normal setup with `yaml_attr(t2, attr_name).track(t2, t)` |
48 | | - // we have inlined that code and forced a join |
49 | | - exists(DataFlow::TypeTracker t2 | |
50 | | - exists(DataFlow::StepSummary summary | |
51 | | - yaml_attr_first_join(t2, attr_name, result, summary) and |
52 | | - t = t2.append(summary) |
53 | | - ) |
54 | | - ) |
55 | | - } |
56 | | - |
57 | | - pragma[nomagic] |
58 | | - private predicate yaml_attr_first_join( |
59 | | - DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary |
60 | | - ) { |
61 | | - DataFlow::StepSummary::step(yaml_attr(t2, attr_name), res, summary) |
| 40 | + this = API::moduleImport("yaml").getMember(func_name).getACall() |
62 | 41 | } |
63 | 42 |
|
64 | 43 | /** |
65 | | - * Gets a reference to the attribute `attr_name` of the `yaml` module. |
66 | | - * WARNING: Only holds for a few predefined attributes. |
67 | | - * |
68 | | - * For example, using `attr_name = "load"` will get all uses of `yaml.load`. |
| 44 | + * This function was thought safe from the 5.1 release in 2017, when the default loader was changed to `FullLoader`. |
| 45 | + * In 2020 new exploits were found, meaning it's not safe. The current plan is to change the default to `SafeLoader` in release 6.0 |
| 46 | + * (as explained in https://github.com/yaml/pyyaml/issues/420#issuecomment-696752389). |
| 47 | + * Until 6.0 is released, we will mark `yaml.load` as possibly leading to arbitrary code execution. |
| 48 | + * See https://github.com/yaml/pyyaml/wiki/PyYAML-yaml.load(input)-Deprecation for more details. |
69 | 49 | */ |
70 | | - DataFlow::Node yaml_attr(string attr_name) { |
71 | | - result = yaml_attr(DataFlow::TypeTracker::end(), attr_name) |
| 50 | + override predicate mayExecuteInput() { |
| 51 | + func_name in ["full_load", "full_load_all", "unsafe_load", "unsafe_load_all"] |
| 52 | + or |
| 53 | + func_name in ["load", "load_all"] and |
| 54 | + // If `Loader` is not set at all, the default loader is used, which is not safe. If it is set to anything |
| 55 | + // other than `SafeLoader`, `BaseLoader`, `CSafeLoader` or `CBaseLoader`, we also treat the call as unsafe. |
| 56 | + not exists(DataFlow::Node loader_arg | |
| 57 | + loader_arg in [this.getArg(1), this.getArgByName("Loader")] |
| 58 | + | |
| 59 | + loader_arg = |
| 60 | + API::moduleImport("yaml") |
| 61 | + .getMember(["SafeLoader", "BaseLoader", "CSafeLoader", "CBaseLoader"]) |
| 62 | + .getAUse() |
| 63 | + ) |
72 | 64 | } |
73 | | - } |
74 | | -} |
75 | 65 |
|
76 | | -/** |
77 | | - * A call to any of the loading functions in `yaml` (`load`, `load_all`, `full_load`, |
78 | | - * `full_load_all`, `unsafe_load`, `unsafe_load_all`, `safe_load`, `safe_load_all`) |
79 | | - * |
80 | | - * See https://pyyaml.org/wiki/PyYAMLDocumentation (you will have to scroll down). |
81 | | - */ |
82 | | -private class YamlLoadCall extends Decoding::Range, DataFlow::CfgNode { |
83 | | - override CallNode node; |
84 | | - string func_name; |
| 66 | + override DataFlow::Node getAnInput() { result = this.getArg(0) } |
85 | 67 |
|
86 | | - YamlLoadCall() { |
87 | | - func_name in [ |
88 | | - "load", "load_all", "full_load", "full_load_all", "unsafe_load", "unsafe_load_all", |
89 | | - "safe_load", "safe_load_all" |
90 | | - ] and |
91 | | - node.getFunction() = Yaml::yaml::yaml_attr(func_name).asCfgNode() |
92 | | - } |
| 68 | + override DataFlow::Node getOutput() { result = this } |
93 | 69 |
|
94 | | - /** |
95 | | - * This function was thought safe from the 5.1 release in 2017, when the default loader was changed to `FullLoader`. |
96 | | - * In 2020 new exploits were found, meaning it's not safe. The Current plan is to change the default to `SafeLoader` in release 6.0 |
97 | | - * (as explained in https://github.com/yaml/pyyaml/issues/420#issuecomment-696752389). |
98 | | - * Until 6.0 is released, we will mark `yaml.load` as possibly leading to arbitrary code execution. |
99 | | - * See https://github.com/yaml/pyyaml/wiki/PyYAML-yaml.load(input)-Deprecation for more details. |
100 | | - */ |
101 | | - override predicate mayExecuteInput() { |
102 | | - func_name in ["full_load", "full_load_all", "unsafe_load", "unsafe_load_all"] |
103 | | - or |
104 | | - func_name in ["load", "load_all"] and |
105 | | - // If the `Loader` is not set to either `SafeLoader` or `BaseLoader` or not set at all, |
106 | | - // then the default loader will be used, which is not safe. |
107 | | - not exists(DataFlow::Node loader_arg | |
108 | | - loader_arg.asCfgNode() in [node.getArg(1), node.getArgByName("Loader")] |
109 | | - | |
110 | | - loader_arg = Yaml::yaml::yaml_attr(["SafeLoader", "BaseLoader"]) |
111 | | - ) |
| 70 | + override string getFormat() { result = "YAML" } |
112 | 71 | } |
113 | | - |
114 | | - override DataFlow::Node getAnInput() { result.asCfgNode() = node.getArg(0) } |
115 | | - |
116 | | - override DataFlow::Node getOutput() { result = this } |
117 | | - |
118 | | - override string getFormat() { result = "YAML" } |
119 | 72 | } |
0 commit comments