|
1 | 1 | /** |
2 | | - * Provides a taint-tracking configuration for detecting polynomial regular expression denial of service (ReDoS) |
3 | | - * vulnerabilities. |
| 2 | + * Provides a taint-tracking configuration for detecting "polynomial regular expression denial of service (ReDoS)" vulnerabilities. |
| 3 | + * |
| 5 | + * Note: for performance reasons, only import this file if |
| 5 | + * `PolynomialReDoS::Configuration` is needed, otherwise |
| 6 | + * `PolynomialReDoSCustomizations` should be imported instead. |
4 | 7 | */ |
5 | 8 |
|
6 | | -import python |
| 9 | +private import python |
7 | 10 | import semmle.python.dataflow.new.DataFlow |
8 | | -import semmle.python.dataflow.new.DataFlow2 |
9 | 11 | import semmle.python.dataflow.new.TaintTracking |
10 | | -import semmle.python.Concepts |
11 | | -import semmle.python.dataflow.new.RemoteFlowSources |
12 | | -import semmle.python.dataflow.new.BarrierGuards |
13 | | -import semmle.python.RegexTreeView |
14 | | -import semmle.python.ApiGraphs |
15 | | - |
16 | | -/** A configuration for finding uses of compiled regexes. */ |
17 | | -class RegexDefinitionConfiguration extends DataFlow2::Configuration { |
18 | | - RegexDefinitionConfiguration() { this = "RegexDefinitionConfiguration" } |
19 | | - |
20 | | - override predicate isSource(DataFlow::Node source) { source instanceof RegexDefinitonSource } |
21 | | - |
22 | | - override predicate isSink(DataFlow::Node sink) { sink instanceof RegexDefinitionSink } |
23 | | -} |
24 | | - |
25 | | -/** A regex compilation. */ |
26 | | -class RegexDefinitonSource extends DataFlow::CallCfgNode { |
27 | | - DataFlow::Node regexNode; |
28 | | - |
29 | | - RegexDefinitonSource() { |
30 | | - this = API::moduleImport("re").getMember("compile").getACall() and |
31 | | - regexNode in [this.getArg(0), this.getArgByName("pattern")] |
32 | | - } |
33 | | - |
34 | | - /** Gets the regex that is being compiled by this node. */ |
35 | | - RegExpTerm getRegExp() { result.getRegex() = regexNode.asExpr() and result.isRootTerm() } |
36 | | - |
37 | | - /** Gets the data flow node for the regex being compiled by this node. */ |
38 | | - DataFlow::Node getRegexNode() { result = regexNode } |
39 | | -} |
40 | | - |
41 | | -/** A use of a compiled regex. */ |
42 | | -class RegexDefinitionSink extends DataFlow::Node { |
43 | | - RegexExecutionMethod method; |
44 | | - DataFlow::CallCfgNode executingCall; |
45 | | - |
46 | | - RegexDefinitionSink() { |
47 | | - exists(DataFlow::AttrRead reMethod | |
48 | | - executingCall.getFunction() = reMethod and |
49 | | - reMethod.getAttributeName() = method and |
50 | | - this = reMethod.getObject() |
51 | | - ) |
52 | | - } |
53 | | - |
54 | | - /** Gets the method used to execute the regex. */ |
55 | | - RegexExecutionMethod getMethod() { result = method } |
56 | | - |
57 | | - /** Gets the data flow node for the executing call. */ |
58 | | - DataFlow::CallCfgNode getExecutingCall() { result = executingCall } |
59 | | -} |
60 | | - |
61 | | -/** |
62 | | - * A taint-tracking configuration for detecting regular expression denial-of-service vulnerabilities. |
63 | | - */ |
64 | | -class PolynomialReDoSConfiguration extends TaintTracking::Configuration { |
65 | | - PolynomialReDoSConfiguration() { this = "PolynomialReDoSConfiguration" } |
66 | | - |
67 | | - override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource } |
68 | | - |
69 | | - override predicate isSink(DataFlow::Node sink) { sink instanceof PolynomialReDoSSink } |
70 | | -} |
71 | | - |
72 | | -/** A data flow node executing a regex. */ |
73 | | -abstract class RegexExecution extends DataFlow::Node { |
74 | | - /** Gets the data flow node for the regex being executed by this node. */ |
75 | | - abstract DataFlow::Node getRegexNode(); |
76 | | - |
77 | | - /** Gets a dataflow node for the string to be searched or matched against. */ |
78 | | - abstract DataFlow::Node getString(); |
79 | | -} |
80 | | - |
81 | | -private class RegexExecutionMethod extends string { |
82 | | - RegexExecutionMethod() { |
83 | | - this in ["match", "fullmatch", "search", "split", "findall", "finditer", "sub", "subn"] |
84 | | - } |
85 | | -} |
86 | | - |
87 | | -/** Gets the index of the argument representing the string to be searched by a regex. */ |
88 | | -int stringArg(RegexExecutionMethod method) { |
89 | | - method in ["match", "fullmatch", "search", "split", "findall", "finditer"] and |
90 | | - result = 1 |
91 | | - or |
92 | | - method in ["sub", "subn"] and |
93 | | - result = 2 |
94 | | -} |
95 | 12 |
|
96 | 13 | /** |
97 | | - * A class to find `re` methods immediately executing an expression. |
98 | | - * |
99 | | - * See `RegexExecutionMethod` |
| 14 | + * Provides a taint-tracking configuration for detecting "polynomial regular expression denial of service (ReDoS)" vulnerabilities. |
100 | 15 | */ |
101 | | -class DirectRegex extends DataFlow::CallCfgNode, RegexExecution { |
102 | | - RegexExecutionMethod method; |
103 | | - |
104 | | - DirectRegex() { this = API::moduleImport("re").getMember(method).getACall() } |
| 16 | +module PolynomialReDoS { |
| 17 | + import PolynomialReDoSCustomizations::PolynomialReDoS |
105 | 18 |
|
106 | | - override DataFlow::Node getRegexNode() { |
107 | | - result in [this.getArg(0), this.getArgByName("pattern")] |
108 | | - } |
109 | | - |
110 | | - override DataFlow::Node getString() { |
111 | | - result in [this.getArg(stringArg(method)), this.getArgByName("string")] |
112 | | - } |
113 | | -} |
114 | | - |
115 | | -/** |
116 | | - * A class to find `re` methods immediately executing a compiled expression by `re.compile`. |
117 | | - * |
118 | | - * Given the following example: |
119 | | - * |
120 | | - * ```py |
121 | | - * pattern = re.compile(input) |
122 | | - * pattern.match(s) |
123 | | - * ``` |
124 | | - * |
125 | | - * This class will identify that `re.compile` compiles `input` and afterwards |
126 | | - * executes `re`'s `match`. As a result, `this` will refer to `pattern.match(s)` |
127 | | - * and `this.getRegexNode()` will return the node for `input` (`re.compile`'s first argument). |
128 | | - * |
129 | | - * |
130 | | - * See `RegexExecutionMethod` |
131 | | - * |
132 | | - * See https://docs.python.org/3/library/re.html#regular-expression-objects |
133 | | - */ |
134 | | -private class CompiledRegex extends DataFlow::CallCfgNode, RegexExecution { |
135 | | - DataFlow::Node regexNode; |
136 | | - RegexExecutionMethod method; |
137 | | - |
138 | | - CompiledRegex() { |
139 | | - exists( |
140 | | - RegexDefinitionConfiguration conf, RegexDefinitonSource source, RegexDefinitionSink sink |
141 | | - | |
142 | | - conf.hasFlow(source, sink) and |
143 | | - regexNode = source.getRegexNode() and |
144 | | - method = sink.getMethod() and |
145 | | - this = sink.getExecutingCall() |
146 | | - ) |
147 | | - } |
| 19 | + /** |
| 20 | + * A taint-tracking configuration for detecting "polynomial regular expression denial of service (ReDoS)" vulnerabilities. |
| 21 | + */ |
| 22 | + class Configuration extends TaintTracking::Configuration { |
| 23 | + Configuration() { this = "PolynomialReDoS" } |
148 | 24 |
|
149 | | - override DataFlow::Node getRegexNode() { result = regexNode } |
| 25 | + override predicate isSource(DataFlow::Node source) { source instanceof Source } |
150 | 26 |
|
151 | | - override DataFlow::Node getString() { |
152 | | - result in [this.getArg(stringArg(method) - 1), this.getArgByName("string")] |
153 | | - } |
154 | | -} |
| 27 | + override predicate isSink(DataFlow::Node sink) { sink instanceof Sink } |
155 | 28 |
|
156 | | -/** |
157 | | - * A data flow sink node for polynomial regular expression denial-of-service vulnerabilities. |
158 | | - */ |
159 | | -class PolynomialReDoSSink extends DataFlow::Node { |
160 | | - RegExpTerm t; |
| 29 | + override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer } |
161 | 30 |
|
162 | | - PolynomialReDoSSink() { |
163 | | - exists(RegexExecution re | |
164 | | - re.getRegexNode().asExpr() = t.getRegex() and |
165 | | - this = re.getString() |
166 | | - ) and |
167 | | - t.isRootTerm() |
| 31 | + override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { |
| 32 | + guard instanceof SanitizerGuard |
| 33 | + } |
168 | 34 | } |
169 | | - |
170 | | - /** Gets the regex that is being executed by this node. */ |
171 | | - RegExpTerm getRegExp() { result = t } |
172 | | - |
173 | | - /** |
174 | | - * Gets the node to highlight in the alert message. |
175 | | - */ |
176 | | - DataFlow::Node getHighlight() { result = this } |
177 | 35 | } |
0 commit comments