Skip to content

Commit 3215f50

Browse files
authored
Merge pull request #4632 from RasmusWL/python-move-configurations-out-of-queries
Python: move configurations out of queries
2 parents 25ba6ca + fbe51c5 commit 3215f50

File tree

13 files changed

+273
-194
lines changed

13 files changed

+273
-194
lines changed

python/ql/src/Security/CWE-022/PathInjection.ql

Lines changed: 2 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -14,103 +14,11 @@
1414
* external/cwe/cwe-036
1515
* external/cwe/cwe-073
1616
* external/cwe/cwe-099
17-
*
18-
* The query detects cases where a user-controlled path is used in an unsafe manner,
19-
* meaning it is not both normalized and _afterwards_ checked.
20-
*
21-
* It does so by dividing the problematic situation into two cases:
22-
* 1. The file path is never normalized.
23-
* This is easily detected by using normalization as a sanitizer.
24-
*
25-
* 2. The file path is normalized at least once, but never checked afterwards.
26-
* This is detected by finding the earliest normalization and then ensuring that
27-
* no checks happen later. Since we start from the earliest normalization,
28-
* we know that the absence of checks means that no normalization has a
29-
* check after it. (No checks after a second normalization would be ok if
30-
* there was a check between the first and the second.)
31-
*
32-
* Note that one could make the dual split on whether the file path is ever checked. This does
33-
* not work as nicely, however, since checking is modelled as a `BarrierGuard` rather than
34-
* as a `Sanitizer`. That means that only some dataflow paths out of a check will be removed,
35-
* and so identifying the last check is not possible simply by finding a dataflow path from it
36-
* to a sink.
3717
*/
3818

3919
import python
40-
import semmle.python.dataflow.new.DataFlow
41-
import semmle.python.dataflow.new.DataFlow2
42-
import semmle.python.dataflow.new.TaintTracking
43-
import semmle.python.dataflow.new.TaintTracking2
44-
import semmle.python.Concepts
45-
import semmle.python.dataflow.new.RemoteFlowSources
46-
import ChainedConfigs12
20+
import semmle.python.security.dataflow.PathInjection
4721

48-
// ---------------------------------------------------------------------------
49-
// Case 1. The path is never normalized.
50-
// ---------------------------------------------------------------------------
51-
/** Configuration to find paths from sources to sinks that contain no normalization. */
52-
class PathNotNormalizedConfiguration extends TaintTracking::Configuration {
53-
PathNotNormalizedConfiguration() { this = "PathNotNormalizedConfiguration" }
54-
55-
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
56-
57-
override predicate isSink(DataFlow::Node sink) {
58-
sink = any(FileSystemAccess e).getAPathArgument()
59-
}
60-
61-
override predicate isSanitizer(DataFlow::Node node) { node instanceof Path::PathNormalization }
62-
}
63-
64-
predicate pathNotNormalized(CustomPathNode source, CustomPathNode sink) {
65-
any(PathNotNormalizedConfiguration config).hasFlowPath(source.asNode1(), sink.asNode1())
66-
}
67-
68-
// ---------------------------------------------------------------------------
69-
// Case 2. The path is normalized at least once, but never checked afterwards.
70-
// ---------------------------------------------------------------------------
71-
/** Configuration to find paths from sources to normalizations that contain no prior normalizations. */
72-
class FirstNormalizationConfiguration extends TaintTracking::Configuration {
73-
FirstNormalizationConfiguration() { this = "FirstNormalizationConfiguration" }
74-
75-
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
76-
77-
override predicate isSink(DataFlow::Node sink) { sink instanceof Path::PathNormalization }
78-
79-
override predicate isSanitizerOut(DataFlow::Node node) { node instanceof Path::PathNormalization }
80-
}
81-
82-
/** Configuration to find paths from normalizations to sinks that do not go through a check. */
83-
class NormalizedPathNotCheckedConfiguration extends TaintTracking2::Configuration {
84-
NormalizedPathNotCheckedConfiguration() { this = "NormalizedPathNotCheckedConfiguration" }
85-
86-
override predicate isSource(DataFlow::Node source) { source instanceof Path::PathNormalization }
87-
88-
override predicate isSink(DataFlow::Node sink) {
89-
sink = any(FileSystemAccess e).getAPathArgument()
90-
}
91-
92-
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
93-
guard instanceof Path::SafeAccessCheck
94-
}
95-
}
96-
97-
predicate pathNotCheckedAfterNormalization(CustomPathNode source, CustomPathNode sink) {
98-
exists(
99-
FirstNormalizationConfiguration config, DataFlow::PathNode mid1, DataFlow2::PathNode mid2,
100-
NormalizedPathNotCheckedConfiguration config2
101-
|
102-
config.hasFlowPath(source.asNode1(), mid1) and
103-
config2.hasFlowPath(mid2, sink.asNode2()) and
104-
mid1.getNode().asCfgNode() = mid2.getNode().asCfgNode()
105-
)
106-
}
107-
108-
// ---------------------------------------------------------------------------
109-
// Query: Either case 1 or case 2.
110-
// ---------------------------------------------------------------------------
11122
from CustomPathNode source, CustomPathNode sink
112-
where
113-
pathNotNormalized(source, sink)
114-
or
115-
pathNotCheckedAfterNormalization(source, sink)
23+
where pathInjection(source, sink)
11624
select sink, source, sink, "This path depends on $@.", source, "a user-provided value"

python/ql/src/Security/CWE-078/CommandInjection.ql

Lines changed: 1 addition & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -15,50 +15,9 @@
1515
*/
1616

1717
import python
18-
import semmle.python.dataflow.new.DataFlow
19-
import semmle.python.dataflow.new.TaintTracking
20-
import semmle.python.Concepts
21-
import semmle.python.dataflow.new.RemoteFlowSources
18+
import semmle.python.security.dataflow.CommandInjection
2219
import DataFlow::PathGraph
2320

24-
class CommandInjectionConfiguration extends TaintTracking::Configuration {
25-
CommandInjectionConfiguration() { this = "CommandInjectionConfiguration" }
26-
27-
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
28-
29-
override predicate isSink(DataFlow::Node sink) {
30-
sink = any(SystemCommandExecution e).getCommand() and
31-
// Since the implementation of standard library functions such `os.popen` looks like
32-
// ```py
33-
// def popen(cmd, mode="r", buffering=-1):
34-
// ...
35-
// proc = subprocess.Popen(cmd, ...)
36-
// ```
37-
// any time we would report flow to the `os.popen` sink, we can ALSO report the flow
38-
// from the `cmd` parameter to the `subprocess.Popen` sink -- obviously we don't
39-
// want that.
40-
//
41-
// However, simply removing taint edges out of a sink is not a good enough solution,
42-
// since we would only flag one of the `os.system` calls in the following example
43-
// due to use-use flow
44-
// ```py
45-
// os.system(cmd)
46-
// os.system(cmd)
47-
// ```
48-
//
49-
// Best solution I could come up with is to exclude all sinks inside the modules of
50-
// known sinks. This does have a downside: If we have overlooked a function in any
51-
// of these, that internally runs a command, we no longer give an alert :| -- and we
52-
// need to keep them updated (which is hard to remember)
53-
//
54-
// This does not only affect `os.popen`, but also the helper functions in
55-
// `subprocess`. See:
56-
// https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/os.py#L974
57-
// https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/subprocess.py#L341
58-
not sink.getScope().getEnclosingModule().getName() in ["os", "subprocess", "platform", "popen2"]
59-
}
60-
}
61-
6221
from CommandInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
6322
where config.hasFlowPath(source, sink)
6423
select sink.getNode(), source, sink, "This command depends on $@.", source.getNode(),

python/ql/src/Security/CWE-079/ReflectedXss.ql

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -13,25 +13,9 @@
1313
*/
1414

1515
import python
16-
import semmle.python.dataflow.new.DataFlow
17-
import semmle.python.dataflow.new.TaintTracking
18-
import semmle.python.Concepts
19-
import semmle.python.dataflow.new.RemoteFlowSources
16+
import semmle.python.security.dataflow.ReflectedXSS
2017
import DataFlow::PathGraph
2118

22-
class ReflectedXssConfiguration extends TaintTracking::Configuration {
23-
ReflectedXssConfiguration() { this = "ReflectedXssConfiguration" }
24-
25-
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
26-
27-
override predicate isSink(DataFlow::Node sink) {
28-
exists(HTTP::Server::HttpResponse response |
29-
response.getMimetype().toLowerCase() = "text/html" and
30-
sink = response.getBody()
31-
)
32-
}
33-
}
34-
3519
from ReflectedXssConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
3620
where config.hasFlowPath(source, sink)
3721
select sink.getNode(), source, sink, "Cross-site scripting vulnerability due to $@.",

python/ql/src/Security/CWE-089/SqlInjection.ql

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,20 +12,9 @@
1212
*/
1313

1414
import python
15-
import semmle.python.dataflow.new.DataFlow
16-
import semmle.python.dataflow.new.TaintTracking
17-
import semmle.python.Concepts
18-
import semmle.python.dataflow.new.RemoteFlowSources
15+
import semmle.python.security.dataflow.SqlInjection
1916
import DataFlow::PathGraph
2017

21-
class SQLInjectionConfiguration extends TaintTracking::Configuration {
22-
SQLInjectionConfiguration() { this = "SQLInjectionConfiguration" }
23-
24-
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
25-
26-
override predicate isSink(DataFlow::Node sink) { sink = any(SqlExecution e).getSql() }
27-
}
28-
2918
from SQLInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
3019
where config.hasFlowPath(source, sink)
3120
select sink.getNode(), source, sink, "This SQL query depends on $@.", source.getNode(),

python/ql/src/Security/CWE-094/CodeInjection.ql

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -15,20 +15,9 @@
1515
*/
1616

1717
import python
18-
import semmle.python.dataflow.new.DataFlow
19-
import semmle.python.dataflow.new.TaintTracking
20-
import semmle.python.Concepts
21-
import semmle.python.dataflow.new.RemoteFlowSources
18+
import semmle.python.security.dataflow.CodeInjection
2219
import DataFlow::PathGraph
2320

24-
class CodeInjectionConfiguration extends TaintTracking::Configuration {
25-
CodeInjectionConfiguration() { this = "CodeInjectionConfiguration" }
26-
27-
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
28-
29-
override predicate isSink(DataFlow::Node sink) { sink = any(CodeExecution e).getCode() }
30-
}
31-
3221
from CodeInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
3322
where config.hasFlowPath(source, sink)
3423
select sink.getNode(), source, sink, "$@ flows to here and is interpreted as code.",

python/ql/src/Security/CWE-502/UnsafeDeserialization.ql

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -12,25 +12,9 @@
1212
*/
1313

1414
import python
15-
import semmle.python.dataflow.new.DataFlow
16-
import semmle.python.dataflow.new.TaintTracking
17-
import semmle.python.Concepts
18-
import semmle.python.dataflow.new.RemoteFlowSources
15+
import semmle.python.security.dataflow.UnsafeDeserialization
1916
import DataFlow::PathGraph
2017

21-
class UnsafeDeserializationConfiguration extends TaintTracking::Configuration {
22-
UnsafeDeserializationConfiguration() { this = "UnsafeDeserializationConfiguration" }
23-
24-
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
25-
26-
override predicate isSink(DataFlow::Node sink) {
27-
exists(Decoding d |
28-
d.mayExecuteInput() and
29-
sink = d.getAnInput()
30-
)
31-
}
32-
}
33-
3418
from UnsafeDeserializationConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
3519
where config.hasFlowPath(source, sink)
3620
select sink.getNode(), source, sink, "Deserializing of $@.", source.getNode(), "untrusted input"

python/ql/src/Security/CWE-022/ChainedConfigs12.qll renamed to python/ql/src/semmle/python/security/dataflow/ChainedConfigs12.qll

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,13 @@ class CustomPathNode extends TCustomPathNode {
4141
this = Config2Node(result) or this = CrossoverNode(result.getNode())
4242
}
4343

44+
/**
45+
* Holds if this element is at the specified location.
46+
* The location spans column `startcolumn` of line `startline` to
47+
* column `endcolumn` of line `endline` in file `filepath`.
48+
* For more information, see
49+
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
50+
*/
4451
predicate hasLocationInfo(
4552
string filepath, int startline, int startcolumn, int endline, int endcolumn
4653
) {
@@ -49,6 +56,7 @@ class CustomPathNode extends TCustomPathNode {
4956
asNode2().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
5057
}
5158

59+
/** Gets a textual representation of this element. */
5260
string toString() {
5361
result = asNode1().toString()
5462
or
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
/**
2+
* Provides a taint-tracking configuration for detecting code injection
3+
* vulnerabilities.
4+
*/
5+
6+
import python
7+
import semmle.python.dataflow.new.DataFlow
8+
import semmle.python.dataflow.new.TaintTracking
9+
import semmle.python.Concepts
10+
import semmle.python.dataflow.new.RemoteFlowSources
11+
12+
/**
13+
* A taint-tracking configuration for detecting code injection vulnerabilities.
14+
*/
15+
class CodeInjectionConfiguration extends TaintTracking::Configuration {
16+
CodeInjectionConfiguration() { this = "CodeInjectionConfiguration" }
17+
18+
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
19+
20+
override predicate isSink(DataFlow::Node sink) { sink = any(CodeExecution e).getCode() }
21+
}
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/**
2+
* Provides a taint-tracking configuration for detecting command injection
3+
* vulnerabilities.
4+
*/
5+
6+
import python
7+
import semmle.python.dataflow.new.DataFlow
8+
import semmle.python.dataflow.new.TaintTracking
9+
import semmle.python.Concepts
10+
import semmle.python.dataflow.new.RemoteFlowSources
11+
12+
/**
13+
* A taint-tracking configuration for detecting command injection vulnerabilities.
14+
*/
15+
class CommandInjectionConfiguration extends TaintTracking::Configuration {
16+
CommandInjectionConfiguration() { this = "CommandInjectionConfiguration" }
17+
18+
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
19+
20+
override predicate isSink(DataFlow::Node sink) {
21+
sink = any(SystemCommandExecution e).getCommand() and
22+
// Since the implementation of standard library functions such `os.popen` looks like
23+
// ```py
24+
// def popen(cmd, mode="r", buffering=-1):
25+
// ...
26+
// proc = subprocess.Popen(cmd, ...)
27+
// ```
28+
// any time we would report flow to the `os.popen` sink, we can ALSO report the flow
29+
// from the `cmd` parameter to the `subprocess.Popen` sink -- obviously we don't
30+
// want that.
31+
//
32+
// However, simply removing taint edges out of a sink is not a good enough solution,
33+
// since we would only flag one of the `os.system` calls in the following example
34+
// due to use-use flow
35+
// ```py
36+
// os.system(cmd)
37+
// os.system(cmd)
38+
// ```
39+
//
40+
// Best solution I could come up with is to exclude all sinks inside the modules of
41+
// known sinks. This does have a downside: If we have overlooked a function in any
42+
// of these, that internally runs a command, we no longer give an alert :| -- and we
43+
// need to keep them updated (which is hard to remember)
44+
//
45+
// This does not only affect `os.popen`, but also the helper functions in
46+
// `subprocess`. See:
47+
// https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/os.py#L974
48+
// https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/subprocess.py#L341
49+
not sink.getScope().getEnclosingModule().getName() in ["os", "subprocess", "platform", "popen2"]
50+
}
51+
}

0 commit comments

Comments
 (0)