Skip to content

Commit 146787b

Browse files
authored
Merge pull request #4539 from yoff/python-port-path-injection
Python: port path injection
2 parents 310975b + cf97a56 commit 146787b

File tree

27 files changed

+7913
-12
lines changed

27 files changed

+7913
-12
lines changed

config/identical-files.json

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@
2020
"csharp/ql/src/semmle/code/csharp/dataflow/internal/DataFlowImpl4.qll",
2121
"csharp/ql/src/semmle/code/csharp/dataflow/internal/DataFlowImpl5.qll",
2222
"python/ql/src/experimental/dataflow/internal/DataFlowImpl.qll",
23-
"python/ql/src/experimental/dataflow/internal/DataFlowImpl2.qll"
23+
"python/ql/src/experimental/dataflow/internal/DataFlowImpl2.qll",
24+
"python/ql/src/experimental/dataflow/internal/DataFlowImpl3.qll",
25+
"python/ql/src/experimental/dataflow/internal/DataFlowImpl4.qll"
2426
],
2527
"DataFlow Java/C++/C#/Python Common": [
2628
"java/ql/src/semmle/code/java/dataflow/internal/DataFlowImplCommon.qll",
@@ -41,7 +43,10 @@
4143
"csharp/ql/src/semmle/code/csharp/dataflow/internal/tainttracking5/TaintTrackingImpl.qll",
4244
"java/ql/src/semmle/code/java/dataflow/internal/tainttracking1/TaintTrackingImpl.qll",
4345
"java/ql/src/semmle/code/java/dataflow/internal/tainttracking2/TaintTrackingImpl.qll",
44-
"python/ql/src/experimental/dataflow/internal/tainttracking1/TaintTrackingImpl.qll"
46+
"python/ql/src/experimental/dataflow/internal/tainttracking1/TaintTrackingImpl.qll",
47+
"python/ql/src/experimental/dataflow/internal/tainttracking2/TaintTrackingImpl.qll",
48+
"python/ql/src/experimental/dataflow/internal/tainttracking3/TaintTrackingImpl.qll",
49+
"python/ql/src/experimental/dataflow/internal/tainttracking4/TaintTrackingImpl.qll"
4550
],
4651
"DataFlow Java/C++/C#/Python Consistency checks": [
4752
"java/ql/src/semmle/code/java/dataflow/internal/DataFlowImplConsistency.qll",
@@ -405,4 +410,4 @@
405410
"javascript/ql/src/Comments/CommentedOutCodeReferences.qhelp",
406411
"python/ql/src/Lexical/CommentedOutCodeReferences.qhelp"
407412
]
408-
}
413+
}
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
/**
2+
* This defines a `PathGraph` where sinks from `TaintTracking::Configuration`s are identified with
3+
* sources from `TaintTracking2::Configuration`s if they represent the same `ControlFlowNode`.
4+
*
5+
* Paths are then connected appropriately.
6+
*/
7+
8+
import python
9+
import experimental.dataflow.DataFlow
10+
import experimental.dataflow.DataFlow2
11+
import experimental.dataflow.TaintTracking
12+
import experimental.dataflow.TaintTracking2
13+
14+
/**
15+
* A `DataFlow::Node` that appears as a sink in Config1 and a source in Config2.
16+
*/
17+
private predicate crossoverNode(DataFlow::Node n) {
18+
any(TaintTracking::Configuration t1).isSink(n) and
19+
any(TaintTracking2::Configuration t2).isSource(n)
20+
}
21+
22+
/**
23+
* A new type which represents the union of the two sets of nodes.
24+
*/
25+
private newtype TCustomPathNode =
26+
Config1Node(DataFlow::PathNode node1) { not crossoverNode(node1.getNode()) } or
27+
Config2Node(DataFlow2::PathNode node2) { not crossoverNode(node2.getNode()) } or
28+
CrossoverNode(DataFlow::Node node) { crossoverNode(node) }
29+
30+
/**
31+
* A class representing the set of all the path nodes in either config.
32+
*/
33+
class CustomPathNode extends TCustomPathNode {
34+
/** Gets the PathNode if it is in Config1. */
35+
DataFlow::PathNode asNode1() {
36+
this = Config1Node(result) or this = CrossoverNode(result.getNode())
37+
}
38+
39+
/** Gets the PathNode if it is in Config2. */
40+
DataFlow2::PathNode asNode2() {
41+
this = Config2Node(result) or this = CrossoverNode(result.getNode())
42+
}
43+
44+
predicate hasLocationInfo(
45+
string filepath, int startline, int startcolumn, int endline, int endcolumn
46+
) {
47+
asNode1().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
48+
or
49+
asNode2().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
50+
}
51+
52+
string toString() {
53+
result = asNode1().toString()
54+
or
55+
result = asNode2().toString()
56+
}
57+
}
58+
59+
/** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */
60+
query predicate edges(CustomPathNode a, CustomPathNode b) {
61+
// Edge is in Config1 graph
62+
DataFlow::PathGraph::edges(a.asNode1(), b.asNode1())
63+
or
64+
// Edge is in Config2 graph
65+
DataFlow2::PathGraph::edges(a.asNode2(), b.asNode2())
66+
}
67+
68+
/** Holds if `n` is a node in the graph of data flow path explanations. */
69+
query predicate nodes(CustomPathNode n, string key, string val) {
70+
// Node is in Config1 graph
71+
DataFlow::PathGraph::nodes(n.asNode1(), key, val)
72+
or
73+
// Node is in Config2 graph
74+
DataFlow2::PathGraph::nodes(n.asNode2(), key, val)
75+
}
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
/**
2+
* @name Uncontrolled data used in path expression
3+
* @description Accessing paths influenced by users can allow an attacker to access unexpected resources.
4+
* @kind path-problem
5+
* @problem.severity error
6+
* @sub-severity high
7+
* @precision high
8+
* @id py/path-injection
9+
* @tags correctness
10+
* security
11+
* external/owasp/owasp-a1
12+
* external/cwe/cwe-022
13+
* external/cwe/cwe-023
14+
* external/cwe/cwe-036
15+
* external/cwe/cwe-073
16+
* external/cwe/cwe-099
17+
*
18+
* The query detects cases where a user-controlled path is used in an unsafe manner,
19+
* meaning it is not both normalized and _afterwards_ checked.
20+
*
21+
* It does so by dividing the problematic situation into two cases:
22+
* 1. The file path is never normalized.
23+
* This is easily detected by using normalization as a sanitizer.
24+
*
25+
* 2. The file path is normalized at least once, but never checked afterwards.
26+
* This is detected by finding the earliest normalization and then ensuring that
27+
* no checks happen later. Since we start from the earliest normalization,
28+
* we know that the absence of checks means that no normalization has a
29+
* check after it. (No checks after a second normalization would be ok if
30+
* there was a check between the first and the second.)
31+
*
32+
* Note that one could make the dual split on whether the file path is ever checked. This does
33+
* not work as nicely, however, since checking is modelled as a `BarrierGuard` rather than
34+
* as a `Sanitizer`. That means that only some dataflow paths out of a check will be removed,
35+
* and so identifying the last check is not possible simply by finding a dataflow path from it
36+
* to a sink.
37+
*/
38+
39+
import python
40+
import experimental.dataflow.DataFlow
41+
import experimental.dataflow.DataFlow2
42+
import experimental.dataflow.TaintTracking
43+
import experimental.dataflow.TaintTracking2
44+
import experimental.semmle.python.Concepts
45+
import experimental.dataflow.RemoteFlowSources
46+
import ChainedConfigs12
47+
48+
// ---------------------------------------------------------------------------
49+
// Case 1. The path is never normalized.
50+
// ---------------------------------------------------------------------------
51+
/** Configuration to find paths from sources to sinks that contain no normalization. */
52+
class PathNotNormalizedConfiguration extends TaintTracking::Configuration {
53+
PathNotNormalizedConfiguration() { this = "PathNotNormalizedConfiguration" }
54+
55+
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
56+
57+
override predicate isSink(DataFlow::Node sink) {
58+
sink = any(FileSystemAccess e).getAPathArgument()
59+
}
60+
61+
override predicate isSanitizer(DataFlow::Node node) { node instanceof Path::PathNormalization }
62+
}
63+
64+
predicate pathNotNormalized(CustomPathNode source, CustomPathNode sink) {
65+
any(PathNotNormalizedConfiguration config).hasFlowPath(source.asNode1(), sink.asNode1())
66+
}
67+
68+
// ---------------------------------------------------------------------------
69+
// Case 2. The path is normalized at least once, but never checked afterwards.
70+
// ---------------------------------------------------------------------------
71+
/** Configuration to find paths from sources to normalizations that contain no prior normalizations. */
72+
class FirstNormalizationConfiguration extends TaintTracking::Configuration {
73+
FirstNormalizationConfiguration() { this = "FirstNormalizationConfiguration" }
74+
75+
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
76+
77+
override predicate isSink(DataFlow::Node sink) { sink instanceof Path::PathNormalization }
78+
79+
override predicate isSanitizerOut(DataFlow::Node node) { node instanceof Path::PathNormalization }
80+
}
81+
82+
/** Configuration to find paths from normalizations to sinks that do not go through a check. */
83+
class NormalizedPathNotCheckedConfiguration extends TaintTracking2::Configuration {
84+
NormalizedPathNotCheckedConfiguration() { this = "NormalizedPathNotCheckedConfiguration" }
85+
86+
override predicate isSource(DataFlow::Node source) { source instanceof Path::PathNormalization }
87+
88+
override predicate isSink(DataFlow::Node sink) {
89+
sink = any(FileSystemAccess e).getAPathArgument()
90+
}
91+
92+
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
93+
guard instanceof Path::SafeAccessCheck
94+
}
95+
}
96+
97+
predicate pathNotCheckedAfterNormalization(CustomPathNode source, CustomPathNode sink) {
98+
exists(
99+
FirstNormalizationConfiguration config, DataFlow::PathNode mid1, DataFlow2::PathNode mid2,
100+
NormalizedPathNotCheckedConfiguration config2
101+
|
102+
config.hasFlowPath(source.asNode1(), mid1) and
103+
config2.hasFlowPath(mid2, sink.asNode2()) and
104+
mid1.getNode().asCfgNode() = mid2.getNode().asCfgNode()
105+
)
106+
}
107+
108+
// ---------------------------------------------------------------------------
109+
// Query: Either case 1 or case 2.
110+
// ---------------------------------------------------------------------------
111+
from CustomPathNode source, CustomPathNode sink
112+
where
113+
pathNotNormalized(source, sink)
114+
or
115+
pathNotCheckedAfterNormalization(source, sink)
116+
select sink, source, sink, "This path depends on $@.", source, "a user-provided value"
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
/**
2+
* Provides a library for local (intra-procedural) and global (inter-procedural)
3+
* data flow analysis: deciding whether data can flow from a _source_ to a
4+
* _sink_.
5+
*
6+
* Unless configured otherwise, _flow_ means that the exact value of
7+
* the source may reach the sink. We do not track flow across pointer
8+
* dereferences or array indexing. To track these types of flow, where the
9+
* exact value may not be preserved, import
10+
* `experimental.dataflow.TaintTracking`.
11+
*
12+
* To use global (interprocedural) data flow, extend the class
13+
* `DataFlow::Configuration` as documented on that class. To use local
14+
* (intraprocedural) data flow, call `DataFlow::localFlow` or
15+
* `DataFlow::localFlowStep` with arguments of type `DataFlow::Node`.
16+
*/
17+
18+
private import python
19+
20+
/**
21+
* Provides classes for performing local (intra-procedural) and
22+
* global (inter-procedural) data flow analyses.
23+
*/
24+
module DataFlow3 {
25+
import experimental.dataflow.internal.DataFlowImpl3
26+
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
/**
2+
* Provides a library for local (intra-procedural) and global (inter-procedural)
3+
* data flow analysis: deciding whether data can flow from a _source_ to a
4+
* _sink_.
5+
*
6+
* Unless configured otherwise, _flow_ means that the exact value of
7+
* the source may reach the sink. We do not track flow across pointer
8+
* dereferences or array indexing. To track these types of flow, where the
9+
* exact value may not be preserved, import
10+
* `experimental.dataflow.TaintTracking`.
11+
*
12+
* To use global (interprocedural) data flow, extend the class
13+
* `DataFlow::Configuration` as documented on that class. To use local
14+
* (intraprocedural) data flow, call `DataFlow::localFlow` or
15+
* `DataFlow::localFlowStep` with arguments of type `DataFlow::Node`.
16+
*/
17+
18+
private import python
19+
20+
/**
21+
* Provides classes for performing local (intra-procedural) and
22+
* global (inter-procedural) data flow analyses.
23+
*/
24+
module DataFlow4 {
25+
import experimental.dataflow.internal.DataFlowImpl4
26+
}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
/**
2+
* Provides classes for performing local (intra-procedural) and
3+
* global (inter-procedural) taint-tracking analyses.
4+
*
5+
* To use global (interprocedural) taint tracking, extend the class
6+
* `TaintTracking::Configuration` as documented on that class. To use local
7+
* (intraprocedural) taint tracking, call `TaintTracking::localTaint` or
8+
* `TaintTracking::localTaintStep` with arguments of type `DataFlow::Node`.
9+
*/
10+
11+
private import python
12+
13+
/**
14+
* Provides classes for performing local (intra-procedural) and
15+
* global (inter-procedural) taint-tracking analyses.
16+
*/
17+
module TaintTracking2 {
18+
import experimental.dataflow.internal.tainttracking2.TaintTrackingImpl
19+
}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
/**
2+
* Provides classes for performing local (intra-procedural) and
3+
* global (inter-procedural) taint-tracking analyses.
4+
*
5+
* To use global (interprocedural) taint tracking, extend the class
6+
* `TaintTracking::Configuration` as documented on that class. To use local
7+
* (intraprocedural) taint tracking, call `TaintTracking::localTaint` or
8+
* `TaintTracking::localTaintStep` with arguments of type `DataFlow::Node`.
9+
*/
10+
11+
private import python
12+
13+
/**
14+
* Provides classes for performing local (intra-procedural) and
15+
* global (inter-procedural) taint-tracking analyses.
16+
*/
17+
module TaintTracking3 {
18+
import experimental.dataflow.internal.tainttracking3.TaintTrackingImpl
19+
}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
/**
2+
* Provides classes for performing local (intra-procedural) and
3+
* global (inter-procedural) taint-tracking analyses.
4+
*
5+
* To use global (interprocedural) taint tracking, extend the class
6+
* `TaintTracking::Configuration` as documented on that class. To use local
7+
* (intraprocedural) taint tracking, call `TaintTracking::localTaint` or
8+
* `TaintTracking::localTaintStep` with arguments of type `DataFlow::Node`.
9+
*/
10+
11+
private import python
12+
13+
/**
14+
* Provides classes for performing local (intra-procedural) and
15+
* global (inter-procedural) taint-tracking analyses.
16+
*/
17+
module TaintTracking4 {
18+
import experimental.dataflow.internal.tainttracking4.TaintTrackingImpl
19+
}

0 commit comments

Comments
 (0)