Skip to content

Commit f12c057

Browse files
authored
Merge pull request #1470 from markshannon/python-tarslip
Python: "TarSlip" query
2 parents 41e46f6 + fbe20a9 commit f12c057

File tree

10 files changed

+403
-2
lines changed

10 files changed

+403
-2
lines changed
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Improvements to Python analysis
2+
3+
4+
## General improvements
5+
6+
7+
8+
### Impact on existing queries.
9+
10+
11+
12+
## New queries
13+
14+
| **Query** | **Tags** | **Purpose** |
15+
|-----------|----------|-------------|
16+
| Arbitrary file write during tarfile extraction (`py/tarslip`) | security, external/cwe/cwe-022 | Finds instances where extracting from a tar archive can result in arbitrary file writes. Results are not shown on LGTM by default. |
17+
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
<!DOCTYPE qhelp PUBLIC
2+
"-//Semmle//qhelp//EN"
3+
"qhelp.dtd">
4+
<qhelp>
5+
6+
<overview>
7+
<p>Extracting files from a malicious tar archive without validating that the destination file path
8+
is within the destination directory can cause files outside the destination directory to be
9+
overwritten, due to the possible presence of directory traversal elements (<code>..</code>) in
10+
archive paths.</p>
11+
12+
<p>Tar archives contain archive entries representing each file in the archive. These entries
13+
include a file path for the entry, but these file paths are not restricted and may contain
14+
unexpected special elements such as the directory traversal element (<code>..</code>). If these
15+
file paths are used to determine an output file to write the contents of the archive item to, then
16+
the file may be written to an unexpected location. This can result in sensitive information being
17+
revealed or deleted, or an attacker being able to influence behavior by modifying unexpected
18+
files.</p>
19+
20+
<p>For example, if a tar archive contains a file entry <code>..\sneaky-file</code>, and the tar archive
21+
is extracted to the directory <code>c:\output</code>, then naively combining the paths would result
22+
in an output file path of <code>c:\output\..\sneaky-file</code>, which would cause the file to be
23+
written to <code>c:\sneaky-file</code>.</p>
24+
25+
</overview>
26+
<recommendation>
27+
28+
<p>Ensure that output paths constructed from tar archive entries are validated
29+
to prevent writing files to unexpected locations.</p>
30+
31+
<p>The recommended way of writing an output file from a tar archive entry is to check that
32+
<code>".."</code> does not occur in the path.
33+
</p>
34+
35+
</recommendation>
36+
37+
<example>
38+
<p>
39+
In this example an archive is extracted without validating file paths.
40+
If <code>archive.tar</code> contained relative paths (for
41+
instance, if it were created by something like <code>tar -cf archive.tar
42+
../file.txt</code>) then executing this code could write to locations
43+
outside the destination directory.
44+
</p>
45+
46+
<sample src="examples/tarslip_bad.py" />
47+
48+
<p>To fix this vulnerability, we need to check that the path does not
49+
contain any <code>".."</code> elements in it.
50+
</p>
51+
52+
<sample src="examples/tarslip_good.py" />
53+
54+
</example>
55+
<references>
56+
57+
<li>
58+
Snyk:
59+
<a href="https://snyk.io/research/zip-slip-vulnerability">Zip Slip Vulnerability</a>.
60+
</li>
61+
<li>
62+
OWASP:
63+
<a href="https://www.owasp.org/index.php/Path_traversal">Path Traversal</a>.
64+
</li>
65+
<li>
66+
Python Library Reference:
67+
<a href="https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.extract">TarFile.extract</a>.
68+
</li>
69+
<li>
70+
Python Library Reference:
71+
<a href="https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.extractall">TarFile.extractall</a>.
72+
</li>
73+
74+
</references>
75+
</qhelp>
Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
/**
2+
* @name Arbitrary file write during tarfile extraction
3+
* @description Extracting files from a malicious tar archive without validating that the
4+
* destination file path is within the destination directory can cause files outside
5+
* the destination directory to be overwritten.
6+
* @kind path-problem
7+
* @id py/tarslip
8+
* @problem.severity error
9+
* @precision medium
10+
* @tags security
11+
* external/cwe/cwe-022
12+
*/
13+
14+
import python
15+
import semmle.python.security.Paths
16+
17+
import semmle.python.security.TaintTracking
18+
import semmle.python.security.strings.Basic
19+
20+
/** A TaintKind to represent open tarfile objects. That is, the result of calling `tarfile.open(...)` */
21+
class OpenTarFile extends TaintKind {
22+
OpenTarFile() {
23+
this = "tarfile.open"
24+
}
25+
26+
override TaintKind getTaintOfMethodResult(string name) {
27+
name = "getmember" and result instanceof TarFileInfo
28+
or
29+
name = "getmembers" and result.(SequenceKind).getItem() instanceof TarFileInfo
30+
}
31+
32+
override ClassValue getType() {
33+
result = Module::named("tarfile").attr("TarFile")
34+
}
35+
36+
override TaintKind getTaintForIteration() {
37+
result instanceof TarFileInfo
38+
}
39+
40+
}
41+
42+
/** The source of open tarfile objects. That is, any call to `tarfile.open(...)` */
43+
class TarfileOpen extends TaintSource {
44+
45+
TarfileOpen() {
46+
Module::named("tarfile").attr("open").getACall() = this
47+
and
48+
/* If argument refers to a string object, then it's a hardcoded path and
49+
* this tarfile is safe.
50+
*/
51+
not this.(CallNode).getAnArg().refersTo(any(StringObject str))
52+
and
53+
/* Ignore opens within the tarfile module itself */
54+
not this.(ControlFlowNode).getLocation().getFile().getBaseName() = "tarfile.py"
55+
}
56+
57+
override predicate isSourceOf(TaintKind kind) {
58+
kind instanceof OpenTarFile
59+
}
60+
61+
}
62+
63+
class TarFileInfo extends TaintKind {
64+
65+
TarFileInfo() {
66+
this = "tarfile.entry"
67+
}
68+
69+
override TaintKind getTaintOfMethodResult(string name) {
70+
name = "next" and result = this
71+
}
72+
73+
override TaintKind getTaintOfAttribute(string name) {
74+
name = "name" and result instanceof TarFileInfo
75+
}
76+
}
77+
78+
79+
/* For efficiency we don't want to track the flow of taint
80+
* around the tarfile module. */
81+
class ExcludeTarFilePy extends Sanitizer {
82+
83+
ExcludeTarFilePy() {
84+
this = "Tar sanitizer"
85+
}
86+
87+
override predicate sanitizingNode(TaintKind taint, ControlFlowNode node) {
88+
node.getLocation().getFile().getBaseName() = "tarfile.py" and
89+
(
90+
taint instanceof OpenTarFile
91+
or
92+
taint instanceof TarFileInfo
93+
or
94+
taint.(SequenceKind).getItem() instanceof TarFileInfo
95+
)
96+
}
97+
98+
}
99+
100+
/* Any call to an extractall method */
101+
class ExtractAllSink extends TaintSink {
102+
103+
CallNode call;
104+
105+
ExtractAllSink() {
106+
this = call.getFunction().(AttrNode).getObject("extractall") and
107+
count(call.getAnArg()) = 0
108+
}
109+
110+
override predicate sinks(TaintKind kind) {
111+
kind instanceof OpenTarFile
112+
}
113+
114+
}
115+
116+
/* Argument to extract method */
117+
class ExtractSink extends TaintSink {
118+
119+
CallNode call;
120+
121+
ExtractSink() {
122+
call.getFunction().(AttrNode).getName() = "extract" and
123+
this = call.getArg(0)
124+
}
125+
126+
override predicate sinks(TaintKind kind) {
127+
kind instanceof TarFileInfo
128+
}
129+
130+
}
131+
132+
133+
/* Members argument to extract method */
134+
class ExtractMembersSink extends TaintSink {
135+
136+
CallNode call;
137+
138+
ExtractMembersSink() {
139+
call.getFunction().(AttrNode).getName() = "extractall" and
140+
(this = call.getArg(0) or this = call.getArgByName("members"))
141+
}
142+
143+
override predicate sinks(TaintKind kind) {
144+
kind.(SequenceKind).getItem() instanceof TarFileInfo
145+
or
146+
kind instanceof OpenTarFile
147+
}
148+
149+
}
150+
151+
class TarFileInfoSanitizer extends Sanitizer {
152+
153+
TarFileInfoSanitizer() {
154+
this = "TarInfo sanitizer"
155+
}
156+
157+
override predicate sanitizingEdge(TaintKind taint, PyEdgeRefinement test) {
158+
path_sanitizing_test(test.getTest()) and
159+
taint instanceof TarFileInfo
160+
}
161+
162+
163+
}
164+
165+
private predicate path_sanitizing_test(ControlFlowNode test) {
166+
/* Assume that any test with "path" in it is a sanitizer */
167+
test.getAChild+().(AttrNode).getName().matches("%path")
168+
or
169+
test.getAChild+().(NameNode).getId().matches("%path")
170+
}
171+
172+
class TarSlipConfiguration extends TaintTracking::Configuration {
173+
174+
TarSlipConfiguration() { this = "TarSlip configuration" }
175+
176+
override predicate isSource(TaintTracking::Source source) { source instanceof TarfileOpen }
177+
178+
override predicate isSink(TaintTracking::Sink sink) {
179+
sink instanceof ExtractSink or
180+
sink instanceof ExtractAllSink or
181+
sink instanceof ExtractMembersSink
182+
}
183+
184+
override predicate isSanitizer(Sanitizer sanitizer) {
185+
sanitizer instanceof TarFileInfoSanitizer
186+
or
187+
sanitizer instanceof ExcludeTarFilePy
188+
}
189+
190+
}
191+
192+
193+
from TarSlipConfiguration config, TaintedPathSource src, TaintedPathSink sink
194+
where config.hasFlowPath(src, sink)
195+
select sink.getSink(), src, sink, "Extraction of tarfile from $@", src.getSource(), "a potentially untrusted source"
196+
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
2+
import tarfile
3+
4+
with tarfile.open('archive.zip') as tar:
5+
#BAD : This could write any file on the filesystem.
6+
for entry in tar:
7+
tar.extract(entry, "/tmp/unpack/")
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
2+
import tarfile
3+
import os.path
4+
5+
with tarfile.open('archive.zip') as tar:
6+
for entry in tar:
7+
#GOOD: Check that entry is safe
8+
if os.path.isabs(entry.name) or ".." in entry.name:
9+
raise ValueError("Illegal tar archive entry")
10+
tar.extract(entry, "/tmp/unpack/")

python/ql/src/semmle/python/security/Paths.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import semmle.python.security.TaintTracking
55
query predicate edges(TaintedNode fromnode, TaintedNode tonode) {
66
fromnode.getASuccessor() = tonode and
77
/* Don't record flow past sinks */
8-
not fromnode.isVulnerableSink()
8+
not fromnode.isSink()
99
}
1010

1111
private TaintedNode first_child(TaintedNode parent) {

python/ql/src/semmle/python/security/TaintTracking.qll

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -703,7 +703,7 @@ class TaintedNode extends TTaintedNode {
703703
/** Holds if the underlying CFG node for this node is a vulnerable node
704704
* and is vulnerable to this node's taint.
705705
*/
706-
predicate isVulnerableSink() {
706+
predicate isSink() {
707707
exists(TaintedNode src, TaintSink vuln |
708708
src.isSource() and
709709
src.getASuccessor*() = this and
@@ -712,6 +712,13 @@ class TaintedNode extends TTaintedNode {
712712
)
713713
}
714714

715+
/** DEPRECATED -- Use `TaintedNode.isSink()` instead
716+
* Sinks are not necessarily vulnerable
717+
* For removal 2020-07-01 */
718+
deprecated predicate isVulnerableSink() {
719+
this.isSink()
720+
}
721+
715722
TaintFlowImplementation::TrackedTaint fromAttribute(string name) {
716723
result = this.getTrackedValue().(TaintFlowImplementation::TrackedAttribute).fromAttribute(name)
717724
}
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
edges
2+
| tarslip.py:12:7:12:39 | tarfile.open | tarslip.py:13:1:13:3 | tarfile.open |
3+
| tarslip.py:12:7:12:39 | tarfile.open | tarslip.py:14:1:14:3 | tarfile.open |
4+
| tarslip.py:16:7:16:39 | tarfile.open | tarslip.py:17:14:17:16 | tarfile.open |
5+
| tarslip.py:16:7:16:39 | tarfile.open | tarslip.py:18:5:18:7 | tarfile.open |
6+
| tarslip.py:17:5:17:9 | tarfile.entry | tarslip.py:18:17:18:21 | tarfile.entry |
7+
| tarslip.py:17:14:17:16 | tarfile.open | tarslip.py:17:5:17:9 | tarfile.entry |
8+
| tarslip.py:26:7:26:39 | tarfile.open | tarslip.py:27:14:27:16 | tarfile.open |
9+
| tarslip.py:26:7:26:39 | tarfile.open | tarslip.py:30:5:30:7 | tarfile.open |
10+
| tarslip.py:27:5:27:9 | tarfile.entry | tarslip.py:28:22:28:26 | tarfile.entry |
11+
| tarslip.py:27:14:27:16 | tarfile.open | tarslip.py:27:5:27:9 | tarfile.entry |
12+
| tarslip.py:28:22:28:26 | tarfile.entry | tarslip.py:28:22:28:31 | tarfile.entry |
13+
| tarslip.py:33:7:33:39 | tarfile.open | tarslip.py:34:14:34:16 | tarfile.open |
14+
| tarslip.py:33:7:33:39 | tarfile.open | tarslip.py:37:5:37:7 | tarfile.open |
15+
| tarslip.py:34:5:34:9 | tarfile.entry | tarslip.py:35:16:35:20 | tarfile.entry |
16+
| tarslip.py:34:5:34:9 | tarfile.entry | tarslip.py:37:17:37:21 | tarfile.entry |
17+
| tarslip.py:34:14:34:16 | tarfile.open | tarslip.py:34:5:34:9 | tarfile.entry |
18+
| tarslip.py:35:16:35:20 | tarfile.entry | tarslip.py:35:16:35:25 | tarfile.entry |
19+
| tarslip.py:40:7:40:39 | tarfile.open | tarslip.py:41:1:41:3 | tarfile.open |
20+
| tarslip.py:40:7:40:39 | tarfile.open | tarslip.py:41:24:41:26 | tarfile.open |
21+
| tarslip.py:45:17:45:23 | tarfile.open | tarslip.py:46:17:46:23 | tarfile.open |
22+
| tarslip.py:46:9:46:12 | tarfile.entry | tarslip.py:47:20:47:23 | tarfile.entry |
23+
| tarslip.py:46:17:46:23 | tarfile.open | tarslip.py:46:9:46:12 | tarfile.entry |
24+
| tarslip.py:51:7:51:39 | tarfile.open | tarslip.py:52:1:52:3 | tarfile.open |
25+
| tarslip.py:51:7:51:39 | tarfile.open | tarslip.py:52:36:52:38 | tarfile.open |
26+
| tarslip.py:52:36:52:38 | tarfile.open | tarslip.py:45:17:45:23 | tarfile.open |
27+
parents
28+
| tarslip.py:45:17:45:23 | tarfile.open | tarslip.py:52:36:52:38 | tarfile.open |
29+
| tarslip.py:46:9:46:12 | tarfile.entry | tarslip.py:52:36:52:38 | tarfile.open |
30+
| tarslip.py:46:17:46:23 | tarfile.open | tarslip.py:52:36:52:38 | tarfile.open |
31+
| tarslip.py:47:20:47:23 | tarfile.entry | tarslip.py:52:36:52:38 | tarfile.open |
32+
#select
33+
| tarslip.py:13:1:13:3 | Taint sink | tarslip.py:12:7:12:39 | tarfile.open | tarslip.py:13:1:13:3 | tarfile.open | Extraction of tarfile from $@ | tarslip.py:12:7:12:39 | Taint source | a potentially untrusted source |
34+
| tarslip.py:18:17:18:21 | Taint sink | tarslip.py:16:7:16:39 | tarfile.open | tarslip.py:18:17:18:21 | tarfile.entry | Extraction of tarfile from $@ | tarslip.py:16:7:16:39 | Taint source | a potentially untrusted source |
35+
| tarslip.py:37:17:37:21 | Taint sink | tarslip.py:33:7:33:39 | tarfile.open | tarslip.py:37:17:37:21 | tarfile.entry | Extraction of tarfile from $@ | tarslip.py:33:7:33:39 | Taint source | a potentially untrusted source |
36+
| tarslip.py:41:24:41:26 | Taint sink | tarslip.py:40:7:40:39 | tarfile.open | tarslip.py:41:24:41:26 | tarfile.open | Extraction of tarfile from $@ | tarslip.py:40:7:40:39 | Taint source | a potentially untrusted source |
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Security/CWE-022/TarSlip.ql

0 commit comments

Comments
 (0)