Skip to content

Commit c8b9314

Browse files
authored
Merge pull request #4424 from RasmusWL/python-model-python2-specific-command-execution
Python: model Python 2 specific command execution
2 parents 60ce02a + ce967e1 commit c8b9314

File tree

8 files changed

+288
-51
lines changed

8 files changed

+288
-51
lines changed

python/ql/src/experimental/Security-new-dataflow/CWE-078/CommandInjection.ql

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,15 +46,16 @@ class CommandInjectionConfiguration extends TaintTracking::Configuration {
4646
// os.system(cmd)
4747
// ```
4848
//
49-
// Best solution I could come up with is to exclude all sinks inside the `os` and
50-
// `subprocess` modules. This does have a downside: If we have overlooked a function
51-
// in any of these, that internally runs a command, we no longer give an alert :|
49+
// Best solution I could come up with is to exclude all sinks inside the modules of
50+
// known sinks. This does have a downside: If we have overlooked a function in any
51+
// of these, that internally runs a command, we no longer give an alert :| -- and we
52+
// need to keep them updated (which is hard to remember)
5253
//
5354
// This does not only affect `os.popen`, but also the helper functions in
5455
// `subprocess`. See:
5556
// https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/os.py#L974
5657
// https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/subprocess.py#L341
57-
not sink.getScope().getEnclosingModule().getName() in ["os", "subprocess"]
58+
not sink.getScope().getEnclosingModule().getName() in ["os", "subprocess", "platform", "popen2"]
5859
}
5960
}
6061

python/ql/src/experimental/semmle/python/frameworks/Stdlib.qll

Lines changed: 185 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ private module Stdlib {
3232
* For example, using `attr_name = "system"` will get all uses of `os.system`.
3333
*/
3434
private DataFlow::Node os_attr(DataFlow::TypeTracker t, string attr_name) {
35-
attr_name in ["system", "popen",
35+
attr_name in ["system", "popen", "popen2", "popen3", "popen4",
3636
// exec
3737
"execl", "execle", "execlp", "execlpe", "execv", "execve", "execvp", "execvpe",
3838
// spawn
@@ -102,74 +102,84 @@ private module Stdlib {
102102
* A call to `os.system`.
103103
* See https://docs.python.org/3/library/os.html#os.system
104104
*/
105-
private class OsSystemCall extends SystemCommandExecution::Range {
106-
OsSystemCall() { this.asCfgNode().(CallNode).getFunction() = os_attr("system").asCfgNode() }
105+
private class OsSystemCall extends SystemCommandExecution::Range, DataFlow::CfgNode {
106+
override CallNode node;
107107

108-
override DataFlow::Node getCommand() {
109-
result.asCfgNode() = this.asCfgNode().(CallNode).getArg(0)
110-
}
108+
OsSystemCall() { node.getFunction() = os_attr("system").asCfgNode() }
109+
110+
override DataFlow::Node getCommand() { result.asCfgNode() = node.getArg(0) }
111111
}
112112

113113
/**
114-
* A call to `os.popen`
114+
* A call to any of the `os.popen*` functions
115115
* See https://docs.python.org/3/library/os.html#os.popen
116+
*
117+
* Note that in Python 2, there are also `popen2`, `popen3`, and `popen4` functions.
118+
* Although deprecated since version 2.6, they still work in 2.7.
119+
* See https://docs.python.org/2.7/library/os.html#os.popen2
116120
*/
117-
private class OsPopenCall extends SystemCommandExecution::Range {
118-
OsPopenCall() { this.asCfgNode().(CallNode).getFunction() = os_attr("popen").asCfgNode() }
121+
private class OsPopenCall extends SystemCommandExecution::Range, DataFlow::CfgNode {
122+
override CallNode node;
123+
string name;
124+
125+
OsPopenCall() {
126+
name in ["popen", "popen2", "popen3", "popen4"] and
127+
node.getFunction() = os_attr(name).asCfgNode()
128+
}
119129

120130
override DataFlow::Node getCommand() {
121-
result.asCfgNode() = this.asCfgNode().(CallNode).getArg(0)
131+
result.asCfgNode() = node.getArg(0)
132+
or
133+
not name = "popen" and
134+
result.asCfgNode() = node.getArgByName("cmd")
122135
}
123136
}
124137

125138
/**
126139
* A call to any of the `os.exec*` functions
127140
* See https://docs.python.org/3.8/library/os.html#os.execl
128141
*/
129-
private class OsExecCall extends SystemCommandExecution::Range {
142+
private class OsExecCall extends SystemCommandExecution::Range, DataFlow::CfgNode {
143+
override CallNode node;
144+
130145
OsExecCall() {
131146
exists(string name |
132147
name in ["execl", "execle", "execlp", "execlpe", "execv", "execve", "execvp", "execvpe"] and
133-
this.asCfgNode().(CallNode).getFunction() = os_attr(name).asCfgNode()
148+
node.getFunction() = os_attr(name).asCfgNode()
134149
)
135150
}
136151

137-
override DataFlow::Node getCommand() {
138-
result.asCfgNode() = this.asCfgNode().(CallNode).getArg(0)
139-
}
152+
override DataFlow::Node getCommand() { result.asCfgNode() = node.getArg(0) }
140153
}
141154

142155
/**
143156
* A call to any of the `os.spawn*` functions
144157
* See https://docs.python.org/3.8/library/os.html#os.spawnl
145158
*/
146-
private class OsSpawnCall extends SystemCommandExecution::Range {
159+
private class OsSpawnCall extends SystemCommandExecution::Range, DataFlow::CfgNode {
160+
override CallNode node;
161+
147162
OsSpawnCall() {
148163
exists(string name |
149164
name in ["spawnl", "spawnle", "spawnlp", "spawnlpe", "spawnv", "spawnve", "spawnvp",
150165
"spawnvpe"] and
151-
this.asCfgNode().(CallNode).getFunction() = os_attr(name).asCfgNode()
166+
node.getFunction() = os_attr(name).asCfgNode()
152167
)
153168
}
154169

155-
override DataFlow::Node getCommand() {
156-
result.asCfgNode() = this.asCfgNode().(CallNode).getArg(1)
157-
}
170+
override DataFlow::Node getCommand() { result.asCfgNode() = node.getArg(1) }
158171
}
159172

160173
/**
161174
* A call to any of the `os.posix_spawn*` functions
162175
* See https://docs.python.org/3.8/library/os.html#os.posix_spawn
163176
*/
164-
private class OsPosixSpawnCall extends SystemCommandExecution::Range {
165-
OsPosixSpawnCall() {
166-
this.asCfgNode().(CallNode).getFunction() =
167-
os_attr(["posix_spawn", "posix_spawnp"]).asCfgNode()
168-
}
177+
private class OsPosixSpawnCall extends SystemCommandExecution::Range, DataFlow::CfgNode {
178+
override CallNode node;
169179

170-
override DataFlow::Node getCommand() {
171-
result.asCfgNode() = this.asCfgNode().(CallNode).getArg(0)
172-
}
180+
OsPosixSpawnCall() { node.getFunction() = os_attr(["posix_spawn", "posix_spawnp"]).asCfgNode() }
181+
182+
override DataFlow::Node getCommand() { result.asCfgNode() = node.getArg(0) }
173183
}
174184

175185
/** An additional taint step for calls to `os.path.join` */
@@ -245,29 +255,22 @@ private module Stdlib {
245255
* A call to `subprocess.Popen` or helper functions (call, check_call, check_output, run)
246256
* See https://docs.python.org/3.8/library/subprocess.html#subprocess.Popen
247257
*/
248-
private class SubprocessPopenCall extends SystemCommandExecution::Range {
249-
CallNode call;
258+
private class SubprocessPopenCall extends SystemCommandExecution::Range, DataFlow::CfgNode {
259+
override CallNode node;
250260

251261
SubprocessPopenCall() {
252-
call = this.asCfgNode() and
253262
exists(string name |
254263
name in ["Popen", "call", "check_call", "check_output", "run"] and
255-
call.getFunction() = subprocess_attr(name).asCfgNode()
264+
node.getFunction() = subprocess_attr(name).asCfgNode()
256265
)
257266
}
258267

259268
/** Gets the ControlFlowNode for the `args` argument, if any. */
260-
private ControlFlowNode get_args_arg() {
261-
result = call.getArg(0)
262-
or
263-
result = call.getArgByName("args")
264-
}
269+
private ControlFlowNode get_args_arg() { result in [node.getArg(0), node.getArgByName("args")] }
265270

266271
/** Gets the ControlFlowNode for the `shell` argument, if any. */
267272
private ControlFlowNode get_shell_arg() {
268-
result = call.getArg(8)
269-
or
270-
result = call.getArgByName("shell")
273+
result in [node.getArg(8), node.getArgByName("shell")]
271274
}
272275

273276
private boolean get_shell_arg_value() {
@@ -289,9 +292,7 @@ private module Stdlib {
289292

290293
/** Gets the ControlFlowNode for the `executable` argument, if any. */
291294
private ControlFlowNode get_executable_arg() {
292-
result = call.getArg(2)
293-
or
294-
result = call.getArgByName("executable")
295+
result in [node.getArg(2), node.getArgByName("executable")]
295296
}
296297

297298
override DataFlow::Node getCommand() {
@@ -328,6 +329,147 @@ private module Stdlib {
328329
}
329330
}
330331

332+
// ---------------------------------------------------------------------------
333+
// popen2
334+
// ---------------------------------------------------------------------------
335+
/** Gets a reference to the `popen2` module (only available in Python 2). */
336+
private DataFlow::Node popen2(DataFlow::TypeTracker t) {
337+
t.start() and
338+
result = DataFlow::importNode("popen2")
339+
or
340+
exists(DataFlow::TypeTracker t2 | result = popen2(t2).track(t2, t))
341+
}
342+
343+
/** Gets a reference to the `popen2` module (only available in Python 2). */
344+
DataFlow::Node popen2() { result = popen2(DataFlow::TypeTracker::end()) }
345+
346+
/**
347+
* Gets a reference to the attribute `attr_name` of the `popen2` module.
348+
* WARNING: Only holds for a few predefined attributes.
349+
*/
350+
private DataFlow::Node popen2_attr(DataFlow::TypeTracker t, string attr_name) {
351+
attr_name in ["popen2", "popen3", "popen4",
352+
// classes
353+
"Popen3", "Popen4"] and
354+
(
355+
t.start() and
356+
result = DataFlow::importNode("popen2." + attr_name)
357+
or
358+
t.startInAttr(attr_name) and
359+
result = DataFlow::importNode("popen2")
360+
)
361+
or
362+
// Due to bad performance when using normal setup with `popen2_attr(t2, attr_name).track(t2, t)`
363+
// we have inlined that code and forced a join
364+
exists(DataFlow::TypeTracker t2 |
365+
exists(DataFlow::StepSummary summary |
366+
popen2_attr_first_join(t2, attr_name, result, summary) and
367+
t = t2.append(summary)
368+
)
369+
)
370+
}
371+
372+
pragma[nomagic]
373+
private predicate popen2_attr_first_join(
374+
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
375+
) {
376+
DataFlow::StepSummary::step(popen2_attr(t2, attr_name), res, summary)
377+
}
378+
379+
/**
380+
* Gets a reference to the attribute `attr_name` of the `popen2` module.
381+
* WARNING: Only holds for a few predefined attributes.
382+
*/
383+
private DataFlow::Node popen2_attr(string attr_name) {
384+
result = popen2_attr(DataFlow::TypeTracker::end(), attr_name)
385+
}
386+
387+
/**
388+
* A call to any of the `popen2.popen*` functions, or instantiation of a `popen2.Popen*` class.
389+
* See https://docs.python.org/2.7/library/popen2.html
390+
*/
391+
private class Popen2PopenCall extends SystemCommandExecution::Range, DataFlow::CfgNode {
392+
override CallNode node;
393+
394+
Popen2PopenCall() {
395+
exists(string name |
396+
name in ["popen2", "popen3", "popen4", "Popen3", "Popen4"] and
397+
node.getFunction() = popen2_attr(name).asCfgNode()
398+
)
399+
}
400+
401+
override DataFlow::Node getCommand() {
402+
result.asCfgNode() in [node.getArg(0), node.getArgByName("cmd")]
403+
}
404+
}
405+
406+
// ---------------------------------------------------------------------------
407+
// platform
408+
// ---------------------------------------------------------------------------
409+
/** Gets a reference to the `platform` module. */
410+
private DataFlow::Node platform(DataFlow::TypeTracker t) {
411+
t.start() and
412+
result = DataFlow::importNode("platform")
413+
or
414+
exists(DataFlow::TypeTracker t2 | result = platform(t2).track(t2, t))
415+
}
416+
417+
/** Gets a reference to the `platform` module. */
418+
DataFlow::Node platform() { result = platform(DataFlow::TypeTracker::end()) }
419+
420+
/**
421+
* Gets a reference to the attribute `attr_name` of the `platform` module.
422+
* WARNING: Only holds for a few predefined attributes.
423+
*/
424+
private DataFlow::Node platform_attr(DataFlow::TypeTracker t, string attr_name) {
425+
attr_name in ["popen"] and
426+
(
427+
t.start() and
428+
result = DataFlow::importNode("platform." + attr_name)
429+
or
430+
t.startInAttr(attr_name) and
431+
result = DataFlow::importNode("platform")
432+
)
433+
or
434+
// Due to bad performance when using normal setup with `platform_attr(t2, attr_name).track(t2, t)`
435+
// we have inlined that code and forced a join
436+
exists(DataFlow::TypeTracker t2 |
437+
exists(DataFlow::StepSummary summary |
438+
platform_attr_first_join(t2, attr_name, result, summary) and
439+
t = t2.append(summary)
440+
)
441+
)
442+
}
443+
444+
pragma[nomagic]
445+
private predicate platform_attr_first_join(
446+
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
447+
) {
448+
DataFlow::StepSummary::step(platform_attr(t2, attr_name), res, summary)
449+
}
450+
451+
/**
452+
* Gets a reference to the attribute `attr_name` of the `platform` module.
453+
* WARNING: Only holds for a few predefined attributes.
454+
*/
455+
private DataFlow::Node platform_attr(string attr_name) {
456+
result = platform_attr(DataFlow::TypeTracker::end(), attr_name)
457+
}
458+
459+
/**
460+
* A call to the `platform.popen` function.
461+
* See https://docs.python.org/2.7/library/platform.html#platform.popen
462+
*/
463+
private class PlatformPopenCall extends SystemCommandExecution::Range, DataFlow::CfgNode {
464+
override CallNode node;
465+
466+
PlatformPopenCall() { node.getFunction() = platform_attr("popen").asCfgNode() }
467+
468+
override DataFlow::Node getCommand() {
469+
result.asCfgNode() in [node.getArg(0), node.getArgByName("cmd")]
470+
}
471+
}
472+
331473
// ---------------------------------------------------------------------------
332474
// builtins
333475
// ---------------------------------------------------------------------------
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
########################################
2+
import os
3+
4+
os.popen2("cmd1; cmd2") # $getCommand="cmd1; cmd2"
5+
os.popen3("cmd1; cmd2") # $getCommand="cmd1; cmd2"
6+
os.popen4("cmd1; cmd2") # $getCommand="cmd1; cmd2"
7+
8+
9+
os.popen2(cmd="cmd1; cmd2") # $getCommand="cmd1; cmd2"
10+
os.popen3(cmd="cmd1; cmd2") # $getCommand="cmd1; cmd2"
11+
os.popen4(cmd="cmd1; cmd2") # $getCommand="cmd1; cmd2"
12+
13+
# In Python 2, os.popen does not accept keyword arguments, so this call raises a TypeError
14+
os.popen(cmd="cmd1; cmd2")
15+
16+
########################################
17+
import platform
18+
19+
platform.popen("cmd1; cmd2") # $getCommand="cmd1; cmd2"
20+
platform.popen(cmd="cmd1; cmd2") # $getCommand="cmd1; cmd2"
21+
22+
########################################
23+
# popen2 was deprecated in Python 2.6, but still available in Python 2.7
24+
import popen2
25+
26+
popen2.popen2("cmd1; cmd2") # $getCommand="cmd1; cmd2"
27+
popen2.popen3("cmd1; cmd2") # $getCommand="cmd1; cmd2"
28+
popen2.popen4("cmd1; cmd2") # $getCommand="cmd1; cmd2"
29+
popen2.Popen3("cmd1; cmd2") # $getCommand="cmd1; cmd2"
30+
popen2.Popen4("cmd1; cmd2") # $getCommand="cmd1; cmd2"
31+
32+
popen2.popen2(cmd="cmd1; cmd2") # $getCommand="cmd1; cmd2"
33+
popen2.popen3(cmd="cmd1; cmd2") # $getCommand="cmd1; cmd2"
34+
popen2.popen4(cmd="cmd1; cmd2") # $getCommand="cmd1; cmd2"
35+
popen2.Popen3(cmd="cmd1; cmd2") # $getCommand="cmd1; cmd2"
36+
popen2.Popen4(cmd="cmd1; cmd2") # $getCommand="cmd1; cmd2"

0 commit comments

Comments
 (0)