Skip to content

Commit 466c22f

Browse files
authored
Merge pull request #4435 from RasmusWL/python-port-code-injection
Python: port code injection query
2 parents 5f6f85c + 5db4f90 commit 466c22f

File tree

21 files changed

+319
-3
lines changed

21 files changed

+319
-3
lines changed
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/**
2+
* @name Code injection
3+
* @description Interpreting unsanitized user input as code allows a malicious user to perform arbitrary
4+
* code execution.
5+
* @kind path-problem
6+
* @problem.severity error
7+
* @sub-severity high
8+
* @precision high
9+
* @id py/code-injection
10+
* @tags security
11+
* external/owasp/owasp-a1
12+
* external/cwe/cwe-094
13+
* external/cwe/cwe-095
14+
* external/cwe/cwe-116
15+
*/
16+
17+
import python
18+
import experimental.dataflow.DataFlow
19+
import experimental.dataflow.TaintTracking
20+
import experimental.semmle.python.Concepts
21+
import experimental.dataflow.RemoteFlowSources
22+
import DataFlow::PathGraph
23+
24+
class CodeInjectionConfiguration extends TaintTracking::Configuration {
25+
CodeInjectionConfiguration() { this = "CodeInjectionConfiguration" }
26+
27+
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
28+
29+
override predicate isSink(DataFlow::Node sink) { sink = any(CodeExecution e).getCode() }
30+
}
31+
32+
from CodeInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
33+
where config.hasFlowPath(source, sink)
34+
select sink.getNode(), source, sink, "$@ flows to here and is interpreted as code.",
35+
source.getNode(), "A user-provided value"

python/ql/src/experimental/semmle/python/Concepts.qll

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,12 @@ private import experimental.dataflow.RemoteFlowSources
1717
* extend `SystemCommandExecution::Range` instead.
1818
*/
1919
class SystemCommandExecution extends DataFlow::Node {
20-
SystemCommandExecution::Range self;
20+
SystemCommandExecution::Range range;
2121

22-
SystemCommandExecution() { this = self }
22+
SystemCommandExecution() { this = range }
2323

2424
/** Gets the argument that specifies the command to be executed. */
25-
DataFlow::Node getCommand() { result = self.getCommand() }
25+
DataFlow::Node getCommand() { result = range.getCommand() }
2626
}
2727

2828
/** Provides a class for modeling new system-command execution APIs. */
@@ -40,6 +40,35 @@ module SystemCommandExecution {
4040
}
4141
}
4242

43+
/**
44+
* A data-flow node that dynamically executes Python code.
45+
*
46+
* Extend this class to refine existing API models. If you want to model new APIs,
47+
* extend `CodeExecution::Range` instead.
48+
*/
49+
class CodeExecution extends DataFlow::Node {
50+
CodeExecution::Range range;
51+
52+
CodeExecution() { this = range }
53+
54+
/** Gets the argument that specifies the code to be executed. */
55+
DataFlow::Node getCode() { result = range.getCode() }
56+
}
57+
58+
/** Provides a class for modeling new dynamic code execution APIs. */
59+
module CodeExecution {
60+
/**
61+
* A data-flow node that dynamically executes Python code.
62+
*
63+
* Extend this class to model new APIs. If you want to refine existing API models,
64+
* extend `CodeExecution` instead.
65+
*/
66+
abstract class Range extends DataFlow::Node {
67+
/** Gets the argument that specifies the code to be executed. */
68+
abstract DataFlow::Node getCode();
69+
}
70+
}
71+
4372
/** Provides classes for modeling HTTP-related APIs. */
4473
module HTTP {
4574
/** Provides classes for modeling HTTP servers. */

python/ql/src/experimental/semmle/python/frameworks/Stdlib.qll

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -327,4 +327,115 @@ private module Stdlib {
327327
)
328328
}
329329
}
330+
331+
// ---------------------------------------------------------------------------
332+
// builtins
333+
// ---------------------------------------------------------------------------
334+
/** Gets a reference to the `builtins` module (called `__builtin__` in Python 2). */
335+
private DataFlow::Node builtins(DataFlow::TypeTracker t) {
336+
t.start() and
337+
result = DataFlow::importNode(["builtins", "__builtin__"])
338+
or
339+
exists(DataFlow::TypeTracker t2 | result = builtins(t2).track(t2, t))
340+
}
341+
342+
/** Gets a reference to the `builtins` module. */
343+
DataFlow::Node builtins() { result = builtins(DataFlow::TypeTracker::end()) }
344+
345+
/**
346+
* Gets a reference to the attribute `attr_name` of the `builtins` module.
347+
* WARNING: Only holds for a few predefined attributes.
348+
*/
349+
private DataFlow::Node builtins_attr(DataFlow::TypeTracker t, string attr_name) {
350+
attr_name in ["exec", "eval", "compile"] and
351+
(
352+
t.start() and
353+
result = DataFlow::importNode(["builtins", "__builtin__"] + "." + attr_name)
354+
or
355+
t.startInAttr(attr_name) and
356+
result = DataFlow::importNode(["builtins", "__builtin__"])
357+
or
358+
// special handling of builtins, that are in scope without any imports
359+
// TODO: Take care of overrides, either `def eval: ...`, `eval = ...`, or `builtins.eval = ...`
360+
t.start() and
361+
exists(NameNode ref | result.asCfgNode() = ref |
362+
ref.isGlobal() and
363+
ref.getId() = attr_name and
364+
ref.isLoad()
365+
)
366+
)
367+
or
368+
// Due to bad performance when using normal setup with `builtins_attr(t2, attr_name).track(t2, t)`
369+
// we have inlined that code and forced a join
370+
exists(DataFlow::TypeTracker t2 |
371+
exists(DataFlow::StepSummary summary |
372+
builtins_attr_first_join(t2, attr_name, result, summary) and
373+
t = t2.append(summary)
374+
)
375+
)
376+
}
377+
378+
pragma[nomagic]
379+
private predicate builtins_attr_first_join(
380+
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
381+
) {
382+
DataFlow::StepSummary::step(builtins_attr(t2, attr_name), res, summary)
383+
}
384+
385+
/**
386+
* Gets a reference to the attribute `attr_name` of the `builtins` module.
387+
* WARNING: Only holds for a few predefined attributes.
388+
*/
389+
private DataFlow::Node builtins_attr(string attr_name) {
390+
result = builtins_attr(DataFlow::TypeTracker::end(), attr_name)
391+
}
392+
393+
/**
394+
* A call to the builtin `exec` function.
395+
* See https://docs.python.org/3/library/functions.html#exec
396+
*/
397+
private class BuiltinsExecCall extends CodeExecution::Range, DataFlow::CfgNode {
398+
override CallNode node;
399+
400+
BuiltinsExecCall() { node.getFunction() = builtins_attr("exec").asCfgNode() }
401+
402+
override DataFlow::Node getCode() { result.asCfgNode() = node.getArg(0) }
403+
}
404+
405+
/**
406+
* A call to the builtin `eval` function.
407+
* See https://docs.python.org/3/library/functions.html#eval
408+
*/
409+
private class BuiltinsEvalCall extends CodeExecution::Range, DataFlow::CfgNode {
410+
override CallNode node;
411+
412+
BuiltinsEvalCall() { node.getFunction() = builtins_attr("eval").asCfgNode() }
413+
414+
override DataFlow::Node getCode() { result.asCfgNode() = node.getArg(0) }
415+
}
416+
417+
/** An additional taint step for calls to the builtin function `compile` */
418+
private class BuiltinsCompileCallAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
419+
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
420+
exists(CallNode call |
421+
nodeTo.asCfgNode() = call and
422+
call.getFunction() = builtins_attr("compile").asCfgNode() and
423+
nodeFrom.asCfgNode() in [call.getArg(0), call.getArgByName("source")]
424+
)
425+
}
426+
}
427+
}
428+
429+
/**
430+
* An exec statement (only Python 2).
431+
* Se ehttps://docs.python.org/2/reference/simple_stmts.html#the-exec-statement.
432+
*/
433+
private class ExecStatement extends CodeExecution::Range {
434+
ExecStatement() {
435+
// since there are no DataFlow::Nodes for a Statement, we can't do anything like
436+
// `this = any(Exec exec)`
437+
this.asExpr() = any(Exec exec).getBody()
438+
}
439+
440+
override DataFlow::Node getCode() { result = this }
330441
}
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# exec statement is Python 2 specific
2+
exec "print(42)" # $getCode="print(42)"

python/ql/test/experimental/library-tests/frameworks/stdlib-py2/ConceptsTest.expected

Whitespace-only changes.
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
import python
2+
import experimental.meta.ConceptsTest
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
semmle-extractor-options: --max-import-depth=1 --lang=2
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
import builtins
2+
3+
# exec being part of builtins is Python 3 only
4+
builtins.exec("print(42)") # $getCode="print(42)"

python/ql/test/experimental/library-tests/frameworks/stdlib-py3/ConceptsTest.expected

Whitespace-only changes.
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
import python
2+
import experimental.meta.ConceptsTest

0 commit comments

Comments
 (0)