Skip to content

Commit a6abee9

Browse files
authored
Merge pull request #4476 from yoff/python-port-sql-injection
Python: Port SqlInjection
2 parents ea4ea6b + 0604810 commit a6abee9

File tree

10 files changed

+470
-1
lines changed

10 files changed

+470
-1
lines changed
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
/**
2+
* @name SQL query built from user-controlled sources
3+
* @description Building a SQL query from user-controlled sources is vulnerable to insertion of
4+
* malicious SQL code by the user.
5+
* @kind path-problem
6+
* @problem.severity error
7+
* @precision high
8+
* @id py/sql-injection
9+
* @tags security
10+
* external/cwe/cwe-089
11+
* external/owasp/owasp-a1
12+
*/
13+
14+
import python
15+
import experimental.dataflow.DataFlow
16+
import experimental.dataflow.TaintTracking
17+
import experimental.semmle.python.Concepts
18+
import experimental.dataflow.RemoteFlowSources
19+
import DataFlow::PathGraph
20+
21+
/**
 * A taint-tracking configuration that tracks data from remote flow sources
 * to the SQL argument of a `SqlExecution`.
 */
class SQLInjectionConfiguration extends TaintTracking::Configuration {
  SQLInjectionConfiguration() { this = "SQLInjectionConfiguration" }

  /** Holds if `source` is a `RemoteFlowSource`. */
  override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }

  /** Holds if `sink` is the SQL argument of some `SqlExecution`. */
  override predicate isSink(DataFlow::Node sink) {
    exists(SqlExecution execution | sink = execution.getSql())
  }
}
28+
29+
// Report every sink that `config` finds reachable from a source, together with the flow path.
// The `$@` placeholder in the message is filled with the source node and the text that follows it.
from SQLInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
30+
where config.hasFlowPath(source, sink)
31+
select sink.getNode(), source, sink, "This SQL query depends on $@.", source.getNode(),
32+
"a user-provided value"

python/ql/src/experimental/semmle/python/Concepts.qll

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,35 @@ module CodeExecution {
126126
}
127127
}
128128

129+
/**
 * A data-flow node that executes SQL statements.
 *
 * This class is intended for refining existing API models. To model a new API,
 * extend `SqlExecution::Range` instead.
 */
class SqlExecution extends DataFlow::Node {
  /** The underlying `Range` value that this node is equal to. */
  SqlExecution::Range range;

  SqlExecution() { range = this }

  /** Gets the argument that specifies the SQL statements to be executed. */
  DataFlow::Node getSql() { range.getSql() = result }
}
143+
144+
/** Provides a class for modeling new SQL execution APIs. */
module SqlExecution {
  /**
   * A data-flow node that executes SQL statements.
   *
   * This is the extension point for modeling new APIs. To refine an existing
   * API model, extend `SqlExecution` instead.
   */
  abstract class Range extends DataFlow::Node {
    /** Gets the argument that specifies the SQL statements to be executed. */
    abstract DataFlow::Node getSql();
  }
}
157+
129158
/** Provides classes for modeling HTTP-related APIs. */
130159
module HTTP {
131160
/** Provides classes for modeling HTTP servers. */

python/ql/src/experimental/semmle/python/frameworks/Django.qll

Lines changed: 304 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ private module Django {
3434
* WARNING: Only holds for a few predefined attributes.
3535
*/
3636
private DataFlow::Node django_attr(DataFlow::TypeTracker t, string attr_name) {
37-
attr_name in ["urls", "http"] and
37+
attr_name in ["db", "urls", "http"] and
3838
(
3939
t.start() and
4040
result = DataFlow::importNode("django" + "." + attr_name)
@@ -70,6 +70,309 @@ private module Django {
7070

7171
/** Provides models for the `django` module. */
7272
module django {
73+
// -------------------------------------------------------------------------
74+
// django.db
75+
// -------------------------------------------------------------------------
76+
/** Gets a reference to the `django.db` module (via `django_attr`). */
DataFlow::Node db() { django_attr("db") = result }
78+
79+
/** Provides models for the `django.db` module. */
80+
module db {
81+
/** Gets a reference to the `django.db.connection` object. */
82+
private DataFlow::Node connection(DataFlow::TypeTracker t) {
83+
t.start() and
84+
result = DataFlow::importNode("django.db.connection")
85+
or
86+
t.startInAttr("connection") and
87+
result = db()
88+
or
89+
exists(DataFlow::TypeTracker t2 | result = connection(t2).track(t2, t))
90+
}
91+
92+
/** Gets a reference to the `django.db.connection` object. */
93+
DataFlow::Node connection() { result = connection(DataFlow::TypeTracker::end()) }
94+
95+
/** Provides models for the `django.db.connection.cursor` method. */
96+
module cursor {
97+
/** Gets a reference to the `django.db.connection.cursor` metod. */
98+
private DataFlow::Node methodRef(DataFlow::TypeTracker t) {
99+
t.start() and
100+
result = DataFlow::importNode("django.db.connection.cursor")
101+
or
102+
t.startInAttr("cursor") and
103+
result = connection()
104+
or
105+
exists(DataFlow::TypeTracker t2 | result = methodRef(t2).track(t2, t))
106+
}
107+
108+
/** Gets a reference to the `django.db.connection.cursor` metod. */
109+
DataFlow::Node methodRef() { result = methodRef(DataFlow::TypeTracker::end()) }
110+
111+
/** Gets a reference to a result of calling `django.db.connection.cursor`. */
112+
private DataFlow::Node methodResult(DataFlow::TypeTracker t) {
113+
t.start() and
114+
result.asCfgNode().(CallNode).getFunction() = methodRef().asCfgNode()
115+
or
116+
exists(DataFlow::TypeTracker t2 | result = methodResult(t2).track(t2, t))
117+
}
118+
119+
/** Gets a reference to a result of calling `django.db.connection.cursor`. */
120+
DataFlow::Node methodResult() { result = methodResult(DataFlow::TypeTracker::end()) }
121+
}
122+
123+
/** Gets a reference to the `django.db.connection.cursor.execute` function. */
124+
private DataFlow::Node execute(DataFlow::TypeTracker t) {
125+
t.startInAttr("execute") and
126+
result = cursor::methodResult()
127+
or
128+
exists(DataFlow::TypeTracker t2 | result = execute(t2).track(t2, t))
129+
}
130+
131+
/** Gets a reference to the `django.db.connection.cursor.execute` function. */
132+
DataFlow::Node execute() { result = execute(DataFlow::TypeTracker::end()) }
133+
134+
// -------------------------------------------------------------------------
135+
// django.db.models
136+
// -------------------------------------------------------------------------
137+
// NOTE: The modelling of django models is currently fairly incomplete.
138+
// It does not fully take `Model`s, `Manager`s, `and QuerySet`s into account.
139+
// It simply identifies some common dangerous cases.
140+
/** Gets a reference to the `django.db.models` module. */
141+
private DataFlow::Node models(DataFlow::TypeTracker t) {
142+
t.start() and
143+
result = DataFlow::importNode("django.db.models")
144+
or
145+
t.startInAttr("models") and
146+
result = django()
147+
or
148+
exists(DataFlow::TypeTracker t2 | result = models(t2).track(t2, t))
149+
}
150+
151+
/** Gets a reference to the `django.db.models` module. */
152+
DataFlow::Node models() { result = models(DataFlow::TypeTracker::end()) }
153+
154+
/** Provides models for the `django.db.models` module. */
155+
module models {
156+
/** Provides models for the `django.db.models.Model` class. */
157+
module Model {
158+
/** Gets a reference to the `django.db.models.Model` class. */
159+
private DataFlow::Node classRef(DataFlow::TypeTracker t) {
160+
t.start() and
161+
result = DataFlow::importNode("django.db.models.Model")
162+
or
163+
t.startInAttr("Model") and
164+
result = models()
165+
or
166+
exists(DataFlow::TypeTracker t2 | result = classRef(t2).track(t2, t))
167+
}
168+
169+
/** Gets a reference to the `django.db.models.Model` class. */
170+
DataFlow::Node classRef() { result = classRef(DataFlow::TypeTracker::end()) }
171+
172+
/** Gets a definition of a subclass the `django.db.models.Model` class. */
173+
class SubclassDef extends ControlFlowNode {
174+
string name;
175+
176+
SubclassDef() {
177+
exists(ClassExpr ce |
178+
this.getNode() = ce and
179+
ce.getABase() = classRef().asExpr() and
180+
ce.getName() = name
181+
)
182+
}
183+
184+
string getName() { result = name }
185+
}
186+
187+
/**
188+
* A reference to a class that is a subclass of the `django.db.models.Model` class.
189+
* This is an approximation, since it simply matches identifiers.
190+
*/
191+
private DataFlow::Node subclassRef(DataFlow::TypeTracker t) {
192+
t.start() and
193+
result.asCfgNode().(NameNode).getId() = any(SubclassDef cd).getName()
194+
or
195+
exists(DataFlow::TypeTracker t2 | result = subclassRef(t2).track(t2, t))
196+
}
197+
198+
/**
199+
* A reference to a class that is a subclass of the `django.db.models.Model` class.
200+
* This is an approximation, since it simply matches identifiers.
201+
*/
202+
DataFlow::Node subclassRef() { result = subclassRef(DataFlow::TypeTracker::end()) }
203+
}
204+
205+
/** Gets a reference to the `objects` object of a django model, tracked by `t`. */
private DataFlow::Node objects(DataFlow::TypeTracker t) {
  // The `objects` attribute of a reference to a `Model` subclass.
  result = Model::subclassRef() and
  t.startInAttr("objects")
  or
  exists(DataFlow::TypeTracker prev | result = objects(prev).track(prev, t))
}

/** Gets a reference to the `objects` object of a django model. */
DataFlow::Node objects() { result = objects(DataFlow::TypeTracker::end()) }
215+
216+
/**
217+
* Gets a reference to the attribute `attr_name` of an `objects` object.
218+
* WARNING: Only holds for a few predefined attributes.
* The predefined attribute names are `annotate`, `extra` and `raw`; `t` is the
* type tracker describing how the reference was reached.
219+
*/
220+
private DataFlow::Node objects_attr(DataFlow::TypeTracker t, string attr_name) {
// Base case: start tracking attribute `attr_name` on a reference to `objects`.
221+
attr_name in ["annotate", "extra", "raw"] and
222+
t.startInAttr(attr_name) and
223+
result = objects()
224+
or
225+
// Due to bad performance when using normal setup with `objects_attr(t2, attr_name).track(t2, t)`
226+
// we have inlined that code and forced a join
// Recursive case: `result` is one step (`summary`) away from a node tracked by `t2`,
// and `t` is `t2` extended with that step.
227+
exists(DataFlow::TypeTracker t2 |
228+
exists(DataFlow::StepSummary summary |
229+
objects_attr_first_join(t2, attr_name, result, summary) and
230+
t = t2.append(summary)
231+
)
232+
)
233+
}
234+
235+
/**
 * Holds if there is a single type-tracking step, described by `summary`, from
 * a node given by `objects_attr(t2, attr_name)` to `res`.
 *
 * Helper for the recursive case of `objects_attr`; it is a separate `nomagic`
 * predicate so that this join is computed first (see the performance comment
 * in `objects_attr`).
 */
pragma[nomagic]
236+
private predicate objects_attr_first_join(
237+
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res,
238+
DataFlow::StepSummary summary
239+
) {
240+
DataFlow::StepSummary::step(objects_attr(t2, attr_name), res, summary)
241+
}
242+
243+
/**
 * Gets a reference to the attribute `attr_name` of an `objects` object,
 * i.e. a node for which tracking has reached its end.
 * WARNING: Only holds for a few predefined attributes.
 */
DataFlow::Node objects_attr(string attr_name) {
  exists(DataFlow::TypeTracker done | done = DataFlow::TypeTracker::end() |
    result = objects_attr(done, attr_name)
  )
}
250+
251+
/** Gets a reference to the `django.db.models.expressions` module. */
252+
private DataFlow::Node expressions(DataFlow::TypeTracker t) {
253+
t.start() and
254+
result = DataFlow::importNode("django.db.models.expressions")
255+
or
256+
t.startInAttr("expressions") and
257+
result = models()
258+
or
259+
exists(DataFlow::TypeTracker t2 | result = expressions(t2).track(t2, t))
260+
}
261+
262+
/** Gets a reference to the `django.db.models.expressions` module. */
263+
DataFlow::Node expressions() { result = expressions(DataFlow::TypeTracker::end()) }
264+
265+
/** Provides models for the `django.db.models.expressions` module. */
266+
module expressions {
267+
/** Provides models for the `django.db.models.expressions.RawSQL` class. */
268+
module RawSQL {
269+
/** Gets a reference to the `django.db.models.expressions.RawSQL` class. */
270+
private DataFlow::Node classRef(DataFlow::TypeTracker t) {
271+
t.start() and
272+
result = DataFlow::importNode("django.db.models.expressions.RawSQL")
273+
or
274+
t.start() and
275+
result = DataFlow::importNode("django.db.models.RawSQL") // Commonly used alias
276+
or
277+
t.startInAttr("RawSQL") and
278+
result = expressions()
279+
or
280+
exists(DataFlow::TypeTracker t2 | result = classRef(t2).track(t2, t))
281+
}
282+
283+
/**
284+
* Gets a reference to the `django.db.models.expressions.RawSQL` class.
285+
*/
286+
DataFlow::Node classRef() { result = classRef(DataFlow::TypeTracker::end()) }
287+
288+
/** Gets an instance of the `django.db.models.expressions.RawSQL` class. */
289+
private DataFlow::Node instance(DataFlow::TypeTracker t, ControlFlowNode sql) {
290+
t.start() and
291+
exists(CallNode c | result.asCfgNode() = c |
292+
c.getFunction() = classRef().asCfgNode() and
293+
c.getArg(0) = sql
294+
)
295+
or
296+
exists(DataFlow::TypeTracker t2 | result = instance(t2, sql).track(t2, t))
297+
}
298+
299+
/** Gets an instance of the `django.db.models.expressions.RawSQL` class. */
300+
DataFlow::Node instance(ControlFlowNode sql) {
301+
result = instance(DataFlow::TypeTracker::end(), sql)
302+
}
303+
}
304+
}
305+
}
306+
}
307+
308+
/**
 * A call to the `django.db.connection.cursor.execute` function.
 *
 * The SQL is taken from the first positional argument or the `sql` keyword argument.
 *
 * See
 * - https://docs.djangoproject.com/en/3.1/topics/db/sql/#executing-custom-sql-directly
 * - https://docs.djangoproject.com/en/3.1/topics/db/sql/#connections-and-cursors
 */
private class DbConnectionExecute extends SqlExecution::Range, DataFlow::CfgNode {
  override CallNode node;

  DbConnectionExecute() { django::db::execute().asCfgNode() = node.getFunction() }

  override DataFlow::Node getSql() {
    result.asCfgNode() = node.getArg(0)
    or
    result.asCfgNode() = node.getArgByName("sql")
  }
}
324+
325+
/**
 * A call to the `annotate` function on a model using a `RawSQL` argument.
 *
 * Any positional or keyword argument that is a `RawSQL` instance makes the call
 * match; the reported SQL is the SQL argument of that `RawSQL` instance.
 *
 * TODO: Consider reworking this to use taint tracking.
 *
 * See https://docs.djangoproject.com/en/3.1/ref/models/querysets/#annotate
 */
private class ObjectsAnnotate extends SqlExecution::Range, DataFlow::CfgNode {
  override CallNode node;
  /** The SQL argument given to the `RawSQL` instance passed to `annotate`. */
  ControlFlowNode sql;

  ObjectsAnnotate() {
    node.getFunction() = django::db::models::objects_attr("annotate").asCfgNode() and
    exists(ControlFlowNode arg |
      arg = node.getArg(_)
      or
      arg = node.getArgByName(_)
    |
      arg = django::db::models::expressions::RawSQL::instance(sql).asCfgNode()
    )
  }

  override DataFlow::Node getSql() { sql = result.asCfgNode() }
}
344+
345+
/**
 * A call to the `raw` function on a model.
 *
 * The SQL is taken from the first positional argument or from the keyword
 * argument `raw_query` (the name of the first parameter of `raw`).
 *
 * See
 * - https://docs.djangoproject.com/en/3.1/topics/db/sql/#django.db.models.Manager.raw
 * - https://docs.djangoproject.com/en/3.1/ref/models/querysets/#raw
 */
private class ObjectsRaw extends SqlExecution::Range, DataFlow::CfgNode {
  override CallNode node;

  ObjectsRaw() { node.getFunction() = django::db::models::objects_attr("raw").asCfgNode() }

  override DataFlow::Node getSql() {
    // Also cover `raw(raw_query=...)`, mirroring how `DbConnectionExecute` handles `sql=`.
    result.asCfgNode() in [node.getArg(0), node.getArgByName("raw_query")]
  }
}
359+
360+
/**
 * A call to the `extra` function on a model.
 *
 * The SQL is taken from positional arguments 0, 1, 3 and 4, or from the keyword
 * arguments `select`, `where`, `tables` and `order_by`.
 *
 * See https://docs.djangoproject.com/en/3.1/ref/models/querysets/#extra
 */
private class ObjectsExtra extends SqlExecution::Range, DataFlow::CfgNode {
  override CallNode node;

  ObjectsExtra() { django::db::models::objects_attr("extra").asCfgNode() = node.getFunction() }

  override DataFlow::Node getSql() {
    exists(int index | index in [0, 1, 3, 4] | result.asCfgNode() = node.getArg(index))
    or
    exists(string keyword | keyword in ["select", "where", "tables", "order_by"] |
      result.asCfgNode() = node.getArgByName(keyword)
    )
  }
}
375+
73376
// -------------------------------------------------------------------------
74377
// django.urls
75378
// -------------------------------------------------------------------------

python/ql/test/experimental/library-tests/frameworks/django/ConceptsTest.expected

Whitespace-only changes.
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
import python
2+
import experimental.meta.ConceptsTest

0 commit comments

Comments
 (0)