Skip to content

Commit ba1ca70

Browse files
committed
Python: Add source modeling of stdlib HTTPRequestHandlers
1 parent 3486372 commit ba1ca70

File tree

2 files changed

+344
-11
lines changed

2 files changed

+344
-11
lines changed

python/ql/src/semmle/python/frameworks/Stdlib.qll

Lines changed: 333 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1231,6 +1231,339 @@ private module Stdlib {
12311231
}
12321232
}
12331233
}
1234+
1235+
// ---------------------------------------------------------------------------
1236+
// BaseHTTPServer (Python 2 only)
1237+
// ---------------------------------------------------------------------------
1238+
/** Gets a reference to the `BaseHTTPServer` module. */
1239+
private DataFlow::Node baseHTTPServer(DataFlow::TypeTracker t) {
1240+
t.start() and
1241+
result = DataFlow::importNode("BaseHTTPServer")
1242+
or
1243+
exists(DataFlow::TypeTracker t2 | result = baseHTTPServer(t2).track(t2, t))
1244+
}
1245+
1246+
/** Gets a reference to the `BaseHTTPServer` module. */
1247+
DataFlow::Node baseHTTPServer() { result = baseHTTPServer(DataFlow::TypeTracker::end()) }
1248+
1249+
/** Provides models for the `BaseHTTPServer` module. */
1250+
module BaseHTTPServer {
1251+
/**
1252+
* Provides models for the `BaseHTTPServer.BaseHTTPRequestHandler` class (Python 2 only).
1253+
*/
1254+
module BaseHTTPRequestHandler {
1255+
/** Gets a reference to the `BaseHTTPServer.BaseHTTPRequestHandler` class. */
1256+
private DataFlow::Node classRef(DataFlow::TypeTracker t) {
1257+
t.start() and
1258+
result = DataFlow::importNode("BaseHTTPServer" + "." + "BaseHTTPRequestHandler")
1259+
or
1260+
t.startInAttr("BaseHTTPRequestHandler") and
1261+
result = baseHTTPServer()
1262+
or
1263+
exists(DataFlow::TypeTracker t2 | result = classRef(t2).track(t2, t))
1264+
}
1265+
1266+
/** Gets a reference to the `BaseHTTPServer.BaseHTTPRequestHandler` class. */
1267+
DataFlow::Node classRef() { result = classRef(DataFlow::TypeTracker::end()) }
1268+
}
1269+
}
1270+
1271+
// ---------------------------------------------------------------------------
1272+
// SimpleHTTPServer (Python 2 only)
1273+
// ---------------------------------------------------------------------------
1274+
/** Gets a reference to the `SimpleHTTPServer` module. */
1275+
private DataFlow::Node simpleHTTPServer(DataFlow::TypeTracker t) {
1276+
t.start() and
1277+
result = DataFlow::importNode("SimpleHTTPServer")
1278+
or
1279+
exists(DataFlow::TypeTracker t2 | result = simpleHTTPServer(t2).track(t2, t))
1280+
}
1281+
1282+
/** Gets a reference to the `SimpleHTTPServer` module. */
1283+
DataFlow::Node simpleHTTPServer() { result = simpleHTTPServer(DataFlow::TypeTracker::end()) }
1284+
1285+
/** Provides models for the `SimpleHTTPServer` module. */
1286+
module SimpleHTTPServer {
1287+
/**
1288+
* Provides models for the `SimpleHTTPServer.SimpleHTTPRequestHandler` class (Python 2 only).
1289+
*/
1290+
module SimpleHTTPRequestHandler {
1291+
/** Gets a reference to the `SimpleHTTPServer.SimpleHTTPRequestHandler` class. */
1292+
private DataFlow::Node classRef(DataFlow::TypeTracker t) {
1293+
t.start() and
1294+
result = DataFlow::importNode("SimpleHTTPServer" + "." + "SimpleHTTPRequestHandler")
1295+
or
1296+
t.startInAttr("SimpleHTTPRequestHandler") and
1297+
result = simpleHTTPServer()
1298+
or
1299+
exists(DataFlow::TypeTracker t2 | result = classRef(t2).track(t2, t))
1300+
}
1301+
1302+
/** Gets a reference to the `SimpleHTTPServer.SimpleHTTPRequestHandler` class. */
1303+
DataFlow::Node classRef() { result = classRef(DataFlow::TypeTracker::end()) }
1304+
}
1305+
}
1306+
1307+
// ---------------------------------------------------------------------------
1308+
// CGIHTTPServer (Python 2 only)
1309+
// ---------------------------------------------------------------------------
1310+
/** Gets a reference to the `CGIHTTPServer` module. */
1311+
private DataFlow::Node cgiHTTPServer(DataFlow::TypeTracker t) {
1312+
t.start() and
1313+
result = DataFlow::importNode("CGIHTTPServer")
1314+
or
1315+
exists(DataFlow::TypeTracker t2 | result = cgiHTTPServer(t2).track(t2, t))
1316+
}
1317+
1318+
/** Gets a reference to the `CGIHTTPServer` module. */
1319+
DataFlow::Node cgiHTTPServer() { result = cgiHTTPServer(DataFlow::TypeTracker::end()) }
1320+
1321+
/** Provides models for the `CGIHTTPServer` module. */
1322+
module CGIHTTPServer {
1323+
/**
1324+
* Provides models for the `CGIHTTPServer.CGIHTTPRequestHandler` class (Python 2 only).
1325+
*/
1326+
module CGIHTTPRequestHandler {
1327+
/** Gets a reference to the `CGIHTTPServer.CGIHTTPRequestHandler` class. */
1328+
private DataFlow::Node classRef(DataFlow::TypeTracker t) {
1329+
t.start() and
1330+
result = DataFlow::importNode("CGIHTTPServer" + "." + "CGIHTTPRequestHandler")
1331+
or
1332+
t.startInAttr("CGIHTTPRequestHandler") and
1333+
result = cgiHTTPServer()
1334+
or
1335+
exists(DataFlow::TypeTracker t2 | result = classRef(t2).track(t2, t))
1336+
}
1337+
1338+
/** Gets a reference to the `CGIHTTPServer.CGIHTTPRequestHandler` class. */
1339+
DataFlow::Node classRef() { result = classRef(DataFlow::TypeTracker::end()) }
1340+
}
1341+
}
1342+
1343+
// ---------------------------------------------------------------------------
1344+
// http (Python 3 only)
1345+
// ---------------------------------------------------------------------------
1346+
/** Gets a reference to the `http` module. */
1347+
private DataFlow::Node http(DataFlow::TypeTracker t) {
1348+
t.start() and
1349+
result = DataFlow::importNode("http")
1350+
or
1351+
exists(DataFlow::TypeTracker t2 | result = http(t2).track(t2, t))
1352+
}
1353+
1354+
/** Gets a reference to the `http` module. */
1355+
DataFlow::Node http() { result = http(DataFlow::TypeTracker::end()) }
1356+
1357+
/**
1358+
* Gets a reference to the attribute `attr_name` of the `http` module.
1359+
* WARNING: Only holds for a few predefined attributes.
1360+
*/
1361+
private DataFlow::Node http_attr(DataFlow::TypeTracker t, string attr_name) {
1362+
attr_name in ["server"] and
1363+
(
1364+
t.start() and
1365+
result = DataFlow::importNode("http" + "." + attr_name)
1366+
or
1367+
t.startInAttr(attr_name) and
1368+
result = http()
1369+
)
1370+
or
1371+
// Due to bad performance when using normal setup with `http_attr(t2, attr_name).track(t2, t)`
1372+
// we have inlined that code and forced a join
1373+
exists(DataFlow::TypeTracker t2 |
1374+
exists(DataFlow::StepSummary summary |
1375+
http_attr_first_join(t2, attr_name, result, summary) and
1376+
t = t2.append(summary)
1377+
)
1378+
)
1379+
}
1380+
1381+
pragma[nomagic]
1382+
private predicate http_attr_first_join(
1383+
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
1384+
) {
1385+
DataFlow::StepSummary::step(http_attr(t2, attr_name), res, summary)
1386+
}
1387+
1388+
/**
1389+
* Gets a reference to the attribute `attr_name` of the `http` module.
1390+
* WARNING: Only holds for a few predefined attributes.
1391+
*/
1392+
private DataFlow::Node http_attr(string attr_name) {
1393+
result = http_attr(DataFlow::TypeTracker::end(), attr_name)
1394+
}
1395+
1396+
/** Provides models for the `http` module. */
1397+
module http {
1398+
// -------------------------------------------------------------------------
1399+
// http.server
1400+
// -------------------------------------------------------------------------
1401+
/** Gets a reference to the `http.server` module. */
1402+
DataFlow::Node server() { result = http_attr("server") }
1403+
1404+
/** Provides models for the `http.server` module. */
1405+
module server {
1406+
/**
1407+
* Gets a reference to the attribute `attr_name` of the `http.server` module.
1408+
* WARNING: Only holds for a few predefined attributes.
1409+
*/
1410+
private DataFlow::Node server_attr(DataFlow::TypeTracker t, string attr_name) {
1411+
attr_name in ["BaseHTTPRequestHandler", "SimpleHTTPRequestHandler", "CGIHTTPRequestHandler"] and
1412+
(
1413+
t.start() and
1414+
result = DataFlow::importNode("http.server" + "." + attr_name)
1415+
or
1416+
t.startInAttr(attr_name) and
1417+
result = server()
1418+
)
1419+
or
1420+
// Due to bad performance when using normal setup with `server_attr(t2, attr_name).track(t2, t)`
1421+
// we have inlined that code and forced a join
1422+
exists(DataFlow::TypeTracker t2 |
1423+
exists(DataFlow::StepSummary summary |
1424+
server_attr_first_join(t2, attr_name, result, summary) and
1425+
t = t2.append(summary)
1426+
)
1427+
)
1428+
}
1429+
1430+
pragma[nomagic]
1431+
private predicate server_attr_first_join(
1432+
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res,
1433+
DataFlow::StepSummary summary
1434+
) {
1435+
DataFlow::StepSummary::step(server_attr(t2, attr_name), res, summary)
1436+
}
1437+
1438+
/**
1439+
* Gets a reference to the attribute `attr_name` of the `http.server` module.
1440+
* WARNING: Only holds for a few predefined attributes.
1441+
*/
1442+
private DataFlow::Node server_attr(string attr_name) {
1443+
result = server_attr(DataFlow::TypeTracker::end(), attr_name)
1444+
}
1445+
1446+
/**
1447+
* Provides models for the `http.server.BaseHTTPRequestHandler` class (Python 3 only).
1448+
*
1449+
* See https://docs.python.org/3.9/library/http.server.html#http.server.BaseHTTPRequestHandler.
1450+
*/
1451+
module BaseHTTPRequestHandler {
1452+
/** Gets a reference to the `http.server.BaseHTTPRequestHandler` class. */
1453+
DataFlow::Node classRef() { result = server_attr("BaseHTTPRequestHandler") }
1454+
}
1455+
1456+
/**
1457+
* Provides models for the `http.server.SimpleHTTPRequestHandler` class (Python 3 only).
1458+
*
1459+
* See https://docs.python.org/3.9/library/http.server.html#http.server.SimpleHTTPRequestHandler.
1460+
*/
1461+
module SimpleHTTPRequestHandler {
1462+
/** Gets a reference to the `http.server.SimpleHTTPRequestHandler` class. */
1463+
DataFlow::Node classRef() { result = server_attr("SimpleHTTPRequestHandler") }
1464+
}
1465+
1466+
/**
1467+
* Provides models for the `http.server.CGIHTTPRequestHandler` class (Python 3 only).
1468+
*
1469+
* See https://docs.python.org/3.9/library/http.server.html#http.server.CGIHTTPRequestHandler.
1470+
*/
1471+
module CGIHTTPRequestHandler {
1472+
/** Gets a reference to the `http.server.CGIHTTPRequestHandler` class. */
1473+
DataFlow::Node classRef() { result = server_attr("CGIHTTPRequestHandler") }
1474+
}
1475+
}
1476+
}
1477+
1478+
/**
1479+
* Provides models for the `BaseHTTPRequestHandler` class and subclasses.
1480+
*
1481+
* See
1482+
* - https://docs.python.org/3.9/library/http.server.html#http.server.BaseHTTPRequestHandler
1483+
* - https://docs.python.org/2.7/library/basehttpserver.html#BaseHTTPServer.BaseHTTPRequestHandler
1484+
*/
1485+
private module HTTPRequestHandler {
1486+
/** Gets a reference to the `BaseHTTPRequestHandler` class or any subclass. */
1487+
private DataFlow::Node subclassRef(DataFlow::TypeTracker t) {
1488+
// Python 2
1489+
t.start() and
1490+
result in [
1491+
BaseHTTPServer::BaseHTTPRequestHandler::classRef(),
1492+
SimpleHTTPServer::SimpleHTTPRequestHandler::classRef(),
1493+
CGIHTTPServer::CGIHTTPRequestHandler::classRef()
1494+
]
1495+
or
1496+
// Python 3
1497+
t.start() and
1498+
result in [
1499+
http::server::BaseHTTPRequestHandler::classRef(),
1500+
http::server::SimpleHTTPRequestHandler::classRef(),
1501+
http::server::CGIHTTPRequestHandler::classRef()
1502+
]
1503+
or
1504+
// subclasses in project code
1505+
result.asExpr().(ClassExpr).getABase() = subclassRef(t.continue()).asExpr()
1506+
or
1507+
exists(DataFlow::TypeTracker t2 | result = subclassRef(t2).track(t2, t))
1508+
}
1509+
1510+
/** Gets a reference to the `BaseHTTPRequestHandler` class or any subclass. */
1511+
DataFlow::Node subclassRef() { result = subclassRef(DataFlow::TypeTracker::end()) }
1512+
1513+
/** An HTTPRequestHandler class definition (most likely in project code). */
1514+
class HTTPRequestHandlerClassDef extends Class {
1515+
HTTPRequestHandlerClassDef() { this.getParent() = subclassRef().asExpr() }
1516+
}
1517+
1518+
/**
1519+
* A source of an instance of the `BaseHTTPRequestHandler` class or any subclass.
1520+
*
1521+
* This can include instantiation of the class, return value from function
1522+
* calls, or a special parameter that will be set when functions are called by an external
1523+
* library.
1524+
*
1525+
* Use the `HTTPRequestHandler::instance()` predicate to get references to instances of the `BaseHTTPRequestHandler` class or any subclass.
1526+
*/
1527+
abstract class InstanceSource extends DataFlow::Node { }
1528+
1529+
/** The `self` parameter in a method on the `BaseHTTPRequestHandler` class or any subclass. */
1530+
private class SelfParam extends InstanceSource, RemoteFlowSource::Range, DataFlow::ParameterNode {
1531+
SelfParam() {
1532+
exists(HTTPRequestHandlerClassDef cls | cls.getAMethod().getArg(0) = this.getParameter())
1533+
}
1534+
1535+
override string getSourceType() { result = "stdlib HTTPRequestHandler" }
1536+
}
1537+
1538+
/** Gets a reference to an instance of the `BaseHTTPRequestHandler` class or any subclass. */
1539+
private DataFlow::Node instance(DataFlow::TypeTracker t) {
1540+
t.start() and
1541+
result instanceof InstanceSource
1542+
or
1543+
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
1544+
}
1545+
1546+
/** Gets a reference to an instance of the `BaseHTTPRequestHandler` class or any subclass. */
1547+
DataFlow::Node instance() { result = instance(DataFlow::TypeTracker::end()) }
1548+
1549+
private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
1550+
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
1551+
nodeFrom = instance() and
1552+
exists(DataFlow::AttrRead read | nodeTo = read and read.getObject() = nodeFrom |
1553+
read.getAttributeName() in [
1554+
// str
1555+
"requestline", "path",
1556+
// by default dict-like http.client.HTTPMessage, which is a subclass of email.message.Message
1557+
// see https://docs.python.org/3.9/library/email.compat32-message.html#email.message.Message
1558+
// TODO: Implement custom methods (at least `get_all`, `as_bytes`, `as_string`)
1559+
"headers",
1560+
// file-like
1561+
"rfile"
1562+
]
1563+
)
1564+
}
1565+
}
1566+
}
12341567
}
12351568

12361569
// ---------------------------------------------------------------------------

python/ql/test/experimental/library-tests/frameworks/stdlib/TestTaint.expected

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,20 +18,20 @@
1818
| http_server.py:39 | ok | test_cgi_FieldStorage_taint | form.getlist(..) |
1919
| http_server.py:40 | ok | test_cgi_FieldStorage_taint | form.getlist(..)[0] |
2020
| http_server.py:41 | fail | test_cgi_FieldStorage_taint | ListComp |
21-
| http_server.py:50 | fail | taint_sources | self |
22-
| http_server.py:52 | fail | taint_sources | self.requestline |
23-
| http_server.py:54 | fail | taint_sources | self.path |
24-
| http_server.py:56 | fail | taint_sources | self.headers |
25-
| http_server.py:57 | fail | taint_sources | self.headers['Foo'] |
26-
| http_server.py:58 | fail | taint_sources | self.headers.get(..) |
21+
| http_server.py:50 | ok | taint_sources | self |
22+
| http_server.py:52 | ok | taint_sources | self.requestline |
23+
| http_server.py:54 | ok | taint_sources | self.path |
24+
| http_server.py:56 | ok | taint_sources | self.headers |
25+
| http_server.py:57 | ok | taint_sources | self.headers['Foo'] |
26+
| http_server.py:58 | ok | taint_sources | self.headers.get(..) |
2727
| http_server.py:59 | fail | taint_sources | self.headers.get_all(..) |
2828
| http_server.py:60 | fail | taint_sources | self.headers.keys() |
29-
| http_server.py:61 | fail | taint_sources | self.headers.values() |
30-
| http_server.py:62 | fail | taint_sources | self.headers.items() |
29+
| http_server.py:61 | ok | taint_sources | self.headers.values() |
30+
| http_server.py:62 | ok | taint_sources | self.headers.items() |
3131
| http_server.py:63 | fail | taint_sources | self.headers.as_bytes() |
3232
| http_server.py:64 | fail | taint_sources | self.headers.as_string() |
33-
| http_server.py:65 | fail | taint_sources | str(..) |
34-
| http_server.py:66 | fail | taint_sources | bytes(..) |
35-
| http_server.py:68 | fail | taint_sources | self.rfile |
33+
| http_server.py:65 | ok | taint_sources | str(..) |
34+
| http_server.py:66 | ok | taint_sources | bytes(..) |
35+
| http_server.py:68 | ok | taint_sources | self.rfile |
3636
| http_server.py:69 | fail | taint_sources | self.rfile.read() |
3737
| http_server.py:78 | ok | taint_sources | form |

0 commit comments

Comments
 (0)