Skip to content

Commit a0c7365

Browse files
committed
Python: Proper models of json.loads and json.dumps
1 parent 346a007 commit a0c7365

File tree

3 files changed

+90
-15
lines changed

3 files changed

+90
-15
lines changed

python/ql/src/semmle/python/dataflow/new/internal/TaintTrackingPrivate.qll

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,6 @@ predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeT
3131
or
3232
stringManipulation(nodeFrom, nodeTo)
3333
or
34-
jsonStep(nodeFrom, nodeTo)
35-
or
3634
containerStep(nodeFrom, nodeTo)
3735
or
3836
copyStep(nodeFrom, nodeTo)
@@ -135,16 +133,6 @@ predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeT
135133
// TODO: Handle functions in https://docs.python.org/3/library/binascii.html
136134
}
137135

138-
/**
139-
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to JSON encoding/decoding.
140-
*/
141-
predicate jsonStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
142-
exists(CallNode call | call = nodeTo.getNode() |
143-
call.getFunction().(AttrNode).getObject(["load", "loads", "dumps"]).(NameNode).getId() = "json" and
144-
call.getArg(0) = nodeFrom.getNode()
145-
)
146-
}
147-
148136
/**
149137
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to containers
150138
* (lists/sets/dictionaries): literals, constructor invocation, methods. Note that this

python/ql/src/semmle/python/frameworks/Stdlib.qll

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -945,6 +945,93 @@ private module Stdlib {
945945
private DataFlow::Node io_attr(string attr_name) {
946946
result = io_attr(DataFlow::TypeTracker::end(), attr_name)
947947
}
948+
949+
// ---------------------------------------------------------------------------
950+
// json
951+
// ---------------------------------------------------------------------------
952+
/** Gets a reference to the `json` module. */
953+
private DataFlow::Node json(DataFlow::TypeTracker t) {
954+
t.start() and
955+
result = DataFlow::importNode("json")
956+
or
957+
exists(DataFlow::TypeTracker t2 | result = json(t2).track(t2, t))
958+
}
959+
960+
/** Gets a reference to the `json` module. */
961+
DataFlow::Node json() { result = json(DataFlow::TypeTracker::end()) }
962+
963+
/**
964+
* Gets a reference to the attribute `attr_name` of the `json` module.
965+
* WARNING: Only holds for a few predefined attributes.
966+
*/
967+
private DataFlow::Node json_attr(DataFlow::TypeTracker t, string attr_name) {
968+
attr_name in ["loads", "dumps"] and
969+
(
970+
t.start() and
971+
result = DataFlow::importNode("json" + "." + attr_name)
972+
or
973+
t.startInAttr(attr_name) and
974+
result = json()
975+
)
976+
or
977+
// Due to bad performance when using normal setup with `json_attr(t2, attr_name).track(t2, t)`
978+
// we have inlined that code and forced a join
979+
exists(DataFlow::TypeTracker t2 |
980+
exists(DataFlow::StepSummary summary |
981+
json_attr_first_join(t2, attr_name, result, summary) and
982+
t = t2.append(summary)
983+
)
984+
)
985+
}
986+
987+
pragma[nomagic]
988+
private predicate json_attr_first_join(
989+
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
990+
) {
991+
DataFlow::StepSummary::step(json_attr(t2, attr_name), res, summary)
992+
}
993+
994+
/**
995+
* Gets a reference to the attribute `attr_name` of the `json` module.
996+
* WARNING: Only holds for a few predefined attributes.
997+
*/
998+
private DataFlow::Node json_attr(string attr_name) {
999+
result = json_attr(DataFlow::TypeTracker::end(), attr_name)
1000+
}
1001+
1002+
/**
1003+
* A call to `json.loads`
1004+
* See https://docs.python.org/3/library/json.html#json.loads
1005+
*/
1006+
private class JsonLoadsCall extends Decoding::Range, DataFlow::CfgNode {
1007+
override CallNode node;
1008+
1009+
JsonLoadsCall() { node.getFunction() = json_attr("loads").asCfgNode() }
1010+
1011+
override predicate mayExecuteInput() { none() }
1012+
1013+
override DataFlow::Node getAnInput() { result.asCfgNode() = node.getArg(0) }
1014+
1015+
override DataFlow::Node getOutput() { result = this }
1016+
1017+
override string getFormat() { result = "JSON" }
1018+
}
1019+
1020+
/**
1021+
* A call to `json.dumps`
1022+
* See https://docs.python.org/3/library/json.html#json.dumps
1023+
*/
1024+
private class JsonDumpsCall extends Encoding::Range, DataFlow::CfgNode {
1025+
override CallNode node;
1026+
1027+
JsonDumpsCall() { node.getFunction() = json_attr("dumps").asCfgNode() }
1028+
1029+
override DataFlow::Node getAnInput() { result.asCfgNode() = node.getArg(0) }
1030+
1031+
override DataFlow::Node getOutput() { result = this }
1032+
1033+
override string getFormat() { result = "JSON" }
1034+
}
9481035
}
9491036

9501037
// ---------------------------------------------------------------------------

python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/TestTaint.expected

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,9 +68,9 @@
6868
| test_json.py:27 | ok | test | json.loads(..) |
6969
| test_json.py:34 | fail | test | tainted_filelike |
7070
| test_json.py:35 | fail | test | json.load(..) |
71-
| test_json.py:48 | fail | non_syntacical | dumps(..) |
72-
| test_json.py:49 | fail | non_syntacical | dumps_alias(..) |
73-
| test_json.py:50 | fail | non_syntacical | loads(..) |
71+
| test_json.py:48 | ok | non_syntacical | dumps(..) |
72+
| test_json.py:49 | ok | non_syntacical | dumps_alias(..) |
73+
| test_json.py:50 | ok | non_syntacical | loads(..) |
7474
| test_json.py:57 | fail | non_syntacical | tainted_filelike |
7575
| test_json.py:58 | fail | non_syntacical | load(..) |
7676
| test_string.py:25 | ok | str_operations | ts |

0 commit comments

Comments
 (0)