Skip to content

Commit 29b3759

Browse files
authored
Merge pull request #3961 from tausbn/python-add-typetracker
Python: Add type tracker and step summary implementation.
2 parents db45b29 + 6a96c53 commit 29b3759

File tree

6 files changed

+455
-0
lines changed

6 files changed

+455
-0
lines changed
Lines changed: 282 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,282 @@
1+
/** Step Summaries and Type Tracking */
2+
3+
import python
4+
import internal.DataFlowPublic
5+
import internal.DataFlowPrivate
6+
7+
/**
 * A string that is the name of some `Attribute`, and may therefore appear as
 * the name of an attribute or access path.
 */
class AttributeName extends string {
  AttributeName() { exists(Attribute a | this = a.getName()) }
}
11+
12+
/**
 * An `AttributeName`, or the empty string. The empty string represents
 * "no attribute".
 */
class OptionalAttributeName extends string {
  OptionalAttributeName() {
    this = ""
    or
    this instanceof AttributeName
  }
}
16+
17+
/**
 * The kinds of step that can make up an inter-procedural data flow path.
 */
private newtype TStepSummary =
  /** A step along `EssaFlow::essaFlowStep`. */
  LevelStep() or
  /** A step from an argument to the matching parameter (`callStep`). */
  CallStep() or
  /** A step from a return node to the node of the call (`returnStep`). */
  ReturnStep() or
  /** A write of a value into the attribute `attr` (`basicStoreStep`). */
  StoreStep(AttributeName attr) or
  /** A read of the attribute `attr` (`basicLoadStep`). */
  LoadStep(AttributeName attr)
26+
27+
/**
 * INTERNAL: Use `TypeTracker` instead.
 *
 * A description of a step on an inter-procedural data flow path.
 */
class StepSummary extends TStepSummary {
  /**
   * Gets a textual representation of this step summary: one of `level`, `call`,
   * `return`, `store <attr>` or `load <attr>`.
   */
  string toString() {
    this = LevelStep() and result = "level"
    or
    this = CallStep() and result = "call"
    or
    this = ReturnStep() and result = "return"
    or
    exists(string attr |
      this = StoreStep(attr) and result = "store " + attr
      or
      this = LoadStep(attr) and result = "load " + attr
    )
  }
}
46+
47+
/** Provides predicates that relate pairs of nodes to the `StepSummary` connecting them. */
module StepSummary {
  /**
   * Holds if `nodeTo` can be reached from `nodeFrom` by zero or more
   * `EssaFlow::essaFlowStep`s followed by one `smallstep` described by `summary`.
   */
  cached
  predicate step(Node nodeFrom, Node nodeTo, StepSummary summary) {
    exists(Node mid |
      smallstep(mid, nodeTo, summary) and
      EssaFlow::essaFlowStep*(nodeFrom, mid)
    )
  }

  /**
   * Holds if there is a single step from `nodeFrom` to `nodeTo` of the kind
   * described by `summary`.
   */
  predicate smallstep(Node nodeFrom, Node nodeTo, StepSummary summary) {
    summary = LevelStep() and
    EssaFlow::essaFlowStep(nodeFrom, nodeTo)
    or
    summary = CallStep() and
    callStep(nodeFrom, nodeTo)
    or
    summary = ReturnStep() and
    returnStep(nodeFrom, nodeTo)
    or
    exists(string attr |
      summary = StoreStep(attr) and
      basicStoreStep(nodeFrom, nodeTo, attr)
    )
    or
    exists(string attr |
      summary = LoadStep(attr) and
      basicLoadStep(nodeFrom, nodeTo, attr)
    )
  }
}
70+
71+
/**
 * Holds if `nodeFrom` is an argument of some call and `nodeTo` is the parameter
 * at the same position of that call's callable.
 */
predicate callStep(ArgumentNode nodeFrom, ParameterNode nodeTo) {
  // TODO: Support special methods?
  exists(DataFlowCall c, int pos |
    nodeTo.isParameterOf(c.getCallable(), pos) and
    nodeFrom.argumentOf(c, pos)
  )
}
78+
79+
/**
 * Holds if `nodeFrom` is a return node whose enclosing callable is the callable
 * of some call, and `nodeTo` is the node of that call.
 */
predicate returnStep(ReturnNode nodeFrom, Node nodeTo) {
  exists(DataFlowCall c |
    nodeTo.asCfgNode() = c.getNode() and
    c.getCallable() = nodeFrom.getEnclosingCallable()
  )
}
85+
86+
/**
 * Holds if the value of `nodeFrom` is written to the attribute named `attr` of the
 * object in `nodeTo`.
 *
 * `nodeTo` is not required to make sense "chronologically". What matters is whether
 * the `attr` attribute of `nodeTo` can have a specific type; the assumption is that
 * once a type appears in such a write, any access of that attribute can yield
 * something of that type.
 *
 * For instance, given
 *
 * ```python
 * def foo(y):
 *     x = Foo()
 *     bar(x)
 *     x.attr = y
 *     baz(x)
 *
 * def bar(x):
 *     z = x.attr
 * ```
 * the attribute write `x.attr = y` has `attr` equal to the literal string `"attr"`,
 * `nodeFrom` equal to `y`, and `nodeTo` equal to the `Foo()` object created on the
 * first line of `foo`. The fact that `x.attr` can have the type of `y` is therefore
 * tracked into the assignment to `z` inside `bar`, even though the write happens
 * _after_ `bar` is called.
 */
predicate basicStoreStep(Node nodeFrom, Node nodeTo, string attr) {
  exists(AttributeAssignment write, Node target |
    nodeFrom.asCfgNode() = write.getValue() and
    write.getName() = attr and
    target.asVar() = write.getInput() and
    // `nodeTo` is any node that flows, in zero or more local steps, to the written-to variable.
    EssaFlow::essaFlowStep*(nodeTo, target)
  )
}
119+
120+
/**
 * Holds if `nodeTo` is the attribute node obtained by accessing the attribute
 * named `attr` on the object `nodeFrom`.
 */
predicate basicLoadStep(Node nodeFrom, Node nodeTo, string attr) {
  // NOTE(review): `AttrNode` may also cover attribute nodes in store position — confirm
  // whether this should be restricted to reads.
  exists(AttrNode access |
    access.getObject(attr) = nodeFrom.asCfgNode() and
    access = nodeTo.asCfgNode()
  )
}
126+
127+
/**
 * A utility class with the same values as `boolean` (`true` and `false`) that
 * does not require type joining.
 */
private class Boolean extends boolean {
  Boolean() { this = false or this = true }
}
133+
134+
// Underlying type of `TypeTracker`: a flag recording whether a call step has been
// taken, paired with the attribute being tracked (`""` when there is none).
private newtype TTypeTracker = MkTypeTracker(Boolean hasCall, OptionalAttributeName attr)
135+
136+
/**
 * Summary of the steps needed to track a value to a given dataflow node.
 *
 * This can be used to track objects that implement a certain API in order to
 * recognize calls to that API. Note that type-tracking does not by itself provide a
 * source/sink relation, that is, it may determine that a node has a given type,
 * but it won't determine where that type came from.
 *
 * It is recommended that all uses of this type are written in the following form,
 * for tracking some type `myType`:
 * ```ql
 * DataFlow::Node myType(DataFlow::TypeTracker t) {
 *   t.start() and
 *   result = < source of myType >
 *   or
 *   exists (DataFlow::TypeTracker t2 |
 *     result = myType(t2).track(t2, t)
 *   )
 * }
 *
 * DataFlow::Node myType() { result = myType(DataFlow::TypeTracker::end()) }
 * ```
 *
 * Instead of `result = myType(t2).track(t2, t)`, you can also use the equivalent
 * `t = t2.step(myType(t2), result)`. If you additionally want to track individual
 * intra-procedural steps, use `t = t2.smallstep(myType(t2), result)`.
 */
class TypeTracker extends TTypeTracker {
  Boolean hasCall;
  OptionalAttributeName attr;

  TypeTracker() { this = MkTypeTracker(hasCall, attr) }

  /** Gets the summary resulting from appending `step` to this type-tracking summary. */
  cached
  TypeTracker append(StepSummary step) {
    step = LevelStep() and result = this
    or
    step = CallStep() and result = MkTypeTracker(true, attr)
    or
    // A return step is only followed while no call step has been recorded.
    step = ReturnStep() and hasCall = false and result = this
    or
    // Loading the attribute currently being tracked clears it again.
    step = LoadStep(attr) and result = MkTypeTracker(hasCall, "")
    or
    // A store is only followed while no attribute is being tracked.
    exists(string p | step = StoreStep(p) and attr = "" and result = MkTypeTracker(hasCall, p))
  }

  /** Gets a textual representation of this summary. */
  string toString() {
    exists(string withCall, string withAttr |
      (if hasCall = true then withCall = "with" else withCall = "without") and
      (if attr != "" then withAttr = " with attribute " + attr else withAttr = "") and
      result = "type tracker " + withCall + " call steps" + withAttr
    )
  }

  /**
   * Holds if this is the starting point of type tracking.
   */
  predicate start() { hasCall = false and attr = "" }

  /**
   * Holds if this is the starting point of type tracking, and the value starts in the attribute named `attrName`.
   * The type tracking only ends after the attribute has been loaded.
   */
  predicate startInAttr(AttributeName attrName) { hasCall = false and attr = attrName }

  /**
   * Holds if this is the starting point of type tracking
   * when tracking a parameter into a call, but not out of it.
   */
  predicate call() { hasCall = true and attr = "" }

  /**
   * Holds if this is the end point of type tracking, that is, no attribute is
   * currently being tracked.
   */
  predicate end() { attr = "" }

  /**
   * INTERNAL. DO NOT USE.
   *
   * Gets `true` if this type has been tracked into a call, and `false` otherwise.
   */
  boolean hasCall() { result = hasCall }

  /**
   * INTERNAL. DO NOT USE.
   *
   * Gets the attribute associated with this type tracker (`""` if there is none).
   */
  string getAttr() { result = attr }

  /**
   * Gets a type tracker that starts where this one has left off to allow continued
   * tracking.
   *
   * This predicate is only defined if the type has not been tracked into an attribute.
   */
  TypeTracker continue() { attr = "" and result = this }

  /**
   * Gets the summary that corresponds to having taken a forwards
   * heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
   */
  pragma[inline]
  TypeTracker step(Node nodeFrom, Node nodeTo) {
    exists(StepSummary summary |
      StepSummary::step(nodeFrom, nodeTo, summary) and
      result = this.append(summary)
    )
  }

  /**
   * Gets the summary that corresponds to having taken a forwards
   * local, heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
   *
   * `TypeTracker::step` is built on `StepSummary::step`, which folds any preceding
   * local flow steps into each step. This predicate is built on
   * `StepSummary::smallstep` and exposes every individual edge.
   * It may therefore be less performant.
   *
   * Type tracking predicates using small steps typically take the following form:
   * ```ql
   * DataFlow::Node myType(DataFlow::TypeTracker t) {
   *   t.start() and
   *   result = < source of myType >
   *   or
   *   exists (DataFlow::TypeTracker t2 |
   *     t = t2.smallstep(myType(t2), result)
   *   )
   * }
   *
   * DataFlow::Node myType() {
   *   result = myType(DataFlow::TypeTracker::end())
   * }
   * ```
   */
  pragma[inline]
  TypeTracker smallstep(Node nodeFrom, Node nodeTo) {
    exists(StepSummary summary |
      StepSummary::smallstep(nodeFrom, nodeTo, summary) and
      result = this.append(summary)
    )
    or
    // NOTE(review): this case is already implied by the `LevelStep()` case above
    // (`append` leaves the tracker unchanged for it); presumably kept as a direct
    // shortcut — confirm before removing.
    EssaFlow::essaFlowStep(nodeFrom, nodeTo) and
    result = this
  }
}

/** Provides predicates for implementing custom `TypeTracker`s. */
module TypeTracker {
  /**
   * Gets a valid end point of type tracking.
   *
   * This makes the `TypeTracker::end()` form used in the `TypeTracker` usage
   * examples available.
   */
  TypeTracker end() { result.end() }
}

python/ql/src/experimental/dataflow/internal/DataFlowPublic.qll

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
private import python
66
private import DataFlowPrivate
7+
import experimental.dataflow.TypeTracker
78

89
/**
910
* IPA type for data flow nodes.
@@ -69,6 +70,14 @@ class Node extends TNode {
6970

7071
/** Convenience method for casting to ExprNode and calling getNode and getNode again. */
7172
Expr asExpr() { none() }
73+
74+
/**
75+
* Gets a node that this node may flow to using one heap and/or interprocedural step.
76+
*
77+
* See `TypeTracker` for more details about how to use this.
78+
*/
79+
pragma[inline]
Node track(TypeTracker t2, TypeTracker t) {
  // Delegates to `TypeTracker::step`, with this node as the node stepped from.
  t2.step(this, result) = t
}
7281
}
7382

7483
class EssaNode extends Node, TEssaNode {
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Empty class; the functions in this fixture instantiate it and set attributes on the instances.
class SomeClass:
2+
pass
3+
4+
# Writes `tracked` to `x.foo`, reads it back into `y`, and passes `y` to `do_stuff`.
# NOTE(review): the trailing comments on each line are presumably inline expectations
# for the type-tracking test (plain value vs. value held in attribute `foo`) — confirm
# against the test query before editing them.
def simple_read_write():
5+
x = SomeClass() # $tracked=foo
6+
x.foo = tracked # $tracked $tracked=foo
7+
y = x.foo # $tracked=foo $tracked
8+
do_stuff(y) # $tracked
9+
10+
# Same shape as the example in the QLDoc of `basicStoreStep`: `foo` calls `bar(x)` before
# it writes `x.attr`, and `bar` reads `x.attr`.
# NOTE(review): `baz` is not defined in the visible part of this fixture — presumably an
# intentionally unresolved call; confirm.
def foo():
11+
x = SomeClass() # $tracked=attr
12+
bar(x) # $tracked=attr
13+
x.attr = tracked # $tracked=attr $tracked
14+
baz(x) # $tracked=attr
15+
16+
def bar(x): # $tracked=attr
17+
z = x.attr # $tracked $tracked=attr
18+
do_stuff(z) # $tracked
19+
20+
# `test_incompatible_types` assigns an `int` and later a `str` to `x.field` on the same
# object, calling `expects_int` / `expects_string` after each write; both helpers read
# `x.field`.
# NOTE(review): the `f+:` prefix in the trailing comments presumably marks an expected
# false positive — confirm against the test framework.
def expects_int(x): # $int=field $f+:str=field
21+
do_int_stuff(x.field) # $int $f+:str $int=field $f+:str=field
22+
23+
def expects_string(x): # $f+:int=field $str=field
24+
do_string_stuff(x.field) # $f+:int $str $f+:int=field $str=field
25+
26+
def test_incompatible_types():
27+
x = SomeClass() # $int,str=field
28+
x.field = int(5) # $int=field $f+:str=field $int $f+:str
29+
expects_int(x) # $int=field $f+:str=field
30+
x.field = str("Hello") # $f+:int=field $str=field $f+:int $str
31+
expects_string(x) # $f+:int=field $str=field
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# Binds `tracked` to a local and returns it; `foo` and `bar` in this fixture call it.
def get_tracked():
2+
x = tracked # $tracked
3+
return x # $tracked
4+
5+
# Scenario: the result of `get_tracked()` is passed directly as an argument, so the value
# is returned from one call and then passed into another.
def use_tracked_foo(x): # $tracked
6+
do_stuff(x) # $tracked
7+
8+
def foo():
9+
use_tracked_foo(
10+
get_tracked() # $tracked
11+
)
12+
13+
# Scenario: as the previous one, but the result of `get_tracked()` goes through the local
# `x` before being passed on.
def use_tracked_bar(x): # $tracked
14+
do_stuff(x) # $tracked
15+
16+
def bar():
17+
x = get_tracked() # $tracked
18+
use_tracked_bar(x) # $tracked
19+
20+
# Scenario: `tracked` is bound to a local in the caller and passed into a single call.
def use_tracked_baz(x): # $tracked
21+
do_stuff(x) # $tracked
22+
23+
def baz():
24+
x = tracked # $tracked
25+
use_tracked_baz(x) # $tracked
26+
27+
# Scenario: a value is passed into `id`, returned from it, and then passed into a
# second call. The trailing comments record that it is not tracked past the return.
# NOTE(review): `id` shadows the builtin; kept because `quux` calls it by that name.
def id(x): # $tracked
    return x # $tracked

def use_tracked_quux(x): # $f-:tracked
    # Uses the parameter `x`; the fixture previously passed the undefined name `y`.
    do_stuff(x) # call after return -- not tracked in here.

def quux():
    x = tracked # $tracked
    y = id(x) # $tracked
    use_tracked_quux(y) # not tracked out of call to id.
37+
38+
# Scenario: a value is written to the module-level variable `g` in one function and
# read back from it in another.
g = None

def write_g(x): # $tracked
    # Without this declaration the assignment below would only bind a local `g`,
    # and the module-level `g` read by `use_g` would never be written.
    global g
    g = x # $tracked

def use_g():
    do_stuff(g) # $f-:tracked // no global flow for now.

def global_var_write_test():
    x = tracked # $tracked
    write_g(x) # $tracked
    use_g()
50+
51+
# `redefine_test` rebinds the same local `x` from an `int` to a `str`, calling
# `expects_int` / `expects_string` after each assignment.
def expects_int(x): # $int
52+
do_int_stuff(x) # $int
53+
54+
def expects_string(x): # $str
55+
do_string_stuff(x) # $str
56+
57+
def redefine_test():
58+
x = int(5) # $int
59+
expects_int(x) # $int
60+
x = str("Hello") # $str
61+
expects_string(x) # $str

python/ql/test/experimental/dataflow/typetracking/tracked.expected

Whitespace-only changes.

0 commit comments

Comments
 (0)