Skip to content

Commit cec3694

Browse files
committed
Python: Add type tracker and step summary implementation.
1 parent 2e5af67 commit cec3694

File tree

4 files changed

+277
-0
lines changed

4 files changed

+277
-0
lines changed
Lines changed: 237 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,237 @@
1+
/** Provides classes and predicates for step summaries and type tracking. */
2+
3+
import python
4+
import internal.DataFlowPublic
5+
import internal.DataFlowPrivate
6+
7+
/**
 * A string that is the name of some `Attribute`, and so may appear as the name
 * of an attribute or as a component of an access path.
 */
class AttributeName extends string {
  AttributeName() { exists(Attribute a | this = a.getName()) }
}
11+
12+
/**
 * An `AttributeName`, or the empty string, where the empty string stands for
 * "no attribute".
 */
class OptionalAttributeName extends string {
  OptionalAttributeName() {
    this = ""
    or
    this instanceof AttributeName
  }
}
16+
17+
/**
 * The kinds of step that can occur on an inter-procedural data flow path.
 *
 * The attribute-carrying branches record the name of the attribute involved.
 */
newtype TStepSummary =
  // Produced by `StepSummary::smallstep` for a `simpleLocalFlowStep`.
  LevelStep() or
  // Produced by `StepSummary::smallstep` for a `callStep`.
  CallStep() or
  // Produced by `StepSummary::smallstep` for a `returnStep`.
  ReturnStep() or
  // Produced by `StepSummary::smallstep` for a `basicStoreStep` on `attr`.
  StoreStep(AttributeName attr) or
  // Produced by `StepSummary::smallstep` for a `basicLoadStep` on `attr`.
  LoadStep(AttributeName attr) or
  // Not produced by any predicate in this file.
  CopyStep(AttributeName attr)
27+
28+
/**
 * INTERNAL: Use `TypeTracker` or `TypeBackTracker` instead.
 *
 * A description of a step on an inter-procedural data flow path.
 */
class StepSummary extends TStepSummary {
  /**
   * Gets a textual representation of this step summary.
   *
   * There is one case per branch of `TStepSummary`, so that every summary
   * value has a string representation.
   */
  string toString() {
    this instanceof LevelStep and result = "level"
    or
    this instanceof CallStep and result = "call"
    or
    this instanceof ReturnStep and result = "return"
    or
    exists(string prop | this = StoreStep(prop) | result = "store " + prop)
    or
    exists(string prop | this = LoadStep(prop) | result = "load " + prop)
    or
    // Previously missing: `CopyStep` values had no `toString()` result.
    exists(string prop | this = CopyStep(prop) | result = "copy " + prop)
  }
}
47+
48+
/** Provides the step relations that `TypeTracker` summarizes. */
module StepSummary {
  /**
   * Holds if `succ` can be reached from `pred` by one `simpleLocalFlowStep`
   * followed by one `smallstep` that is described by `summary`.
   */
  cached
  predicate step(Node pred, Node succ, StepSummary summary) {
    exists(Node mid |
      simpleLocalFlowStep(pred, mid) and
      smallstep(mid, succ, summary)
    )
  }

  /**
   * Holds if there is a single step from `pred` to `succ` of the kind
   * described by `summary`: a local flow step, a call step, a return step,
   * or a store/load of some attribute.
   */
  predicate smallstep(Node pred, Node succ, StepSummary summary) {
    summary = LevelStep() and
    simpleLocalFlowStep(pred, succ)
    or
    summary = CallStep() and
    callStep(pred, succ)
    or
    summary = ReturnStep() and
    returnStep(pred, succ)
    or
    exists(string attr |
      summary = StoreStep(attr) and
      basicStoreStep(pred, succ, attr)
    )
    or
    exists(string attr |
      summary = LoadStep(attr) and
      basicLoadStep(pred, succ, attr)
    )
  }
}
71+
72+
/**
 * Holds if `pred` is the argument at some position of a call, and `succ` is
 * the parameter at the same position of the callable of that call.
 */
predicate callStep(ArgumentNode pred, ParameterNode succ) {
  exists(DataFlowCall call, int pos |
    succ.isParameterOf(call.getCallable(), pos) and
    pred.argumentOf(call, pos)
  )
}
77+
78+
/**
 * Holds if `pred` is a return node whose enclosing callable is the callable
 * of some call, and `succ` is the node for that call.
 */
predicate returnStep(ReturnNode pred, Node succ) {
  exists(DataFlowCall call |
    succ = TCfgNode(call) and
    call.getCallable() = pred.getEnclosingCallable()
  )
}
83+
84+
/** TODO: Implement these. */
85+
// Placeholder: never holds (`none()`), so `StepSummary::smallstep` currently
// produces no `StoreStep` summaries.
// NOTE(review): presumably meant to relate a value `pred` to a node `succ`
// whose attribute `attr` it is stored in - confirm when implementing.
predicate basicStoreStep(Node pred, Node succ, string attr) { none() }
86+
87+
/**
 * Placeholder: never holds (`none()`), so `StepSummary::smallstep` currently
 * produces no `LoadStep` summaries.
 *
 * NOTE(review): presumably meant to relate a node `pred` to a read `succ` of
 * its attribute `attr` - confirm when implementing.
 */
predicate basicLoadStep(Node pred, Node succ, string attr) { none() }
88+
89+
/**
 * A utility class that is equivalent to `boolean` but does not require type joining.
 *
 * It contains exactly the two values `false` and `true`.
 */
class Boolean extends boolean {
  Boolean() {
    this = false
    or
    this = true
  }
}
95+
96+
/**
 * The representation underlying `TypeTracker`: a flag `hasCall` (set to `true`
 * by `TypeTracker::append` on a call step) paired with `prop`, the attribute
 * the tracked value has been stored in, or the empty string for no attribute.
 */
private newtype TTypeTracker = MkTypeTracker(Boolean hasCall, OptionalAttributeName prop)
97+
98+
/**
 * Summary of the steps needed to track a value to a given dataflow node.
 *
 * This can be used to track objects that implement a certain API in order to
 * recognize calls to that API. Note that type-tracking does not by itself provide a
 * source/sink relation, that is, it may determine that a node has a given type,
 * but it won't determine where that type came from.
 *
 * It is recommended that all uses of this type are written in the following form,
 * for tracking some type `myType`:
 * ```ql
 * Node myType(TypeTracker t) {
 *   t.start() and
 *   result = < source of myType >
 *   or
 *   exists(TypeTracker t2 | t = t2.step(myType(t2), result))
 * }
 *
 * Node myType() { exists(TypeTracker t | t.end() | result = myType(t)) }
 * ```
 *
 * If you additionally want to track individual intra-procedural steps, use
 * `t = t2.smallstep(myType(t2), result)` in place of
 * `t = t2.step(myType(t2), result)`.
 */
class TypeTracker extends TTypeTracker {
  Boolean hasCall;
  OptionalAttributeName prop;

  TypeTracker() { this = MkTypeTracker(hasCall, prop) }

  /**
   * Gets the summary resulting from appending `step` to this type-tracking summary.
   *
   * - A level step leaves the summary unchanged.
   * - A call step sets `hasCall`.
   * - A return step is only allowed if no call step has been taken.
   * - A load step is only allowed for the attribute currently being tracked,
   *   and clears it.
   * - A store step is only allowed if no attribute is currently being
   *   tracked, and records the stored attribute.
   *
   * There is no case for `CopyStep`, so appending one has no result.
   */
  cached
  TypeTracker append(StepSummary step) {
    step = LevelStep() and result = this
    or
    step = CallStep() and result = MkTypeTracker(true, prop)
    or
    step = ReturnStep() and hasCall = false and result = this
    or
    step = LoadStep(prop) and result = MkTypeTracker(hasCall, "")
    or
    exists(string p | step = StoreStep(p) and prop = "" and result = MkTypeTracker(hasCall, p))
  }

  /** Gets a textual representation of this summary. */
  string toString() {
    exists(string withCall, string withProp |
      (if hasCall = true then withCall = "with" else withCall = "without") and
      (if prop != "" then withProp = " with property " + prop else withProp = "") and
      result = "type tracker " + withCall + " call steps" + withProp
    )
  }

  /**
   * Holds if this is the starting point of type tracking.
   */
  predicate start() { hasCall = false and prop = "" }

  /**
   * Holds if this is the starting point of type tracking, and the value starts in the property named `propName`.
   * The type tracking only ends after the property has been loaded.
   */
  predicate startInProp(AttributeName propName) { hasCall = false and prop = propName }

  /**
   * Holds if this is the starting point of type tracking
   * when tracking a parameter into a call, but not out of it.
   */
  predicate call() { hasCall = true and prop = "" }

  /**
   * Holds if this is the end point of type tracking, that is, the tracked
   * value is not inside a property.
   */
  predicate end() { prop = "" }

  /**
   * INTERNAL. DO NOT USE.
   *
   * Holds if this type has been tracked into a call.
   */
  boolean hasCall() { result = hasCall }

  /**
   * Gets a type tracker that starts where this one has left off to allow continued
   * tracking.
   *
   * This predicate is only defined if the type has not been tracked into a property.
   */
  TypeTracker continue() { prop = "" and result = this }

  /**
   * Gets the summary that corresponds to having taken a forwards
   * heap and/or inter-procedural step from `pred` to `succ`.
   */
  pragma[inline]
  TypeTracker step(Node pred, Node succ) {
    exists(StepSummary summary |
      StepSummary::step(pred, succ, summary) and
      result = this.append(summary)
    )
  }

  /**
   * Gets the summary that corresponds to having taken a forwards
   * local, heap and/or inter-procedural step from `pred` to `succ`.
   *
   * Unlike `TypeTracker::step`, this predicate exposes all edges
   * in the flow graph, and not just the edges between `SourceNode`s.
   * It may therefore be less performant.
   *
   * Type tracking predicates using small steps typically take the following form:
   * ```ql
   * Node myType(TypeTracker t) {
   *   t.start() and
   *   result = < source of myType >
   *   or
   *   exists(TypeTracker t2 | t = t2.smallstep(myType(t2), result))
   * }
   *
   * Node myType() { exists(TypeTracker t | t.end() | result = myType(t)) }
   * ```
   */
  pragma[inline]
  TypeTracker smallstep(Node pred, Node succ) {
    // Local flow steps need no separate case: `StepSummary::smallstep` reports
    // them as `LevelStep()`, which `append` maps to the unchanged tracker.
    exists(StepSummary summary |
      StepSummary::smallstep(pred, succ, summary) and
      result = this.append(summary)
    )
  }
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
| test.py:1:1:1:18 | Exit node for Function get_tracked | type tracker without call steps |
2+
| test.py:2:9:2:15 | ControlFlowNode for tracked | type tracker without call steps |
3+
| test.py:3:12:3:12 | ControlFlowNode for x | type tracker without call steps |
4+
| test.py:16:9:16:15 | ControlFlowNode for tracked | type tracker without call steps |
5+
| test.py:17:5:17:18 | SSA variable x | type tracker without call steps |
6+
| test.py:17:17:17:17 | ControlFlowNode for x | type tracker without call steps |
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
import python
2+
import experimental.dataflow.TypeTracker
3+
4+
/**
 * Gets a node reached by tracking from a `NameNode` with id `tracked`,
 * where `t` summarizes the steps taken to get there.
 */
Node tracked(TypeTracker t) {
  exists(NameNode n | n.getId() = "tracked" |
    t.start() and
    result = TCfgNode(n)
  )
  or
  exists(TypeTracker prev | t = prev.step(tracked(prev), result))
}
10+
11+
// Report every tracked node together with the tracker that reached it.
from TypeTracker t, Node n
where tracked(t) = n
select n, t
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
def get_tracked():  # Fixture: binds the global name `tracked` to a local and returns it.
2+
    x = tracked  # `tracked` is the name the test query starts tracking from.
3+
    return x
4+
5+
def use_tracked(x):  # Fixture: forwards its argument to `do_stuff`, which this file does not define.
6+
    do_stuff(x)
7+
8+
def foo():  # Fixture: passes the result of `get_tracked()` directly to `use_tracked`.
9+
    use_tracked(get_tracked())
10+
11+
def bar():  # Fixture: passes the result of `get_tracked()` to `use_tracked` via a local variable.
12+
    x = get_tracked()
13+
    use_tracked(x)
14+
15+
def baz():  # Fixture: passes the global `tracked` to `use_tracked` without calling `get_tracked`.
16+
    x = tracked
17+
    use_tracked(x)
18+
19+
foo()  # Call each fixture scenario once at module level.
20+
bar()
21+
baz()

0 commit comments

Comments
 (0)