Skip to content

Commit d3f8fb5

Browse files
authored
Merge pull request #4423 from tausbn/python-add-attribute-access-interface
Approved by RasmusWL
2 parents e2b0c60 + 3288cf1 commit d3f8fb5

File tree

9 files changed

+426
-10
lines changed

9 files changed

+426
-10
lines changed

python/ql/src/experimental/dataflow/TypeTracker.qll

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ private import internal.DataFlowPrivate
66

77
/** Any string that may appear as the name of an attribute or access path. */
88
class AttributeName extends string {
9-
AttributeName() { this = any(Attribute a).getName() }
9+
AttributeName() { this = any(AttrRef a).getAttributeName() }
1010
}
1111

1212
/** Either an attribute name, or the empty string (representing no attribute). */
@@ -115,19 +115,22 @@ predicate returnStep(ReturnNode nodeFrom, Node nodeTo) {
115115
* assignment to `z` inside `bar`, even though this attribute write happens _after_ `bar` is called.
116116
*/
117117
predicate basicStoreStep(Node nodeFrom, Node nodeTo, string attr) {
118-
exists(AttributeAssignment a, Node var |
119-
a.getName() = attr and
120-
simpleLocalFlowStep*(nodeTo, var) and
121-
var.asVar() = a.getInput() and
122-
nodeFrom.asCfgNode() = a.getValue()
118+
exists(AttrWrite a |
119+
a.mayHaveAttributeName(attr) and
120+
nodeFrom = a.getValue() and
121+
simpleLocalFlowStep*(nodeTo, a.getObject())
123122
)
124123
}
125124

126125
/**
127126
* Holds if `nodeTo` is the result of accessing the `attr` attribute of `nodeFrom`.
128127
*/
129128
predicate basicLoadStep(Node nodeFrom, Node nodeTo, string attr) {
130-
exists(AttrNode s | nodeTo.asCfgNode() = s and s.getObject(attr) = nodeFrom.asCfgNode())
129+
exists(AttrRead a |
130+
a.mayHaveAttributeName(attr) and
131+
nodeFrom = a.getObject() and
132+
nodeTo = a
133+
)
131134
}
132135

133136
/**
Lines changed: 256 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,256 @@
1+
/** This module provides an API for attribute reads and writes. */
2+
3+
import DataFlowUtil
4+
import DataFlowPublic
5+
private import DataFlowPrivate
6+
7+
/**
8+
* A data flow node that reads or writes an attribute of an object.
9+
*
10+
* This abstract base class only knows about the base object on which the attribute is being
11+
* accessed, and the attribute itself, if it is statically inferrable.
12+
*/
13+
abstract class AttrRef extends Node {
14+
/**
15+
* Gets the data flow node corresponding to the object whose attribute is being read or written.
16+
*/
17+
abstract Node getObject();
18+
19+
/**
20+
* Gets the expression node that defines the attribute being accessed, if any. This is
21+
* usually an identifier or literal.
22+
*/
23+
abstract ExprNode getAttributeNameExpr();
24+
25+
/**
26+
* Holds if this attribute reference may access an attribute named `attrName`.
27+
* Uses local data flow to track potential attribute names, which may lead to imprecision. If more
28+
* precision is needed, consider using `getAttributeName` instead.
29+
*/
30+
predicate mayHaveAttributeName(string attrName) {
31+
attrName = this.getAttributeName()
32+
or
33+
exists(Node nodeFrom |
34+
localFlow(nodeFrom, this.getAttributeNameExpr()) and
35+
attrName = nodeFrom.asExpr().(StrConst).getText()
36+
)
37+
}
38+
39+
/**
40+
* Gets the name of the attribute being read or written. For dynamic attribute accesses, this
41+
* method is not guaranteed to return a result. For such cases, using `mayHaveAttributeName` may yield
42+
* better results.
43+
*/
44+
abstract string getAttributeName();
45+
}
46+
47+
/**
48+
* A data flow node that writes an attribute of an object. This includes
49+
* - Simple attribute writes: `object.attr = value`
50+
* - Dynamic attribute writes: `setattr(object, attr, value)`
51+
* - Fields written during class initialization: `class MyClass: attr = value`
52+
*/
53+
abstract class AttrWrite extends AttrRef {
54+
/** Gets the data flow node corresponding to the value that is written to the attribute. */
55+
abstract Node getValue();
56+
}
57+
58+
/**
59+
* Represents a control flow node for a simple attribute assignment. That is,
60+
* ```python
61+
* object.attr = value
62+
* ```
63+
* Also gives access to the `value` being written, by extending `DefinitionNode`.
64+
*/
65+
private class AttributeAssignmentNode extends DefinitionNode, AttrNode, DataFlowCfgNode {
66+
override ControlFlowNode getValue() { result = DefinitionNode.super.getValue() }
67+
}
68+
69+
/** A simple attribute assignment: `object.attr = value`. */
70+
private class AttributeAssignmentAsAttrWrite extends AttrWrite, CfgNode {
71+
override AttributeAssignmentNode node;
72+
73+
override Node getValue() { result.asCfgNode() = node.getValue() }
74+
75+
override Node getObject() { result.asCfgNode() = node.getObject() }
76+
77+
override ExprNode getAttributeNameExpr() {
78+
// Attribute names don't exist as `Node`s in the control flow graph, as they can only ever be
79+
// identifiers, and are therefore represented directly as strings.
80+
// Use `getAttributeName` to access the name of the attribute.
81+
none()
82+
}
83+
84+
override string getAttributeName() { result = node.getName() }
85+
}
86+
87+
import semmle.python.types.Builtins
88+
89+
/** Represents `CallNode`s that may refer to calls to built-in functions or classes. */
90+
private class BuiltInCallNode extends CallNode, DataFlowCfgNode {
91+
string name;
92+
93+
BuiltInCallNode() {
94+
// TODO disallow instances where the name of the built-in may refer to an in-scope variable of that name.
95+
exists(NameNode id | this.getFunction() = id and id.getId() = name and id.isGlobal()) and
96+
name = any(Builtin b).getName()
97+
}
98+
99+
/** Gets the name of the built-in function that is called at this `CallNode`. */
100+
string getBuiltinName() { result = name }
101+
}
102+
103+
/**
104+
* Represents a call to the built-ins that handle dynamic inspection and modification of
105+
* attributes: `getattr`, `setattr`, `hasattr`, and `delattr`.
106+
*/
107+
private class BuiltinAttrCallNode extends BuiltInCallNode {
108+
BuiltinAttrCallNode() { name in ["setattr", "getattr", "hasattr", "delattr"] }
109+
110+
/** Gets the control flow node for the object on which the attribute is accessed. */
111+
ControlFlowNode getObject() { result in [this.getArg(0), this.getArgByName("object")] }
112+
113+
/**
114+
* Gets the control flow node for the value that is being written to the attribute.
115+
* Only relevant for `setattr` calls.
116+
*/
117+
ControlFlowNode getValue() {
118+
// only valid for `setattr`
119+
name = "setattr" and
120+
result in [this.getArg(2), this.getArgByName("value")]
121+
}
122+
123+
/** Gets the control flow node that defines the name of the attribute being accessed. */
124+
ControlFlowNode getName() { result in [this.getArg(1), this.getArgByName("name")] }
125+
}
126+
127+
/** Represents calls to the built-in `setattr`. */
128+
private class SetAttrCallNode extends BuiltinAttrCallNode {
129+
SetAttrCallNode() { name = "setattr" }
130+
}
131+
132+
/** Represents calls to the built-in `getattr`. */
133+
private class GetAttrCallNode extends BuiltinAttrCallNode {
134+
GetAttrCallNode() { name = "getattr" }
135+
}
136+
137+
/** An attribute assignment using `setattr`, e.g. `setattr(object, attr, value)`. */
138+
private class SetAttrCallAsAttrWrite extends AttrWrite, CfgNode {
139+
override SetAttrCallNode node;
140+
141+
override Node getValue() { result.asCfgNode() = node.getValue() }
142+
143+
override Node getObject() { result.asCfgNode() = node.getObject() }
144+
145+
override ExprNode getAttributeNameExpr() { result.asCfgNode() = node.getName() }
146+
147+
override string getAttributeName() {
148+
result = this.getAttributeNameExpr().asExpr().(StrConst).getText()
149+
}
150+
}
151+
152+
/**
153+
* Represents an attribute of a class that is assigned statically during class definition. For instance
154+
* ```python
155+
* class MyClass:
156+
* attr = value
157+
* ...
158+
* ```
159+
* Instances of this class correspond to the `NameNode` for `attr`, and also give access to `value` by
160+
* virtue of being a `DefinitionNode`.
161+
*/
162+
private class ClassAttributeAssignmentNode extends DefinitionNode, NameNode, DataFlowCfgNode { }
163+
164+
/**
165+
* An attribute assignment via a class field, e.g.
166+
* ```python
167+
* class MyClass:
168+
* attr = value
169+
* ```
170+
* is treated as equivalent to `MyClass.attr = value`.
171+
*/
172+
private class ClassDefinitionAsAttrWrite extends AttrWrite, CfgNode {
173+
ClassExpr cls;
174+
override ClassAttributeAssignmentNode node;
175+
176+
ClassDefinitionAsAttrWrite() { node.getScope() = cls.getInnerScope() }
177+
178+
override Node getValue() { result.asCfgNode() = node.getValue() }
179+
180+
override Node getObject() { result.asCfgNode() = cls.getAFlowNode() }
181+
182+
override ExprNode getAttributeNameExpr() { none() }
183+
184+
override string getAttributeName() { result = node.getId() }
185+
}
186+
187+
/**
188+
* A read of an attribute on an object. This includes
189+
* - Simple attribute reads: `object.attr`
190+
* - Dynamic attribute reads using `getattr`: `getattr(object, attr)`
191+
* - Qualified imports: `from module import attr as name`
192+
*/
193+
abstract class AttrRead extends AttrRef, Node { }
194+
195+
/**
196+
* A convenience class for embedding `AttrNode` into `DataFlowCfgNode`, as the former is not
197+
* obviously a subtype of the latter.
198+
*/
199+
private class DataFlowAttrNode extends AttrNode, DataFlowCfgNode { }
200+
201+
/** A simple attribute read, e.g. `object.attr` */
202+
private class AttributeReadAsAttrRead extends AttrRead, CfgNode {
203+
override DataFlowAttrNode node;
204+
205+
override Node getObject() { result.asCfgNode() = node.getObject() }
206+
207+
override ExprNode getAttributeNameExpr() {
208+
// Attribute names don't exist as `Node`s in the control flow graph, as they can only ever be
209+
// identifiers, and are therefore represented directly as strings.
210+
// Use `getAttributeName` to access the name of the attribute.
211+
none()
212+
}
213+
214+
override string getAttributeName() { result = node.getName() }
215+
}
216+
217+
/** An attribute read using `getattr`: `getattr(object, attr)` */
218+
private class GetAttrCallAsAttrRead extends AttrRead, CfgNode {
219+
override GetAttrCallNode node;
220+
221+
override Node getObject() { result.asCfgNode() = node.getObject() }
222+
223+
override ExprNode getAttributeNameExpr() { result.asCfgNode() = node.getName() }
224+
225+
override string getAttributeName() {
226+
result = this.getAttributeNameExpr().asExpr().(StrConst).getText()
227+
}
228+
}
229+
230+
/**
231+
* A convenience class for embedding `ImportMemberNode` into `DataFlowCfgNode`, as the former is not
232+
* obviously a subtype of the latter.
233+
*/
234+
private class DataFlowImportMemberNode extends ImportMemberNode, DataFlowCfgNode { }
235+
236+
/**
237+
* Represents a named import as an attribute read. That is,
238+
* ```python
239+
* from module import attr as attr_ref
240+
* ```
241+
* is treated as if it is a read of the attribute `module.attr`, even if `module` is not imported directly.
242+
*/
243+
private class ModuleAttributeImportAsAttrRead extends AttrRead, CfgNode {
244+
override DataFlowImportMemberNode node;
245+
246+
override Node getObject() { result.asCfgNode() = node.getModule(_) }
247+
248+
override ExprNode getAttributeNameExpr() {
249+
// The name of an imported attribute doesn't exist as a `Node` in the control flow graph, as it
250+
// can only ever be an identifier, and is therefore represented directly as a string.
251+
// Use `getAttributeName` to access the name of the attribute.
252+
none()
253+
}
254+
255+
override string getAttributeName() { exists(node.getModule(result)) }
256+
}

python/ql/src/experimental/dataflow/internal/DataFlowPublic.qll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
private import python
66
private import DataFlowPrivate
77
import experimental.dataflow.TypeTracker
8+
import Attributes
89
private import semmle.python.essa.SsaCompute
910

1011
/**

python/ql/src/experimental/dataflow/internal/DataFlowUtil.qll

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ predicate localFlowStep(Node nodeFrom, Node nodeTo) { simpleLocalFlowStep(nodeFr
1818
predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) }
1919

2020
/**
21-
* Gets an EssaNode that holds the module imported by `name`.
21+
* Gets a `Node` that refers to the module referenced by `name`.
2222
* Note that for the statement `import pkg.mod`, the new variable introduced is `pkg` that is a
2323
* reference to the module `pkg`.
2424
*
@@ -27,6 +27,9 @@ predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) }
2727
* 2. `from <package> import <module>` when `<name> = <package> + "." + <module>`
2828
* 3. `from <module> import <member>` when `<name> = <module> + "." + <member>`
2929
*
30+
* Finally, in `from <module> import <member>` we consider the `ImportExpr` corresponding to
31+
* `<module>` to be a reference to that module.
32+
*
3033
* Note:
3134
* While it is technically possible that `import mypkg.foo` and `from mypkg import foo` can give different values,
3235
* it's highly unlikely that this will be a problem in production level code.
@@ -36,7 +39,7 @@ predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) }
3639
*
3740
* Also see `DataFlow::importMember`
3841
*/
39-
EssaNode importModule(string name) {
42+
Node importModule(string name) {
4043
exists(Variable var, Import imp, Alias alias |
4144
alias = imp.getAName() and
4245
alias.getAsname() = var.getAStore() and
@@ -45,8 +48,29 @@ EssaNode importModule(string name) {
4548
or
4649
name = alias.getValue().(ImportExpr).getImportedModuleName()
4750
) and
48-
result.getVar().(AssignmentDefinition).getSourceVariable() = var
51+
result.(EssaNode).getVar().(AssignmentDefinition).getSourceVariable() = var
4952
)
53+
or
54+
// Although it may seem superfluous to consider the `foo` part of `from foo import bar as baz` to
55+
// be a reference to a module (since that reference only makes sense locally within the `import`
56+
// statement), it's important for our use of type trackers to consider this local reference to
57+
// also refer to the `foo` module. That way, if one wants to track references to the `bar`
58+
// attribute using a type tracker, one can simply write
59+
//
60+
// ```ql
61+
// DataFlow::Node bar_attr_tracker(TypeTracker t) {
62+
// t.startInAttr("bar") and
63+
// result = foo_module_tracker()
64+
// or
65+
// exists(TypeTracker t2 | result = bar_attr_tracker(t2).track(t2, t))
66+
// }
67+
// ```
68+
//
69+
// Where `foo_module_tracker` is a type tracker that tracks references to the `foo` module.
70+
// Because named imports are modelled as `AttrRead`s, the statement `from foo import bar as baz`
71+
// is interpreted as if it were an assignment `baz = foo.bar`, which means `baz` gets tracked as a
72+
// reference to `foo.bar`, as desired.
73+
result.asCfgNode().getNode() = any(ImportExpr i | i.getAnImportedModuleName() = name)
5074
}
5175

5276
/**

python/ql/test/experimental/dataflow/import-helper/ImportHelper.expected

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,36 @@
11
importModule
2+
| test1.py:1:8:1:12 | ControlFlowNode for ImportExpr | mypkg |
23
| test1.py:1:8:1:12 | GSSA Variable mypkg | mypkg |
4+
| test2.py:1:6:1:10 | ControlFlowNode for ImportExpr | mypkg |
5+
| test2.py:1:6:1:10 | ControlFlowNode for ImportExpr | mypkg |
36
| test2.py:1:19:1:21 | GSSA Variable foo | mypkg.foo |
47
| test2.py:1:24:1:26 | GSSA Variable bar | mypkg.bar |
8+
| test3.py:1:8:1:16 | ControlFlowNode for ImportExpr | mypkg |
9+
| test3.py:1:8:1:16 | ControlFlowNode for ImportExpr | mypkg.foo |
10+
| test3.py:2:8:2:16 | ControlFlowNode for ImportExpr | mypkg |
11+
| test3.py:2:8:2:16 | ControlFlowNode for ImportExpr | mypkg.bar |
512
| test3.py:2:8:2:16 | GSSA Variable mypkg | mypkg |
13+
| test4.py:1:8:1:16 | ControlFlowNode for ImportExpr | mypkg |
14+
| test4.py:1:8:1:16 | ControlFlowNode for ImportExpr | mypkg.foo |
615
| test4.py:1:21:1:24 | GSSA Variable _foo | mypkg.foo |
16+
| test4.py:2:8:2:16 | ControlFlowNode for ImportExpr | mypkg |
17+
| test4.py:2:8:2:16 | ControlFlowNode for ImportExpr | mypkg.bar |
718
| test4.py:2:21:2:24 | GSSA Variable _bar | mypkg.bar |
19+
| test5.py:1:8:1:12 | ControlFlowNode for ImportExpr | mypkg |
820
| test5.py:1:8:1:12 | GSSA Variable mypkg | mypkg |
21+
| test5.py:9:6:9:10 | ControlFlowNode for ImportExpr | mypkg |
922
| test5.py:9:26:9:29 | GSSA Variable _bar | mypkg.bar |
23+
| test6.py:1:8:1:12 | ControlFlowNode for ImportExpr | mypkg |
1024
| test6.py:1:8:1:12 | GSSA Variable mypkg | mypkg |
25+
| test6.py:5:8:5:16 | ControlFlowNode for ImportExpr | mypkg |
26+
| test6.py:5:8:5:16 | ControlFlowNode for ImportExpr | mypkg.foo |
1127
| test6.py:5:8:5:16 | GSSA Variable mypkg | mypkg |
28+
| test7.py:1:6:1:10 | ControlFlowNode for ImportExpr | mypkg |
1229
| test7.py:1:19:1:21 | GSSA Variable foo | mypkg.foo |
30+
| test7.py:5:8:5:16 | ControlFlowNode for ImportExpr | mypkg |
31+
| test7.py:5:8:5:16 | ControlFlowNode for ImportExpr | mypkg.foo |
1332
| test7.py:5:8:5:16 | GSSA Variable mypkg | mypkg |
33+
| test7.py:9:6:9:10 | ControlFlowNode for ImportExpr | mypkg |
1434
| test7.py:9:19:9:21 | GSSA Variable foo | mypkg.foo |
1535
importMember
1636
| test2.py:1:19:1:21 | GSSA Variable foo | mypkg | foo |

0 commit comments

Comments
 (0)