Skip to content

Commit 7d17454

Browse files
authored
Merge pull request #21138 from github/tausbn/python-prepare-for-overlay-annotations
Prepare dataflow for local annotations
2 parents 3e5c2dd + 62fb38d commit 7d17454

File tree

19 files changed

+211
-65
lines changed

19 files changed

+211
-65
lines changed

python/ql/consistency-queries/DataFlowConsistency.ql

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ private module Input implements InputSig<Location, PythonDataFlow> {
2626
or
2727
// TODO: Implement post-updates for **kwargs, see tests added in https://github.com/github/codeql/pull/14936
2828
exists(ArgumentPosition apos | n.argumentOf(_, apos) and apos.isDictSplat())
29+
or
30+
missingArgumentCallExclude(n)
2931
}
3032

3133
predicate reverseReadExclude(Node n) {
@@ -134,6 +136,18 @@ private module Input implements InputSig<Location, PythonDataFlow> {
134136
other.getNode().getScope() = f
135137
)
136138
}
139+
140+
predicate missingArgumentCallExclude(ArgumentNode arg) {
141+
// We overapproximate the argument nodes in order to not rely on the global `getCallArg`
142+
// predicate.
143+
// Because of this, we must exclude the cases where we have an approximation but no actual
144+
// argument node.
145+
arg = getCallArgApproximation() and not getCallArg(_, _, _, arg, _)
146+
or
147+
// Likewise, capturing closure arguments do not have corresponding argument nodes in some cases.
148+
arg instanceof SynthCapturedVariablesArgumentNode and
149+
not arg.argumentOf(_, _)
150+
}
137151
}
138152

139153
import MakeConsistency<Location, PythonDataFlow, PythonTaintTracking, Input>

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll

Lines changed: 54 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1714,36 +1714,66 @@ private class SummaryPostUpdateNode extends FlowSummaryNode, PostUpdateNodeImpl
17141714
* This is also known as the environment part of a closure.
17151715
*
17161716
* This is used for tracking flow through captured variables.
1717-
*
1718-
* TODO:
1719-
* We might want a synthetic node here, but currently that incurs problems
1720-
* with non-monotonic recursion, because of the use of `resolveCall` in the
1721-
* char pred. This may be solvable by using
1722-
* `CallGraphConstruction::Make` in stead of
1723-
* `CallGraphConstruction::Simple::Make` appropriately.
17241717
*/
1725-
class CapturedVariablesArgumentNode extends CfgNode, ArgumentNode {
1726-
CallNode callNode;
1718+
class SynthCapturedVariablesArgumentNode extends Node, TSynthCapturedVariablesArgumentNode {
1719+
ControlFlowNode callable;
17271720

1728-
CapturedVariablesArgumentNode() {
1729-
node = callNode.getFunction() and
1730-
exists(Function target | resolveCall(callNode, target, _) |
1731-
target = any(VariableCapture::CapturedVariable v).getACapturingScope()
1732-
)
1733-
}
1721+
SynthCapturedVariablesArgumentNode() { this = TSynthCapturedVariablesArgumentNode(callable) }
1722+
1723+
/** Gets the `CallNode` corresponding to this captured variables argument node. */
1724+
CallNode getCallNode() { result.getFunction() = callable }
1725+
1726+
/** Gets the `CfgNode` that corresponds to this synthetic node. */
1727+
CfgNode getUnderlyingNode() { result.asCfgNode() = callable }
1728+
1729+
override Scope getScope() { result = callable.getScope() }
1730+
1731+
override Location getLocation() { result = callable.getLocation() }
17341732

17351733
override string toString() { result = "Capturing closure argument" }
1734+
}
17361735

1736+
/** A captured variables argument node viewed as an argument node. Needed because `argumentOf` is a global predicate. */
1737+
class CapturedVariablesArgumentNodeAsArgumentNode extends ArgumentNode,
1738+
SynthCapturedVariablesArgumentNode
1739+
{
17371740
override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
1738-
callNode = call.getNode() and
1739-
pos.isLambdaSelf()
1741+
exists(CallNode callNode | callNode = this.getCallNode() |
1742+
callNode = call.getNode() and
1743+
exists(Function target | resolveCall(callNode, target, _) |
1744+
target = any(VariableCapture::CapturedVariable v).getACapturingScope()
1745+
) and
1746+
pos.isLambdaSelf()
1747+
)
17401748
}
17411749
}
17421750

1743-
/** A synthetic node representing the values of variables captured by a comprehension. */
1744-
class SynthCompCapturedVariablesArgumentNode extends Node, TSynthCompCapturedVariablesArgumentNode,
1745-
ArgumentNode
1751+
/** A synthetic node representing the values of captured variables after the output has been computed. */
1752+
class SynthCapturedVariablesArgumentPostUpdateNode extends PostUpdateNodeImpl,
1753+
TSynthCapturedVariablesArgumentPostUpdateNode
17461754
{
1755+
ControlFlowNode callable;
1756+
1757+
SynthCapturedVariablesArgumentPostUpdateNode() {
1758+
this = TSynthCapturedVariablesArgumentPostUpdateNode(callable)
1759+
}
1760+
1761+
/** Gets the `PostUpdateNode` (for a `CfgNode`) that corresponds to this synthetic node. */
1762+
PostUpdateNode getUnderlyingNode() { result.getPreUpdateNode().asCfgNode() = callable }
1763+
1764+
override string toString() { result = "[post] Capturing closure argument" }
1765+
1766+
override Scope getScope() { result = callable.getScope() }
1767+
1768+
override Location getLocation() { result = callable.getLocation() }
1769+
1770+
override SynthCapturedVariablesArgumentNode getPreUpdateNode() {
1771+
result = TSynthCapturedVariablesArgumentNode(callable)
1772+
}
1773+
}
1774+
1775+
/** A synthetic node representing the values of variables captured by a comprehension. */
1776+
class SynthCompCapturedVariablesArgumentNode extends Node, TSynthCompCapturedVariablesArgumentNode {
17471777
Comp comp;
17481778

17491779
SynthCompCapturedVariablesArgumentNode() { this = TSynthCompCapturedVariablesArgumentNode(comp) }
@@ -1755,7 +1785,11 @@ class SynthCompCapturedVariablesArgumentNode extends Node, TSynthCompCapturedVar
17551785
override Location getLocation() { result = comp.getLocation() }
17561786

17571787
Comp getComprehension() { result = comp }
1788+
}
17581789

1790+
class SynthCompCapturedVariablesArgumentNodeAsArgumentNode extends SynthCompCapturedVariablesArgumentNode,
1791+
ArgumentNode
1792+
{
17591793
override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
17601794
call.(ComprehensionCall).getComprehension() = comp and
17611795
pos.isLambdaSelf()

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1128,6 +1128,14 @@ predicate nodeIsHidden(Node n) {
11281128
n instanceof SynthCaptureNode
11291129
or
11301130
n instanceof SynthCapturedVariablesParameterNode
1131+
or
1132+
n instanceof SynthCapturedVariablesArgumentNode
1133+
or
1134+
n instanceof SynthCapturedVariablesArgumentPostUpdateNode
1135+
or
1136+
n instanceof SynthCompCapturedVariablesArgumentNode
1137+
or
1138+
n instanceof SynthCompCapturedVariablesArgumentPostUpdateNode
11311139
}
11321140

11331141
class LambdaCallKind = Unit;

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll

Lines changed: 57 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -76,15 +76,7 @@ newtype TNode =
7676
node.getNode() = any(Comp c).getIterable()
7777
} or
7878
/** A node representing a global (module-level) variable in a specific module. */
79-
TModuleVariableNode(Module m, GlobalVariable v) {
80-
v.getScope() = m and
81-
(
82-
v.escapes()
83-
or
84-
isAccessedThroughImportStar(m) and
85-
ImportStar::globalNameDefinedInModule(v.getId(), m)
86-
)
87-
} or
79+
TModuleVariableNode(Module m, GlobalVariable v) { v.getScope() = m } or
8880
/**
8981
* A synthetic node representing that an iterable sequence flows to consumer.
9082
*/
@@ -129,6 +121,20 @@ newtype TNode =
129121
f = any(VariableCapture::CapturedVariable v).getACapturingScope() and
130122
exists(TFunction(f))
131123
} or
124+
/**
125+
* A synthetic node representing the values of the variables captured
126+
* by the callable being called.
127+
*/
128+
TSynthCapturedVariablesArgumentNode(ControlFlowNode callable) {
129+
callable = any(CallNode c).getFunction()
130+
} or
131+
/**
132+
* A synthetic node representing the values of the variables captured
133+
* by the callable being called, after the output has been computed.
134+
*/
135+
TSynthCapturedVariablesArgumentPostUpdateNode(ControlFlowNode callable) {
136+
callable = any(CallNode c).getFunction()
137+
} or
132138
/** A synthetic node representing the values of variables captured by a comprehension. */
133139
TSynthCompCapturedVariablesArgumentNode(Comp comp) {
134140
comp.getFunction() = any(VariableCapture::CapturedVariable v).getACapturingScope()
@@ -347,27 +353,51 @@ abstract class ArgumentNode extends Node {
347353
final ExtractedDataFlowCall getCall() { this.argumentOf(result, _) }
348354
}
349355

356+
/** Gets an overapproximation of the argument nodes that are included in `getCallArg`. */
357+
Node getCallArgApproximation() {
358+
// pre-update nodes for calls
359+
result = any(CallCfgNode c).(PostUpdateNode).getPreUpdateNode()
360+
or
361+
// self parameters in methods
362+
exists(Class c | result.asExpr() = c.getAMethod().getArg(0))
363+
or
364+
// the object part of an attribute expression (which might be a bound method)
365+
result.asCfgNode() = any(AttrNode a).getObject()
366+
or
367+
// the function part of any call
368+
result.asCfgNode() = any(CallNode c).getFunction()
369+
}
370+
371+
/** Gets the extracted argument nodes that do not rely on `getCallArg`. */
372+
private Node implicitArgumentNode() {
373+
// for potential summaries we allow all normal call arguments
374+
normalCallArg(_, result, _)
375+
or
376+
// and self arguments
377+
result.asCfgNode() = any(CallNode c).getFunction().(AttrNode).getObject()
378+
or
379+
// for comprehensions, we allow the synthetic `iterable` argument
380+
result.asExpr() = any(Comp c).getIterable()
381+
}
382+
350383
/**
351384
* A data flow node that represents a call argument found in the source code.
352385
*/
353386
class ExtractedArgumentNode extends ArgumentNode {
354387
ExtractedArgumentNode() {
355-
// for resolved calls, we need to allow all argument nodes
356-
getCallArg(_, _, _, this, _)
357-
or
358-
// for potential summaries we allow all normal call arguments
359-
normalCallArg(_, this, _)
388+
this = getCallArgApproximation()
360389
or
361-
// and self arguments
362-
this.asCfgNode() = any(CallNode c).getFunction().(AttrNode).getObject()
363-
or
364-
// for comprehensions, we allow the synthetic `iterable` argument
365-
this.asExpr() = any(Comp c).getIterable()
390+
this = implicitArgumentNode()
366391
}
367392

368393
final override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
369394
this = call.getArgument(pos) and
370-
call instanceof ExtractedDataFlowCall
395+
call instanceof ExtractedDataFlowCall and
396+
(
397+
this = implicitArgumentNode()
398+
or
399+
this = getCallArgApproximation() and getCallArg(_, _, _, this, _)
400+
)
371401
}
372402
}
373403

@@ -440,13 +470,17 @@ class ModuleVariableNode extends Node, TModuleVariableNode {
440470

441471
/** Gets a node that reads this variable. */
442472
Node getARead() {
443-
result.asCfgNode() = var.getALoad().getAFlowNode() and
444-
// Ignore reads that happen when the module is imported. These are only executed once.
445-
not result.getScope() = mod
473+
result = this.getALocalRead()
446474
or
447475
this = import_star_read(result)
448476
}
449477

478+
/** Gets a node that reads this variable, excluding reads that happen through `from ... import *`. */
479+
Node getALocalRead() {
480+
result.asCfgNode() = var.getALoad().getAFlowNode() and
481+
not result.getScope() = mod
482+
}
483+
450484
/** Gets an `EssaNode` that corresponds to an assignment of this global variable. */
451485
Node getAWrite() {
452486
any(EssaNodeDefinition def).definedBy(var, result.asCfgNode().(DefinitionNode))
@@ -466,8 +500,6 @@ class ModuleVariableNode extends Node, TModuleVariableNode {
466500
override Location getLocation() { result = mod.getLocation() }
467501
}
468502

469-
private predicate isAccessedThroughImportStar(Module m) { m = ImportStar::getStarImported(_) }
470-
471503
private ModuleVariableNode import_star_read(Node n) {
472504
resolved_import_star_module(result.getModule(), result.getVariable().getId(), n)
473505
}

python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ class LocalSourceNode extends Node {
6767
or
6868
// We explicitly include any read of a global variable, as some of these may have local flow going
6969
// into them.
70-
this = any(ModuleVariableNode mvn).getARead()
70+
this = any(ModuleVariableNode v).getALocalRead()
7171
or
7272
// We include all scope entry definitions, as these act as the local source within the scope they
7373
// enter.
@@ -248,7 +248,7 @@ private module Cached {
248248
pragma[nomagic]
249249
private predicate localSourceFlowStep(Node nodeFrom, Node nodeTo) {
250250
simpleLocalFlowStep(nodeFrom, nodeTo, _) and
251-
not nodeTo = any(ModuleVariableNode v).getARead()
251+
not nodeTo = any(ModuleVariableNode v).getALocalRead()
252252
}
253253

254254
/**

python/ql/lib/semmle/python/dataflow/new/internal/VariableCapture.qll

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,12 @@ private Flow::ClosureNode asClosureNode(Node n) {
114114
result.(Flow::ExprNode).getExpr().getNode() = comp
115115
)
116116
or
117+
// For captured variable argument nodes (and their post-update variants), we use the closure node
118+
// for the underlying node.
119+
result = asClosureNode(n.(SynthCapturedVariablesArgumentNode).getUnderlyingNode())
120+
or
121+
result = asClosureNode(n.(SynthCapturedVariablesArgumentPostUpdateNode).getUnderlyingNode())
122+
or
117123
// TODO: Should the `Comp`s above be excluded here?
118124
result.(Flow::ExprNode).getExpr() = n.(CfgNode).getNode()
119125
or

python/ql/lib/utils/test/dataflow/MaximalFlowTest.qll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@ module MaximalFlowTest implements FlowTestSig {
88

99
predicate relevantFlow(DataFlow::Node source, DataFlow::Node sink) {
1010
source != sink and
11-
MaximalFlows::flow(source, sink)
11+
MaximalFlows::flow(source, sink) and
12+
// exclude ModuleVariableNodes (which have location 0:0:0:0)
13+
not sink instanceof DataFlow::ModuleVariableNode
1214
}
1315
}
1416

@@ -33,7 +35,7 @@ module MaximalFlowsConfig implements DataFlow::ConfigSig {
3335
predicate isSink(DataFlow::Node node) {
3436
exists(node.getLocation().getFile().getRelativePath()) and
3537
not any(CallNode c).getArg(_) = node.asCfgNode() and
36-
not node instanceof DataFlow::ArgumentNode and
38+
not isArgumentNode(node, _, _) and
3739
not node.asCfgNode().(NameNode).getId().matches("SINK%") and
3840
not DataFlow::localFlowStep(node, _)
3941
}

python/ql/lib/utils/test/dataflow/callGraphConfig.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ module CallGraphConfig implements DataFlow::ConfigSig {
99
predicate isSource(DataFlow::Node node) {
1010
node instanceof DataFlowPrivate::ReturnNode
1111
or
12-
node instanceof DataFlow::ArgumentNode
12+
DataFlowPrivate::isArgumentNode(node, _, _)
1313
}
1414

1515
predicate isSink(DataFlow::Node node) {

python/ql/src/Classes/InitCallsSubclass/InitCallsSubclassMethod.ql

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import python
1616
import semmle.python.dataflow.new.DataFlow
1717
import semmle.python.dataflow.new.internal.DataFlowDispatch
18+
import semmle.python.dataflow.new.internal.DataFlowPrivate
1819

1920
predicate initSelfCallOverridden(
2021
Function init, DataFlow::Node self, DataFlow::MethodCallNode call, Function target,
@@ -39,7 +40,7 @@ predicate readsFromSelf(Function method) {
3940
self.getParameter() = method.getArg(0) and
4041
DataFlow::localFlow(self, sink)
4142
|
42-
sink instanceof DataFlow::ArgumentNode
43+
isArgumentNode(sink, _, _)
4344
or
4445
sink = any(DataFlow::AttrRead a).getObject()
4546
)
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
| test.py:5:9:5:16 | ControlFlowNode for __init__ | test.py:4:1:4:20 | ControlFlowNode for ClassExpr | __init__ | test.py:5:5:5:28 | ControlFlowNode for FunctionExpr |
22
| test.py:6:9:6:16 | ControlFlowNode for Attribute | test.py:6:9:6:12 | ControlFlowNode for self | foo | test.py:6:20:6:22 | ControlFlowNode for foo |
3+
| test.py:9:1:9:9 | ControlFlowNode for Attribute | test.py:0:0:0:0 | ModuleVariableNode in Module test for myobj | foo | test.py:9:13:9:17 | ControlFlowNode for StringLiteral |
34
| test.py:9:1:9:9 | ControlFlowNode for Attribute | test.py:9:1:9:5 | ControlFlowNode for myobj | foo | test.py:9:13:9:17 | ControlFlowNode for StringLiteral |
45
| test.py:12:1:12:25 | ControlFlowNode for setattr() | test.py:12:9:12:13 | ControlFlowNode for myobj | foo | test.py:12:23:12:24 | ControlFlowNode for IntegerLiteral |

0 commit comments

Comments
 (0)