@@ -21,53 +21,104 @@ predicate localTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
2121 */
2222cached
2323predicate localAdditionalTaintStep ( DataFlow:: Node nodeFrom , DataFlow:: Node nodeTo ) {
24- localInstructionTaintStep ( nodeFrom .asInstruction ( ) , nodeTo .asInstruction ( ) )
24+ operandToInstructionTaintStep ( nodeFrom .asOperand ( ) , nodeTo .asInstruction ( ) )
2525 or
26- modeledTaintStep ( nodeFrom , nodeTo )
26+ instructionToOperandTaintStep ( nodeFrom .asInstruction ( ) , nodeTo .asOperand ( ) )
27+ }
28+
29+ private predicate instructionToOperandTaintStep ( Instruction fromInstr , Operand toOperand ) {
30+ // Propagate flow from the definition of an operand to the operand, even when the overlap is inexact.
31+ // We only do this in certain cases:
32+ // 1. The instruction's result must not be conflated, and
33+ // 2. The instruction's result type is one of the types where we expect element-to-object flow. Currently
34+ // this is array types and union types. This matches the other two cases of element-to-object flow in
35+ // `DefaultTaintTracking`.
36+ toOperand .getAnyDef ( ) = fromInstr and
37+ not fromInstr .isResultConflated ( ) and
38+ (
39+ fromInstr .getResultType ( ) instanceof ArrayType or
40+ fromInstr .getResultType ( ) instanceof Union
41+ )
42+ or
43+ exists ( ReadSideEffectInstruction readInstr |
44+ fromInstr = readInstr .getArgumentDef ( ) and
45+ toOperand = readInstr .getSideEffectOperand ( )
46+ )
47+ or
48+ toOperand .( LoadOperand ) .getAnyDef ( ) = fromInstr
2749}
2850
2951/**
3052 * Holds if taint propagates from `opFrom` to `instrTo` in exactly one local
3153 * (intra-procedural) step.
3254 */
33- private predicate localInstructionTaintStep ( Instruction nodeFrom , Instruction nodeTo ) {
55+ private predicate operandToInstructionTaintStep ( Operand opFrom , Instruction instrTo ) {
3456 // Taint can flow through expressions that alter the value but preserve
3557 // more than one bit of it _or_ expressions that follow data through
3658 // pointer indirections.
37- nodeTo .getAnOperand ( ) . getAnyDef ( ) = nodeFrom and
59+ instrTo .getAnOperand ( ) = opFrom and
3860 (
39- nodeTo instanceof ArithmeticInstruction
40- or
41- nodeTo instanceof BitwiseInstruction
61+ instrTo instanceof ArithmeticInstruction
4262 or
43- nodeTo instanceof PointerArithmeticInstruction
63+ instrTo instanceof BitwiseInstruction
4464 or
45- nodeTo instanceof FieldAddressInstruction
65+ instrTo instanceof PointerArithmeticInstruction
4666 or
4767 // The `CopyInstruction` case is also present in non-taint data flow, but
4868 // that uses `getDef` rather than `getAnyDef`. For taint, we want flow
4969 // from a definition of `myStruct` to a `myStruct.myField` expression.
50- nodeTo instanceof CopyInstruction
70+ instrTo instanceof CopyInstruction
5171 )
5272 or
53- nodeTo .( LoadInstruction ) .getSourceAddress ( ) = nodeFrom
54- or
55- // Flow through partial reads of arrays and unions
56- nodeTo .( LoadInstruction ) .getSourceValueOperand ( ) .getAnyDef ( ) = nodeFrom and
57- not nodeFrom .isResultConflated ( ) and
73+ // Unary instructions tend to preserve enough information in practice that we
74+ // want taint to flow through.
75+ // The exception is `FieldAddressInstruction`. Together with the rules below for
76+ // `LoadInstruction`s and `ChiInstruction`s, flow through `FieldAddressInstruction`
77+ // could cause flow into one field to come out an unrelated field.
78+ // This would happen across function boundaries, where the IR would not be able to
79+ // match loads to stores.
80+ instrTo .( UnaryInstruction ) .getUnaryOperand ( ) = opFrom and
5881 (
59- nodeFrom .getResultType ( ) instanceof ArrayType or
60- nodeFrom .getResultType ( ) instanceof Union
82+ not instrTo instanceof FieldAddressInstruction
83+ or
84+ instrTo .( FieldAddressInstruction ) .getField ( ) .getDeclaringType ( ) instanceof Union
6185 )
6286 or
87+ instrTo .( LoadInstruction ) .getSourceAddressOperand ( ) = opFrom
88+ or
6389 // Flow from an element to an array or union that contains it.
64- nodeTo .( ChiInstruction ) .getPartial ( ) = nodeFrom and
65- not nodeTo .isResultConflated ( ) and
66- exists ( Type t | nodeTo .getResultLanguageType ( ) .hasType ( t , false ) |
90+ instrTo .( ChiInstruction ) .getPartialOperand ( ) = opFrom and
91+ not instrTo .isResultConflated ( ) and
92+ exists ( Type t | instrTo .getResultLanguageType ( ) .hasType ( t , false ) |
6793 t instanceof Union
6894 or
6995 t instanceof ArrayType
7096 )
97+ or
98+ // Until we have flow through indirections across calls, we'll take flow out
99+ // of the indirection and into the argument.
100+ // When we get proper flow through indirections across calls, this code can be
101+ // moved to `adjustedSink` or possibly into the `DataFlow::ExprNode` class.
102+ exists ( ReadSideEffectInstruction read |
103+ read .getSideEffectOperand ( ) = opFrom and
104+ read .getArgumentDef ( ) = instrTo
105+ )
106+ or
107+ // Until we have flow through indirections across calls, we'll take flow out
108+ // of the parameter and into its indirection.
109+ // `InitializeIndirectionInstruction` only has a single operand: the address of the
110+ // value whose indirection we are initializing. When initializing an indirection of a parameter `p`,
111+ // the IR looks like this:
112+ // ```
113+ // m1 = InitializeParameter[p] : &r1
114+ // r2 = Load[p] : r2, m1
115+ // m3 = InitializeIndirection[p] : &r2
116+ // ```
117+ // So by having flow from `r2` to `m3` we're enabling flow from `m1` to `m3`. This relies on the
118+ // `LoadOperand`'s overlap being exact.
119+ instrTo .( InitializeIndirectionInstruction ) .getAnOperand ( ) = opFrom
120+ or
121+ modeledTaintStep ( opFrom , instrTo )
71122}
72123
73124/**
@@ -110,17 +161,19 @@ predicate defaultTaintSanitizer(DataFlow::Node node) { none() }
110161 * Holds if taint can flow from `nodeIn` to `nodeOut` through a call to a
111162 * modeled function.
112163 */
113- predicate modeledTaintStep ( DataFlow :: Node nodeIn , DataFlow :: Node nodeOut ) {
164+ predicate modeledTaintStep ( Operand nodeIn , Instruction nodeOut ) {
114165 exists ( CallInstruction call , TaintFunction func , FunctionInput modelIn , FunctionOutput modelOut |
115166 (
116167 nodeIn = callInput ( call , modelIn )
117168 or
118169 exists ( int n |
119- modelIn .isParameterDeref ( n ) and
120- nodeIn = callInput ( call , any ( InParameter inParam | inParam .getIndex ( ) = n ) )
170+ modelIn .isParameterDerefOrQualifierObject ( n ) and
171+ if n = - 1
172+ then nodeIn = callInput ( call , any ( InQualifierObject inQualifier ) )
173+ else nodeIn = callInput ( call , any ( InParameter inParam | inParam .getIndex ( ) = n ) )
121174 )
122175 ) and
123- nodeOut . asInstruction ( ) = callOutput ( call , modelOut ) and
176+ nodeOut = callOutput ( call , modelOut ) and
124177 call .getStaticCallTarget ( ) = func and
125178 func .hasTaintFlow ( modelIn , modelOut )
126179 )
@@ -135,11 +188,29 @@ predicate modeledTaintStep(DataFlow::Node nodeIn, DataFlow::Node nodeOut) {
135188 int indexMid , InParameter modelMidIn , OutReturnValue modelOut
136189 |
137190 nodeIn = callInput ( call , modelIn ) and
138- nodeOut . asInstruction ( ) = callOutput ( call , modelOut ) and
191+ nodeOut = callOutput ( call , modelOut ) and
139192 call .getStaticCallTarget ( ) = func and
140193 func .( TaintFunction ) .hasTaintFlow ( modelIn , modelMidOut ) and
141194 func .( DataFlowFunction ) .hasDataFlow ( modelMidIn , modelOut ) and
142195 modelMidOut .isParameterDeref ( indexMid ) and
143196 modelMidIn .isParameter ( indexMid )
144197 )
198+ or
199+ // Taint flow from a pointer argument to an output, when the model specifies flow from the deref
200+ // to that output, but the deref is not modeled in the IR for the caller.
201+ exists (
202+ CallInstruction call , ReadSideEffectInstruction read , Function func , FunctionInput modelIn ,
203+ FunctionOutput modelOut
204+ |
205+ read .getSideEffectOperand ( ) = callInput ( call , modelIn ) and
206+ read .getArgumentDef ( ) = nodeIn .getDef ( ) and
207+ not read .getSideEffect ( ) .isResultModeled ( ) and
208+ call .getStaticCallTarget ( ) = func and
209+ (
210+ func .( DataFlowFunction ) .hasDataFlow ( modelIn , modelOut )
211+ or
212+ func .( TaintFunction ) .hasTaintFlow ( modelIn , modelOut )
213+ ) and
214+ nodeOut = callOutput ( call , modelOut )
215+ )
145216}
0 commit comments