@@ -204,16 +204,27 @@ private predicate nodeIsBarrierIn(DataFlow::Node node) {
204204
205205cached
206206private predicate commonTaintStep ( DataFlow:: Node fromNode , DataFlow:: Node toNode ) {
207- instructionToInstructionTaintStep ( fromNode .asInstruction ( ) , toNode .asInstruction ( ) )
208- or
209207 operandToInstructionTaintStep ( fromNode .asOperand ( ) , toNode .asInstruction ( ) )
210208 or
211- operandToOperandTaintStep ( fromNode .asOperand ( ) , toNode .asOperand ( ) )
209+ instructionToOperandTaintStep ( fromNode .asInstruction ( ) , toNode .asOperand ( ) )
212210}
213211
214- private predicate operandToOperandTaintStep ( Operand fromOperand , Operand toOperand ) {
212+ private predicate instructionToOperandTaintStep ( Instruction fromInstr , Operand toOperand ) {
213+ // Propagate flow from the definition of an operand to the operand, even when the overlap is inexact.
214+ // We only do this in certain cases:
215+ // 1. The instruction's result must not be conflated, and
216+ // 2. The instruction's result type is one the types where we expect element-to-object flow. Currently
217+ // this is array types and union types. This matches the other two cases of element-to-object flow in
218+ // `DefaultTaintTracking`.
219+ toOperand .getAnyDef ( ) = fromInstr and
220+ not fromInstr .isResultConflated ( ) and
221+ (
222+ fromInstr .getResultType ( ) instanceof ArrayType or
223+ fromInstr .getResultType ( ) instanceof Union
224+ )
225+ or
215226 exists ( ReadSideEffectInstruction readInstr |
216- fromOperand = readInstr .getArgumentOperand ( ) and
227+ fromInstr = readInstr .getArgumentDef ( ) and
217228 toOperand = readInstr .getSideEffectOperand ( )
218229 )
219230}
@@ -256,18 +267,18 @@ private predicate operandToInstructionTaintStep(Operand fromOperand, Instruction
256267 outInstr .getPrimaryInstruction ( ) = call
257268 )
258269 )
259- }
260-
261- private predicate instructionToInstructionTaintStep ( Instruction i1 , Instruction i2 ) {
270+ or
262271 // Flow through pointer dereference
263- i2 .( LoadInstruction ) .getSourceAddress ( ) = i1
272+ toInstr .( LoadInstruction ) .getSourceAddressOperand ( ) = fromOperand
264273 or
265274 // Flow through partial reads of arrays and unions
266- i2 .( LoadInstruction ) .getSourceValueOperand ( ) .getAnyDef ( ) = i1 and
267- not i1 .isResultConflated ( ) and
268- (
269- i1 .getResultType ( ) instanceof ArrayType or
270- i1 .getResultType ( ) instanceof Union
275+ toInstr .( LoadInstruction ) .getSourceValueOperand ( ) = fromOperand and
276+ exists ( Instruction fromInstr | fromInstr = fromOperand .getAnyDef ( ) |
277+ not fromInstr .isResultConflated ( ) and
278+ (
279+ fromInstr .getResultType ( ) instanceof ArrayType or
280+ fromInstr .getResultType ( ) instanceof Union
281+ )
271282 )
272283 or
273284 // Unary instructions tend to preserve enough information in practice that we
@@ -277,63 +288,54 @@ private predicate instructionToInstructionTaintStep(Instruction i1, Instruction
277288 // `FieldAddressInstruction` could cause flow into one field to come out an
278289 // unrelated field. This would happen across function boundaries, where the IR
279290 // would not be able to match loads to stores.
280- i2 .( UnaryInstruction ) .getUnary ( ) = i1 and
291+ toInstr .( UnaryInstruction ) .getUnaryOperand ( ) = fromOperand and
281292 (
282- not i2 instanceof FieldAddressInstruction
293+ not toInstr instanceof FieldAddressInstruction
283294 or
284- i2 .( FieldAddressInstruction ) .getField ( ) .getDeclaringType ( ) instanceof Union
295+ toInstr .( FieldAddressInstruction ) .getField ( ) .getDeclaringType ( ) instanceof Union
285296 )
286297 or
287- // Flow out of definition-by-reference
288- i2 .( ChiInstruction ) .getPartial ( ) = i1 .( WriteSideEffectInstruction ) and
289- not i2 .isResultConflated ( )
290- or
291298 // Flow from an element to an array or union that contains it.
292- i2 .( ChiInstruction ) .getPartial ( ) = i1 and
293- not i2 .isResultConflated ( ) and
294- exists ( Type t | i2 .getResultLanguageType ( ) .hasType ( t , false ) |
299+ toInstr .( ChiInstruction ) .getPartialOperand ( ) = fromOperand and
300+ not toInstr .isResultConflated ( ) and
301+ exists ( Type t | toInstr .getResultLanguageType ( ) .hasType ( t , false ) |
295302 t instanceof Union
296303 or
297304 t instanceof ArrayType
298305 )
299306 or
300307 exists ( BinaryInstruction bin |
301- bin = i2 and
302- predictableInstruction ( i2 .getAnOperand ( ) .getDef ( ) ) and
303- i1 = i2 .getAnOperand ( ) . getDef ( )
308+ bin = toInstr and
309+ predictableInstruction ( toInstr .getAnOperand ( ) .getDef ( ) ) and
310+ fromOperand = toInstr .getAnOperand ( )
304311 )
305312 or
306313 // This is part of the translation of `a[i]`, where we want taint to flow
307314 // from `a`.
308- i2 .( PointerAddInstruction ) .getLeft ( ) = i1
309- or
310- // Until we have from through indirections across calls, we'll take flow out
311- // of the parameter and into its indirection.
312- exists ( IRFunction f , Parameter parameter |
313- i1 = getInitializeParameter ( f , parameter ) and
314- i2 = getInitializeIndirection ( f , parameter )
315- )
315+ toInstr .( PointerAddInstruction ) .getLeftOperand ( ) = fromOperand
316316 or
317317 // Until we have flow through indirections across calls, we'll take flow out
318318 // of the indirection and into the argument.
319319 // When we get proper flow through indirections across calls, this code can be
320320 // moved to `adjusedSink` or possibly into the `DataFlow::ExprNode` class.
321321 exists ( ReadSideEffectInstruction read |
322- read .getAnOperand ( ) . ( SideEffectOperand ) . getAnyDef ( ) = i1 and
323- read .getArgumentDef ( ) = i2
322+ read .getSideEffectOperand ( ) = fromOperand and
323+ read .getArgumentDef ( ) = toInstr
324324 )
325- }
326-
327- pragma [ noinline]
328- private InitializeIndirectionInstruction getInitializeIndirection ( IRFunction f , Parameter p ) {
329- result .getParameter ( ) = p and
330- result .getEnclosingIRFunction ( ) = f
331- }
332-
333- pragma [ noinline]
334- private InitializeParameterInstruction getInitializeParameter ( IRFunction f , Parameter p ) {
335- result .getParameter ( ) = p and
336- result .getEnclosingIRFunction ( ) = f
325+ or
326+ // Until we have from through indirections across calls, we'll take flow out
327+ // of the parameter and into its indirection.
328+ // `InitializeIndirectionInstruction` only has a single operand: the address of the
329+ // value whose indirection we are initializing. When initializing an indirection of a parameter `p`,
330+ // the IR looks like this:
331+ // ```
332+ // m1 = InitializeParameter[p] : &r1
333+ // r2 = Load[p] : r2, m1
334+ // m3 = InitializeIndirection[p] : &r2
335+ // ```
336+ // So by having flow from `r2` to `m3` we're enabling flow from `m1` to `m3`. This relies on the
337+ // `LoadOperand`'s overlap being exact.
338+ toInstr .( InitializeIndirectionInstruction ) .getAnOperand ( ) = fromOperand
337339}
338340
339341/**
0 commit comments