@@ -74,10 +74,6 @@ private class DefaultTaintTrackingCfg extends TaintTracking::Configuration {
7474
7575 override predicate isSink ( DataFlow:: Node sink ) { exists ( adjustedSink ( sink ) ) }
7676
77- override predicate isAdditionalTaintStep ( DataFlow:: Node n1 , DataFlow:: Node n2 ) {
78- commonTaintStep ( n1 , n2 )
79- }
80-
8177 override predicate isSanitizer ( DataFlow:: Node node ) { nodeIsBarrier ( node ) }
8278
8379 override predicate isSanitizerIn ( DataFlow:: Node node ) { nodeIsBarrierIn ( node ) }
@@ -93,8 +89,6 @@ private class ToGlobalVarTaintTrackingCfg extends TaintTracking::Configuration {
9389 }
9490
9591 override predicate isAdditionalTaintStep ( DataFlow:: Node n1 , DataFlow:: Node n2 ) {
96- commonTaintStep ( n1 , n2 )
97- or
9892 writesVariable ( n1 .asInstruction ( ) , n2 .asVariable ( ) .( GlobalOrNamespaceVariable ) )
9993 or
10094 readsVariable ( n2 .asInstruction ( ) , n1 .asVariable ( ) .( GlobalOrNamespaceVariable ) )
@@ -117,8 +111,6 @@ private class FromGlobalVarTaintTrackingCfg extends TaintTracking2::Configuratio
117111 override predicate isSink ( DataFlow:: Node sink ) { exists ( adjustedSink ( sink ) ) }
118112
119113 override predicate isAdditionalTaintStep ( DataFlow:: Node n1 , DataFlow:: Node n2 ) {
120- commonTaintStep ( n1 , n2 )
121- or
122114 // Additional step for flow out of variables. There is no flow _into_
123115 // variables in this configuration, so this step only serves to take flow
124116 // out of a variable that's a source.
@@ -227,207 +219,6 @@ private predicate nodeIsBarrierIn(DataFlow::Node node) {
227219 )
228220}
229221
230- cached
231- private predicate commonTaintStep ( DataFlow:: Node fromNode , DataFlow:: Node toNode ) {
232- operandToInstructionTaintStep ( fromNode .asOperand ( ) , toNode .asInstruction ( ) )
233- or
234- instructionToOperandTaintStep ( fromNode .asInstruction ( ) , toNode .asOperand ( ) )
235- }
236-
237- private predicate instructionToOperandTaintStep ( Instruction fromInstr , Operand toOperand ) {
238- // Propagate flow from the definition of an operand to the operand, even when the overlap is inexact.
239- // We only do this in certain cases:
240- // 1. The instruction's result must not be conflated, and
241- // 2. The instruction's result type is one the types where we expect element-to-object flow. Currently
242- // this is array types and union types. This matches the other two cases of element-to-object flow in
243- // `DefaultTaintTracking`.
244- toOperand .getAnyDef ( ) = fromInstr and
245- not fromInstr .isResultConflated ( ) and
246- (
247- fromInstr .getResultType ( ) instanceof ArrayType or
248- fromInstr .getResultType ( ) instanceof Union
249- )
250- or
251- exists ( ReadSideEffectInstruction readInstr |
252- fromInstr = readInstr .getArgumentDef ( ) and
253- toOperand = readInstr .getSideEffectOperand ( )
254- )
255- }
256-
257- private predicate operandToInstructionTaintStep ( Operand fromOperand , Instruction toInstr ) {
258- // Expressions computed from tainted data are also tainted
259- exists ( CallInstruction call , int argIndex | call = toInstr |
260- isPureFunction ( call .getStaticCallTarget ( ) .getName ( ) ) and
261- fromOperand = getACallArgumentOrIndirection ( call , argIndex ) and
262- forall ( Operand argOperand | argOperand = call .getAnArgumentOperand ( ) |
263- argOperand = getACallArgumentOrIndirection ( call , argIndex ) or
264- predictableInstruction ( argOperand .getAnyDef ( ) )
265- ) and
266- // flow through `strlen` tends to cause dubious results, if the length is
267- // bounded.
268- not call .getStaticCallTarget ( ) .getName ( ) = "strlen"
269- )
270- or
271- // Flow from argument to return value
272- toInstr =
273- any ( CallInstruction call |
274- exists ( int indexIn |
275- modelTaintToReturnValue ( call .getStaticCallTarget ( ) , indexIn ) and
276- fromOperand = getACallArgumentOrIndirection ( call , indexIn ) and
277- not predictableOnlyFlow ( call .getStaticCallTarget ( ) .getName ( ) )
278- )
279- )
280- or
281- // Flow from input argument to output argument
282- // TODO: This won't work in practice as long as all aliased memory is tracked
283- // together in a single virtual variable.
284- // TODO: Will this work on the test for `TaintedPath.ql`, where the output arg
285- // is a pointer addition expression?
286- toInstr =
287- any ( WriteSideEffectInstruction outInstr |
288- exists ( CallInstruction call , int indexIn , int indexOut |
289- modelTaintToParameter ( call .getStaticCallTarget ( ) , indexIn , indexOut ) and
290- fromOperand = getACallArgumentOrIndirection ( call , indexIn ) and
291- outInstr .getIndex ( ) = indexOut and
292- outInstr .getPrimaryInstruction ( ) = call
293- )
294- )
295- or
296- // Flow through pointer dereference
297- toInstr .( LoadInstruction ) .getSourceAddressOperand ( ) = fromOperand
298- or
299- // Flow through partial reads of arrays and unions
300- toInstr .( LoadInstruction ) .getSourceValueOperand ( ) = fromOperand and
301- exists ( Instruction fromInstr | fromInstr = fromOperand .getAnyDef ( ) |
302- not fromInstr .isResultConflated ( ) and
303- (
304- fromInstr .getResultType ( ) instanceof ArrayType or
305- fromInstr .getResultType ( ) instanceof Union
306- )
307- )
308- or
309- // Unary instructions tend to preserve enough information in practice that we
310- // want taint to flow through.
311- // The exception is `FieldAddressInstruction`. Together with the rule for
312- // `LoadInstruction` above and for `ChiInstruction` below, flow through
313- // `FieldAddressInstruction` could cause flow into one field to come out an
314- // unrelated field. This would happen across function boundaries, where the IR
315- // would not be able to match loads to stores.
316- toInstr .( UnaryInstruction ) .getUnaryOperand ( ) = fromOperand and
317- (
318- not toInstr instanceof FieldAddressInstruction
319- or
320- toInstr .( FieldAddressInstruction ) .getField ( ) .getDeclaringType ( ) instanceof Union
321- )
322- or
323- // Flow from an element to an array or union that contains it.
324- toInstr .( ChiInstruction ) .getPartialOperand ( ) = fromOperand and
325- not toInstr .isResultConflated ( ) and
326- exists ( Type t | toInstr .getResultLanguageType ( ) .hasType ( t , false ) |
327- t instanceof Union
328- or
329- t instanceof ArrayType
330- )
331- or
332- exists ( BinaryInstruction bin |
333- bin = toInstr and
334- predictableInstruction ( toInstr .getAnOperand ( ) .getDef ( ) ) and
335- fromOperand = toInstr .getAnOperand ( )
336- )
337- or
338- // This is part of the translation of `a[i]`, where we want taint to flow
339- // from `a`.
340- toInstr .( PointerAddInstruction ) .getLeftOperand ( ) = fromOperand
341- or
342- // Until we have flow through indirections across calls, we'll take flow out
343- // of the indirection and into the argument.
344- // When we get proper flow through indirections across calls, this code can be
345- // moved to `adjusedSink` or possibly into the `DataFlow::ExprNode` class.
346- exists ( ReadSideEffectInstruction read |
347- read .getSideEffectOperand ( ) = fromOperand and
348- read .getArgumentDef ( ) = toInstr
349- )
350- or
351- // Until we have from through indirections across calls, we'll take flow out
352- // of the parameter and into its indirection.
353- // `InitializeIndirectionInstruction` only has a single operand: the address of the
354- // value whose indirection we are initializing. When initializing an indirection of a parameter `p`,
355- // the IR looks like this:
356- // ```
357- // m1 = InitializeParameter[p] : &r1
358- // r2 = Load[p] : r2, m1
359- // m3 = InitializeIndirection[p] : &r2
360- // ```
361- // So by having flow from `r2` to `m3` we're enabling flow from `m1` to `m3`. This relies on the
362- // `LoadOperand`'s overlap being exact.
363- toInstr .( InitializeIndirectionInstruction ) .getAnOperand ( ) = fromOperand
364- }
365-
366- /**
367- * Returns the index of the side effect instruction corresponding to the specified function output,
368- * if one exists.
369- */
370- private int getWriteSideEffectIndex ( FunctionOutput output ) {
371- output .isParameterDeref ( result )
372- or
373- output .isQualifierObject ( ) and result = - 1
374- }
375-
376- /**
377- * Get an operand that goes into argument `argumentIndex` of `call`. This
378- * can be either directly or through one pointer indirection.
379- */
380- private Operand getACallArgumentOrIndirection ( CallInstruction call , int argumentIndex ) {
381- result = call .getPositionalArgumentOperand ( argumentIndex )
382- or
383- exists ( ReadSideEffectInstruction readSE |
384- // TODO: why are read side effect operands imprecise?
385- result = readSE .getSideEffectOperand ( ) and
386- readSE .getPrimaryInstruction ( ) = call and
387- readSE .getIndex ( ) = argumentIndex
388- )
389- }
390-
391- private predicate modelTaintToParameter ( Function f , int parameterIn , int parameterOut ) {
392- exists ( FunctionInput modelIn , FunctionOutput modelOut |
393- (
394- f .( DataFlowFunction ) .hasDataFlow ( modelIn , modelOut )
395- or
396- f .( TaintFunction ) .hasTaintFlow ( modelIn , modelOut )
397- ) and
398- ( modelIn .isParameter ( parameterIn ) or modelIn .isParameterDeref ( parameterIn ) ) and
399- parameterOut = getWriteSideEffectIndex ( modelOut )
400- )
401- }
402-
403- private predicate modelTaintToReturnValue ( Function f , int parameterIn ) {
404- // Taint flow from parameter to return value
405- exists ( FunctionInput modelIn , FunctionOutput modelOut |
406- f .( TaintFunction ) .hasTaintFlow ( modelIn , modelOut ) and
407- ( modelIn .isParameter ( parameterIn ) or modelIn .isParameterDeref ( parameterIn ) ) and
408- ( modelOut .isReturnValue ( ) or modelOut .isReturnValueDeref ( ) )
409- )
410- or
411- // Data flow (not taint flow) to where the return value points. For the time
412- // being we will conflate pointers and objects in taint tracking.
413- exists ( FunctionInput modelIn , FunctionOutput modelOut |
414- f .( DataFlowFunction ) .hasDataFlow ( modelIn , modelOut ) and
415- ( modelIn .isParameter ( parameterIn ) or modelIn .isParameterDeref ( parameterIn ) ) and
416- modelOut .isReturnValueDeref ( )
417- )
418- or
419- // Taint flow from one argument to another and data flow from an argument to a
420- // return value. This happens in functions like `strcat` and `memcpy`. We
421- // could model this flow in two separate steps, but that would add reverse
422- // flow from the write side-effect to the call instruction, which may not be
423- // desirable.
424- exists ( int parameterMid , InParameter modelMid , OutReturnValue returnOut |
425- modelTaintToParameter ( f , parameterIn , parameterMid ) and
426- modelMid .isParameter ( parameterMid ) and
427- f .( DataFlowFunction ) .hasDataFlow ( modelMid , returnOut )
428- )
429- }
430-
431222private Element adjustedSink ( DataFlow:: Node sink ) {
432223 // TODO: is it more appropriate to use asConvertedExpr here and avoid
433224 // `getConversion*`? Or will that cause us to miss some cases where there's
0 commit comments