C++: Remove commonTaintStep from DefaultTaintTracking.

MathiasVP · MathiasVP · commit 3914a93504bb · 2021-03-17T11:56:59.000+01:00
diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll
@@ -74,10 +74,6 @@ private class DefaultTaintTrackingCfg extends TaintTracking::Configuration {
 
   override predicate isSink(DataFlow::Node sink) { exists(adjustedSink(sink)) }
 
-  override predicate isAdditionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) {
-    commonTaintStep(n1, n2)
-  }
-
   override predicate isSanitizer(DataFlow::Node node) { nodeIsBarrier(node) }
 
   override predicate isSanitizerIn(DataFlow::Node node) { nodeIsBarrierIn(node) }
@@ -93,8 +89,6 @@ private class ToGlobalVarTaintTrackingCfg extends TaintTracking::Configuration {
   }
 
   override predicate isAdditionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) {
-    commonTaintStep(n1, n2)
-    or
     writesVariable(n1.asInstruction(), n2.asVariable().(GlobalOrNamespaceVariable))
     or
     readsVariable(n2.asInstruction(), n1.asVariable().(GlobalOrNamespaceVariable))
@@ -117,8 +111,6 @@ private class FromGlobalVarTaintTrackingCfg extends TaintTracking2::Configuratio
   override predicate isSink(DataFlow::Node sink) { exists(adjustedSink(sink)) }
 
   override predicate isAdditionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) {
-    commonTaintStep(n1, n2)
-    or
     // Additional step for flow out of variables. There is no flow _into_
     // variables in this configuration, so this step only serves to take flow
     // out of a variable that's a source.
@@ -227,207 +219,6 @@ private predicate nodeIsBarrierIn(DataFlow::Node node) {
   )
 }
 
-cached
-private predicate commonTaintStep(DataFlow::Node fromNode, DataFlow::Node toNode) {
-  operandToInstructionTaintStep(fromNode.asOperand(), toNode.asInstruction())
-  or
-  instructionToOperandTaintStep(fromNode.asInstruction(), toNode.asOperand())
-}
-
-private predicate instructionToOperandTaintStep(Instruction fromInstr, Operand toOperand) {
-  // Propagate flow from the definition of an operand to the operand, even when the overlap is inexact.
-  // We only do this in certain cases:
-  // 1. The instruction's result must not be conflated, and
-  // 2. The instruction's result type is one the types where we expect element-to-object flow. Currently
-  // this is array types and union types. This matches the other two cases of element-to-object flow in
-  // `DefaultTaintTracking`.
-  toOperand.getAnyDef() = fromInstr and
-  not fromInstr.isResultConflated() and
-  (
-    fromInstr.getResultType() instanceof ArrayType or
-    fromInstr.getResultType() instanceof Union
-  )
-  or
-  exists(ReadSideEffectInstruction readInstr |
-    fromInstr = readInstr.getArgumentDef() and
-    toOperand = readInstr.getSideEffectOperand()
-  )
-}
-
-private predicate operandToInstructionTaintStep(Operand fromOperand, Instruction toInstr) {
-  // Expressions computed from tainted data are also tainted
-  exists(CallInstruction call, int argIndex | call = toInstr |
-    isPureFunction(call.getStaticCallTarget().getName()) and
-    fromOperand = getACallArgumentOrIndirection(call, argIndex) and
-    forall(Operand argOperand | argOperand = call.getAnArgumentOperand() |
-      argOperand = getACallArgumentOrIndirection(call, argIndex) or
-      predictableInstruction(argOperand.getAnyDef())
-    ) and
-    // flow through `strlen` tends to cause dubious results, if the length is
-    // bounded.
-    not call.getStaticCallTarget().getName() = "strlen"
-  )
-  or
-  // Flow from argument to return value
-  toInstr =
-    any(CallInstruction call |
-      exists(int indexIn |
-        modelTaintToReturnValue(call.getStaticCallTarget(), indexIn) and
-        fromOperand = getACallArgumentOrIndirection(call, indexIn) and
-        not predictableOnlyFlow(call.getStaticCallTarget().getName())
-      )
-    )
-  or
-  // Flow from input argument to output argument
-  // TODO: This won't work in practice as long as all aliased memory is tracked
-  // together in a single virtual variable.
-  // TODO: Will this work on the test for `TaintedPath.ql`, where the output arg
-  // is a pointer addition expression?
-  toInstr =
-    any(WriteSideEffectInstruction outInstr |
-      exists(CallInstruction call, int indexIn, int indexOut |
-        modelTaintToParameter(call.getStaticCallTarget(), indexIn, indexOut) and
-        fromOperand = getACallArgumentOrIndirection(call, indexIn) and
-        outInstr.getIndex() = indexOut and
-        outInstr.getPrimaryInstruction() = call
-      )
-    )
-  or
-  // Flow through pointer dereference
-  toInstr.(LoadInstruction).getSourceAddressOperand() = fromOperand
-  or
-  // Flow through partial reads of arrays and unions
-  toInstr.(LoadInstruction).getSourceValueOperand() = fromOperand and
-  exists(Instruction fromInstr | fromInstr = fromOperand.getAnyDef() |
-    not fromInstr.isResultConflated() and
-    (
-      fromInstr.getResultType() instanceof ArrayType or
-      fromInstr.getResultType() instanceof Union
-    )
-  )
-  or
-  // Unary instructions tend to preserve enough information in practice that we
-  // want taint to flow through.
-  // The exception is `FieldAddressInstruction`. Together with the rule for
-  // `LoadInstruction` above and for `ChiInstruction` below, flow through
-  // `FieldAddressInstruction` could cause flow into one field to come out an
-  // unrelated field. This would happen across function boundaries, where the IR
-  // would not be able to match loads to stores.
-  toInstr.(UnaryInstruction).getUnaryOperand() = fromOperand and
-  (
-    not toInstr instanceof FieldAddressInstruction
-    or
-    toInstr.(FieldAddressInstruction).getField().getDeclaringType() instanceof Union
-  )
-  or
-  // Flow from an element to an array or union that contains it.
-  toInstr.(ChiInstruction).getPartialOperand() = fromOperand and
-  not toInstr.isResultConflated() and
-  exists(Type t | toInstr.getResultLanguageType().hasType(t, false) |
-    t instanceof Union
-    or
-    t instanceof ArrayType
-  )
-  or
-  exists(BinaryInstruction bin |
-    bin = toInstr and
-    predictableInstruction(toInstr.getAnOperand().getDef()) and
-    fromOperand = toInstr.getAnOperand()
-  )
-  or
-  // This is part of the translation of `a[i]`, where we want taint to flow
-  // from `a`.
-  toInstr.(PointerAddInstruction).getLeftOperand() = fromOperand
-  or
-  // Until we have flow through indirections across calls, we'll take flow out
-  // of the indirection and into the argument.
-  // When we get proper flow through indirections across calls, this code can be
-  // moved to `adjusedSink` or possibly into the `DataFlow::ExprNode` class.
-  exists(ReadSideEffectInstruction read |
-    read.getSideEffectOperand() = fromOperand and
-    read.getArgumentDef() = toInstr
-  )
-  or
-  // Until we have from through indirections across calls, we'll take flow out
-  // of the parameter and into its indirection.
-  // `InitializeIndirectionInstruction` only has a single operand: the address of the
-  // value whose indirection we are initializing. When initializing an indirection of a parameter `p`,
-  // the IR looks like this:
-  // ```
-  // m1 = InitializeParameter[p] : &r1
-  // r2 = Load[p] : r2, m1
-  // m3 = InitializeIndirection[p] : &r2
-  // ```
-  // So by having flow from `r2` to `m3` we're enabling flow from `m1` to `m3`. This relies on the
-  // `LoadOperand`'s overlap being exact.
-  toInstr.(InitializeIndirectionInstruction).getAnOperand() = fromOperand
-}
-
-/**
- * Returns the index of the side effect instruction corresponding to the specified function output,
- * if one exists.
- */
-private int getWriteSideEffectIndex(FunctionOutput output) {
-  output.isParameterDeref(result)
-  or
-  output.isQualifierObject() and result = -1
-}
-
-/**
- * Get an operand that goes into argument `argumentIndex` of `call`. This
- * can be either directly or through one pointer indirection.
- */
-private Operand getACallArgumentOrIndirection(CallInstruction call, int argumentIndex) {
-  result = call.getPositionalArgumentOperand(argumentIndex)
-  or
-  exists(ReadSideEffectInstruction readSE |
-    // TODO: why are read side effect operands imprecise?
-    result = readSE.getSideEffectOperand() and
-    readSE.getPrimaryInstruction() = call and
-    readSE.getIndex() = argumentIndex
-  )
-}
-
-private predicate modelTaintToParameter(Function f, int parameterIn, int parameterOut) {
-  exists(FunctionInput modelIn, FunctionOutput modelOut |
-    (
-      f.(DataFlowFunction).hasDataFlow(modelIn, modelOut)
-      or
-      f.(TaintFunction).hasTaintFlow(modelIn, modelOut)
-    ) and
-    (modelIn.isParameter(parameterIn) or modelIn.isParameterDeref(parameterIn)) and
-    parameterOut = getWriteSideEffectIndex(modelOut)
-  )
-}
-
-private predicate modelTaintToReturnValue(Function f, int parameterIn) {
-  // Taint flow from parameter to return value
-  exists(FunctionInput modelIn, FunctionOutput modelOut |
-    f.(TaintFunction).hasTaintFlow(modelIn, modelOut) and
-    (modelIn.isParameter(parameterIn) or modelIn.isParameterDeref(parameterIn)) and
-    (modelOut.isReturnValue() or modelOut.isReturnValueDeref())
-  )
-  or
-  // Data flow (not taint flow) to where the return value points. For the time
-  // being we will conflate pointers and objects in taint tracking.
-  exists(FunctionInput modelIn, FunctionOutput modelOut |
-    f.(DataFlowFunction).hasDataFlow(modelIn, modelOut) and
-    (modelIn.isParameter(parameterIn) or modelIn.isParameterDeref(parameterIn)) and
-    modelOut.isReturnValueDeref()
-  )
-  or
-  // Taint flow from one argument to another and data flow from an argument to a
-  // return value. This happens in functions like `strcat` and `memcpy`. We
-  // could model this flow in two separate steps, but that would add reverse
-  // flow from the write side-effect to the call instruction, which may not be
-  // desirable.
-  exists(int parameterMid, InParameter modelMid, OutReturnValue returnOut |
-    modelTaintToParameter(f, parameterIn, parameterMid) and
-    modelMid.isParameter(parameterMid) and
-    f.(DataFlowFunction).hasDataFlow(modelMid, returnOut)
-  )
-}
-
 private Element adjustedSink(DataFlow::Node sink) {
   // TODO: is it more appropriate to use asConvertedExpr here and avoid
   // `getConversion*`? Or will that cause us to miss some cases where there's