@@ -11,6 +11,7 @@ private import semmle.code.cpp.ir.IR
1111private import semmle.code.cpp.controlflow.IRGuards
1212private import semmle.code.cpp.models.interfaces.DataFlow
1313
14+ cached
1415private newtype TIRDataFlowNode =
1516 TInstructionNode ( Instruction i ) or
1617 TOperandNode ( Operand op ) or
@@ -533,11 +534,11 @@ predicate localFlowStep(Node nodeFrom, Node nodeTo) { simpleLocalFlowStep(nodeFr
533534 * data flow. It may have less flow than the `localFlowStep` predicate.
534535 */
535536predicate simpleLocalFlowStep ( Node nodeFrom , Node nodeTo ) {
536- // Instruction -> Instruction flow
537- simpleInstructionLocalFlowStep ( nodeFrom .asInstruction ( ) , nodeTo .asInstruction ( ) )
538- or
539537 // Operand -> Instruction flow
540- simpleOperandLocalFlowStep ( nodeFrom .asOperand ( ) , nodeTo .asInstruction ( ) )
538+ simpleInstructionLocalFlowStep ( nodeFrom .asOperand ( ) , nodeTo .asInstruction ( ) )
539+ or
540+ // Instruction -> Operand flow
541+ simpleOperandLocalFlowStep ( nodeFrom .asInstruction ( ) , nodeTo .asOperand ( ) )
541542}
542543
543544pragma [ noinline]
@@ -549,26 +550,20 @@ private predicate getFieldSizeOfClass(Class c, Type type, int size) {
549550 )
550551}
551552
552- private predicate simpleOperandLocalFlowStep ( Operand opFrom , Instruction iTo ) {
553- // Certain dataflow steps (for instance `PostUpdateNode.getPreUpdateNode()`) generates flow to
554- // operands, so we include dataflow from those operands to the "result" of the instruction (i.e., to
555- // the instruction itself).
556- exists ( PostUpdateNode post |
557- opFrom = post .getPreUpdateNode ( ) .asOperand ( ) and
558- iTo .getAnOperand ( ) = opFrom
553+ private predicate isSingleFieldClass ( Type type , Class cTo ) {
554+ exists ( int size |
555+ cTo .getSize ( ) = size and
556+ getFieldSizeOfClass ( cTo , type , size )
559557 )
560558}
561559
562- cached
563- private predicate simpleInstructionLocalFlowStep ( Instruction iFrom , Instruction iTo ) {
564- iTo . ( CopyInstruction ) . getSourceValue ( ) = iFrom
560+ private predicate simpleOperandLocalFlowStep ( Instruction iFrom , Operand opTo ) {
561+ // Propagate flow from an instruction to its exact uses.
562+ opTo . getDef ( ) = iFrom
565563 or
566- iTo .( PhiInstruction ) .getAnOperand ( ) .getDef ( ) = iFrom
567- or
568- // A read side effect is almost never exact since we don't know exactly how
569- // much memory the callee will read.
570- iTo .( ReadSideEffectInstruction ) .getSideEffectOperand ( ) .getAnyDef ( ) = iFrom and
571- not iFrom .isResultConflated ( )
564+ opTo = any ( ReadSideEffectInstruction read ) .getSideEffectOperand ( ) and
565+ not iFrom .isResultConflated ( ) and
566+ iFrom = opTo .getAnyDef ( )
572567 or
573568 // Loading a single `int` from an `int *` parameter is not an exact load since
574569 // the parameter may point to an entire array rather than a single `int`. The
@@ -582,20 +577,38 @@ private predicate simpleInstructionLocalFlowStep(Instruction iFrom, Instruction
582577 // leads to a phi node.
583578 exists ( InitializeIndirectionInstruction init |
584579 iFrom = init and
585- iTo . ( LoadInstruction ) . getSourceValueOperand ( ) .getAnyDef ( ) = init and
580+ opTo . ( LoadOperand ) .getAnyDef ( ) = init and
586581 // Check that the types match. Otherwise we can get flow from an object to
587582 // its fields, which leads to field conflation when there's flow from other
588583 // fields to the object elsewhere.
589584 init .getParameter ( ) .getType ( ) .getUnspecifiedType ( ) .( DerivedType ) .getBaseType ( ) =
590- iTo .getResultType ( ) .getUnspecifiedType ( )
585+ opTo .getType ( ) .getUnspecifiedType ( )
586+ )
587+ or
588+ // Flow from stores to structs with a single field to a load of that field.
589+ exists ( LoadInstruction load |
590+ load .getSourceValueOperand ( ) = opTo and
591+ opTo .getAnyDef ( ) = iFrom and
592+ isSingleFieldClass ( iFrom .getResultType ( ) , opTo .getType ( ) )
591593 )
594+ }
595+
596+ cached
597+ private predicate simpleInstructionLocalFlowStep ( Operand opFrom , Instruction iTo ) {
598+ iTo .( CopyInstruction ) .getSourceValueOperand ( ) = opFrom
599+ or
600+ iTo .( PhiInstruction ) .getAnInputOperand ( ) = opFrom
601+ or
602+ // A read side effect is almost never exact since we don't know exactly how
603+ // much memory the callee will read.
604+ iTo .( ReadSideEffectInstruction ) .getSideEffectOperand ( ) = opFrom
592605 or
593606 // Treat all conversions as flow, even conversions between different numeric types.
594- iTo .( ConvertInstruction ) .getUnary ( ) = iFrom
607+ iTo .( ConvertInstruction ) .getUnaryOperand ( ) = opFrom
595608 or
596- iTo .( CheckedConvertOrNullInstruction ) .getUnary ( ) = iFrom
609+ iTo .( CheckedConvertOrNullInstruction ) .getUnaryOperand ( ) = opFrom
597610 or
598- iTo .( InheritanceConversionInstruction ) .getUnary ( ) = iFrom
611+ iTo .( InheritanceConversionInstruction ) .getUnaryOperand ( ) = opFrom
599612 or
600613 // A chi instruction represents a point where a new value (the _partial_
601614 // operand) may overwrite an old value (the _total_ operand), but the alias
@@ -608,7 +621,7 @@ private predicate simpleInstructionLocalFlowStep(Instruction iFrom, Instruction
608621 //
609622 // Flow through the partial operand belongs in the taint-tracking libraries
610623 // for now.
611- iTo .getAnOperand ( ) .( ChiTotalOperand ) . getDef ( ) = iFrom
624+ iTo .getAnOperand ( ) .( ChiTotalOperand ) = opFrom
612625 or
613626 // Add flow from write side-effects to non-conflated chi instructions through their
614627 // partial operands. From there, a `readStep` will find subsequent reads of that field.
@@ -623,24 +636,16 @@ private predicate simpleInstructionLocalFlowStep(Instruction iFrom, Instruction
623636 // Here, a `WriteSideEffectInstruction` will provide a new definition for `p->x` after the call to
624637 // `setX`, which will be melded into `p` through a chi instruction.
625638 exists ( ChiInstruction chi | chi = iTo |
626- chi .getPartialOperand ( ) .getDef ( ) = iFrom .( WriteSideEffectInstruction ) and
639+ opFrom .getAnyDef ( ) instanceof WriteSideEffectInstruction and
640+ chi .getPartialOperand ( ) = opFrom and
627641 not chi .isResultConflated ( )
628642 )
629643 or
630- // Flow from stores to structs with a single field to a load of that field.
631- iTo .( LoadInstruction ) .getSourceValueOperand ( ) .getAnyDef ( ) = iFrom and
632- exists ( int size , Type type , Class cTo |
633- type = iFrom .getResultType ( ) and
634- cTo = iTo .getResultType ( ) and
635- cTo .getSize ( ) = size and
636- getFieldSizeOfClass ( cTo , type , size )
637- )
638- or
639644 // Flow through modeled functions
640- modelFlow ( iFrom , iTo )
645+ modelFlow ( opFrom , iTo )
641646}
642647
643- private predicate modelFlow ( Instruction iFrom , Instruction iTo ) {
648+ private predicate modelFlow ( Operand opFrom , Instruction iTo ) {
644649 exists (
645650 CallInstruction call , DataFlowFunction func , FunctionInput modelIn , FunctionOutput modelOut
646651 |
@@ -665,17 +670,17 @@ private predicate modelFlow(Instruction iFrom, Instruction iTo) {
665670 (
666671 exists ( int index |
667672 modelIn .isParameter ( index ) and
668- iFrom = call .getPositionalArgument ( index )
673+ opFrom = call .getPositionalArgumentOperand ( index )
669674 )
670675 or
671676 exists ( int index , ReadSideEffectInstruction read |
672677 modelIn .isParameterDeref ( index ) and
673678 read = getSideEffectFor ( call , index ) and
674- iFrom = read .getSideEffectOperand ( ) . getAnyDef ( )
679+ opFrom = read .getSideEffectOperand ( )
675680 )
676681 or
677682 modelIn .isQualifierAddress ( ) and
678- iFrom = call .getThisArgument ( )
683+ opFrom = call .getThisArgumentOperand ( )
679684 // TODO: add read side effects for qualifiers
680685 )
681686 )
0 commit comments