Skip to content

Commit eb4f1e1

Browse files
committed
C++: Restore some of the lost test results by doing operand -> instruction taint steps in IR TaintTracking.
1 parent 23d3109 commit eb4f1e1

File tree

5 files changed

+62
-64
lines changed

5 files changed

+62
-64
lines changed

cpp/ql/src/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -613,7 +613,7 @@ module TaintedWithPath {
613613
// Step to return value of a modeled function when an input taints the
614614
// dereference of the return value
615615
exists(CallInstruction call, Function func, FunctionInput modelIn, FunctionOutput modelOut |
616-
n1 = callInput(call, modelIn) and
616+
n1.asOperand() = callInput(call, modelIn) and
617617
(
618618
func.(TaintFunction).hasTaintFlow(modelIn, modelOut)
619619
or

cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/ModelUtil.qll

Lines changed: 9 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -9,30 +9,18 @@ private import semmle.code.cpp.ir.dataflow.DataFlow
99
/**
1010
* Gets the instruction that goes into `input` for `call`.
1111
*/
12-
DataFlow::Node callInput(CallInstruction call, FunctionInput input) {
13-
// A positional argument
12+
Operand callInput(CallInstruction call, FunctionInput input) {
13+
// An argument or qualifier
1414
exists(int index |
15-
result.asInstruction() = call.getPositionalArgument(index) and
16-
input.isParameter(index)
15+
result = call.getArgumentOperand(index) and
16+
input.isParameterOrQualifierAddress(index)
1717
)
1818
or
19-
// A value pointed to by a positional argument
19+
// A value pointed to by an argument or qualifier
2020
exists(ReadSideEffectInstruction read |
21-
result.asOperand() = read.getSideEffectOperand() and
21+
result = read.getSideEffectOperand() and
2222
read.getPrimaryInstruction() = call and
23-
input.isParameterDeref(read.getIndex())
24-
)
25-
or
26-
// The qualifier pointer
27-
result.asInstruction() = call.getThisArgument() and
28-
input.isQualifierAddress()
29-
or
30-
// The qualifier object
31-
exists(ReadSideEffectInstruction read |
32-
result.asOperand() = read.getSideEffectOperand() and
33-
read.getPrimaryInstruction() = call and
34-
read.getIndex() = -1 and
35-
input.isQualifierObject()
23+
input.isParameterDerefOrQualifierObject(read.getIndex())
3624
)
3725
}
3826

@@ -44,19 +32,11 @@ Instruction callOutput(CallInstruction call, FunctionOutput output) {
4432
result = call and
4533
output.isReturnValue()
4634
or
47-
// The side effect of a call on the value pointed to by a positional argument
48-
exists(WriteSideEffectInstruction effect |
49-
result = effect and
50-
effect.getPrimaryInstruction() = call and
51-
output.isParameterDeref(effect.getIndex())
52-
)
53-
or
54-
// The side effect of a call on the qualifier object
35+
// The side effect of a call on the value pointed to by an argument or qualifier
5536
exists(WriteSideEffectInstruction effect |
5637
result = effect and
5738
effect.getPrimaryInstruction() = call and
58-
effect.getIndex() = -1 and
59-
output.isQualifierObject()
39+
output.isParameterDerefOrQualifierObject(effect.getIndex())
6040
)
6141
// TODO: return value dereference
6242
}

cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/TaintTrackingUtil.qll

Lines changed: 47 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -21,53 +21,69 @@ predicate localTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
2121
*/
2222
cached
2323
predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
24-
localInstructionTaintStep(nodeFrom.asInstruction(), nodeTo.asInstruction())
24+
operandToInstructionTaintStep(nodeFrom.asOperand(), nodeTo.asInstruction())
2525
or
26-
modeledTaintStep(nodeFrom, nodeTo)
26+
instructionToOperandTaintStep(nodeFrom.asInstruction(), nodeTo.asOperand())
27+
}
28+
29+
private predicate instructionToOperandTaintStep(Instruction fromInstr, Operand toOperand) {
30+
// Propagate flow from the definition of an operand to the operand, even when the overlap is inexact.
31+
// We only do this in certain cases:
32+
// 1. The instruction's result must not be conflated, and
33+
// 2. The instruction's result type is one the types where we expect element-to-object flow. Currently
34+
// this is array types and union types. This matches the other two cases of element-to-object flow in
35+
// `DefaultTaintTracking`.
36+
toOperand.getAnyDef() = fromInstr and
37+
not fromInstr.isResultConflated() and
38+
(
39+
fromInstr.getResultType() instanceof ArrayType or
40+
fromInstr.getResultType() instanceof Union
41+
)
42+
or
43+
exists(ReadSideEffectInstruction readInstr |
44+
fromInstr = readInstr.getArgumentDef() and
45+
toOperand = readInstr.getSideEffectOperand()
46+
)
47+
or
48+
toOperand.(LoadOperand).getAnyDef() = fromInstr
2749
}
2850

2951
/**
3052
* Holds if taint propagates from `nodeFrom` to `nodeTo` in exactly one local
3153
* (intra-procedural) step.
3254
*/
33-
private predicate localInstructionTaintStep(Instruction nodeFrom, Instruction nodeTo) {
55+
private predicate operandToInstructionTaintStep(Operand opFrom, Instruction instrTo) {
3456
// Taint can flow through expressions that alter the value but preserve
3557
// more than one bit of it _or_ expressions that follow data through
3658
// pointer indirections.
37-
nodeTo.getAnOperand().getAnyDef() = nodeFrom and
59+
instrTo.getAnOperand() = opFrom and
3860
(
39-
nodeTo instanceof ArithmeticInstruction
61+
instrTo instanceof ArithmeticInstruction
4062
or
41-
nodeTo instanceof BitwiseInstruction
63+
instrTo instanceof BitwiseInstruction
4264
or
43-
nodeTo instanceof PointerArithmeticInstruction
65+
instrTo instanceof PointerArithmeticInstruction
4466
or
45-
nodeTo instanceof FieldAddressInstruction
67+
instrTo instanceof FieldAddressInstruction
4668
or
4769
// The `CopyInstruction` case is also present in non-taint data flow, but
4870
// that uses `getDef` rather than `getAnyDef`. For taint, we want flow
4971
// from a definition of `myStruct` to a `myStruct.myField` expression.
50-
nodeTo instanceof CopyInstruction
72+
instrTo instanceof CopyInstruction
5173
)
5274
or
53-
nodeTo.(LoadInstruction).getSourceAddress() = nodeFrom
54-
or
55-
// Flow through partial reads of arrays and unions
56-
nodeTo.(LoadInstruction).getSourceValueOperand().getAnyDef() = nodeFrom and
57-
not nodeFrom.isResultConflated() and
58-
(
59-
nodeFrom.getResultType() instanceof ArrayType or
60-
nodeFrom.getResultType() instanceof Union
61-
)
75+
instrTo.(LoadInstruction).getSourceAddressOperand() = opFrom
6276
or
6377
// Flow from an element to an array or union that contains it.
64-
nodeTo.(ChiInstruction).getPartial() = nodeFrom and
65-
not nodeTo.isResultConflated() and
66-
exists(Type t | nodeTo.getResultLanguageType().hasType(t, false) |
78+
instrTo.(ChiInstruction).getPartialOperand() = opFrom and
79+
not instrTo.isResultConflated() and
80+
exists(Type t | instrTo.getResultLanguageType().hasType(t, false) |
6781
t instanceof Union
6882
or
6983
t instanceof ArrayType
7084
)
85+
or
86+
modeledTaintStep(opFrom, instrTo)
7187
}
7288

7389
/**
@@ -110,17 +126,19 @@ predicate defaultTaintSanitizer(DataFlow::Node node) { none() }
110126
* Holds if taint can flow from `instrIn` to `instrOut` through a call to a
111127
* modeled function.
112128
*/
113-
predicate modeledTaintStep(DataFlow::Node nodeIn, DataFlow::Node nodeOut) {
129+
predicate modeledTaintStep(Operand nodeIn, Instruction nodeOut) {
114130
exists(CallInstruction call, TaintFunction func, FunctionInput modelIn, FunctionOutput modelOut |
115131
(
116132
nodeIn = callInput(call, modelIn)
117133
or
118134
exists(int n |
119-
modelIn.isParameterDeref(n) and
120-
nodeIn = callInput(call, any(InParameter inParam | inParam.getIndex() = n))
135+
modelIn.isParameterDerefOrQualifierObject(n) and
136+
if n = -1
137+
then nodeIn = callInput(call, any(InQualifierObject inQualifier))
138+
else nodeIn = callInput(call, any(InParameter inParam | inParam.getIndex() = n))
121139
)
122140
) and
123-
nodeOut.asInstruction() = callOutput(call, modelOut) and
141+
nodeOut = callOutput(call, modelOut) and
124142
call.getStaticCallTarget() = func and
125143
func.hasTaintFlow(modelIn, modelOut)
126144
)
@@ -135,7 +153,7 @@ predicate modeledTaintStep(DataFlow::Node nodeIn, DataFlow::Node nodeOut) {
135153
int indexMid, InParameter modelMidIn, OutReturnValue modelOut
136154
|
137155
nodeIn = callInput(call, modelIn) and
138-
nodeOut.asInstruction() = callOutput(call, modelOut) and
156+
nodeOut = callOutput(call, modelOut) and
139157
call.getStaticCallTarget() = func and
140158
func.(TaintFunction).hasTaintFlow(modelIn, modelMidOut) and
141159
func.(DataFlowFunction).hasDataFlow(modelMidIn, modelOut) and
@@ -149,15 +167,15 @@ predicate modeledTaintStep(DataFlow::Node nodeIn, DataFlow::Node nodeOut) {
149167
CallInstruction call, ReadSideEffectInstruction read, Function func, FunctionInput modelIn,
150168
FunctionOutput modelOut
151169
|
152-
read.getSideEffectOperand() = callInput(call, modelIn).asOperand() and
153-
read.getArgumentDef() = nodeIn.asInstruction() and
170+
read.getSideEffectOperand() = callInput(call, modelIn) and
171+
read.getArgumentDef() = nodeIn.getDef() and
154172
not read.getSideEffect().isResultModeled() and
155173
call.getStaticCallTarget() = func and
156174
(
157175
func.(DataFlowFunction).hasDataFlow(modelIn, modelOut)
158176
or
159177
func.(TaintFunction).hasTaintFlow(modelIn, modelOut)
160178
) and
161-
nodeOut.asInstruction() = callOutput(call, modelOut)
179+
nodeOut = callOutput(call, modelOut)
162180
)
163181
}

cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/annotate_sinks_only/defaulttainttracking.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@ int main() {
1313

1414

1515

16-
sink(_strdup(getenv("VAR"))); // $ MISSING: ast,ir
17-
sink(strdup(getenv("VAR"))); // $ ast MISSING: ir
16+
sink(_strdup(getenv("VAR"))); // $ ir MISSING: ast
17+
sink(strdup(getenv("VAR"))); // $ ast,ir
1818
sink(unmodeled_function(getenv("VAR"))); // clean by assumption
1919

2020
char untainted_buf[100] = "";

cpp/ql/test/library-tests/dataflow/taint-tests/taint.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -369,9 +369,9 @@ void test_strdup(char *source)
369369
a = strdup(source);
370370
b = strdup("hello, world");
371371
c = strndup(source, 100);
372-
sink(a); // $ ast MISSING: ir
372+
sink(a); // $ ast,ir
373373
sink(b);
374-
sink(c); // $ ast MISSING: ir
374+
sink(c); // $ ast,ir
375375
}
376376

377377
void test_strndup(int source)
@@ -388,7 +388,7 @@ void test_wcsdup(wchar_t *source)
388388

389389
a = wcsdup(source);
390390
b = wcsdup(L"hello, world");
391-
sink(a); // $ ast MISSING: ir
391+
sink(a); // $ ast,ir
392392
sink(b);
393393
}
394394

0 commit comments

Comments
 (0)