Skip to content

Commit 4308381

Browse files
authored
Merge pull request #4846 from MathiasVP/default-taint-tracking-operand-instruction-interleaving
C++: Instruction -> Operand interleaving for DefaultTaintTracking
2 parents 06366fa + b510204 commit 4308381

File tree

2 files changed

+59
-49
lines changed

2 files changed

+59
-49
lines changed

cpp/ql/src/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll

Lines changed: 51 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -204,16 +204,27 @@ private predicate nodeIsBarrierIn(DataFlow::Node node) {
204204

205205
cached
206206
private predicate commonTaintStep(DataFlow::Node fromNode, DataFlow::Node toNode) {
207-
instructionToInstructionTaintStep(fromNode.asInstruction(), toNode.asInstruction())
208-
or
209207
operandToInstructionTaintStep(fromNode.asOperand(), toNode.asInstruction())
210208
or
211-
operandToOperandTaintStep(fromNode.asOperand(), toNode.asOperand())
209+
instructionToOperandTaintStep(fromNode.asInstruction(), toNode.asOperand())
212210
}
213211

214-
private predicate operandToOperandTaintStep(Operand fromOperand, Operand toOperand) {
212+
private predicate instructionToOperandTaintStep(Instruction fromInstr, Operand toOperand) {
213+
// Propagate flow from the definition of an operand to the operand, even when the overlap is inexact.
214+
// We only do this in certain cases:
215+
// 1. The instruction's result must not be conflated, and
216+
// 2. The instruction's result type is one the types where we expect element-to-object flow. Currently
217+
// this is array types and union types. This matches the other two cases of element-to-object flow in
218+
// `DefaultTaintTracking`.
219+
toOperand.getAnyDef() = fromInstr and
220+
not fromInstr.isResultConflated() and
221+
(
222+
fromInstr.getResultType() instanceof ArrayType or
223+
fromInstr.getResultType() instanceof Union
224+
)
225+
or
215226
exists(ReadSideEffectInstruction readInstr |
216-
fromOperand = readInstr.getArgumentOperand() and
227+
fromInstr = readInstr.getArgumentDef() and
217228
toOperand = readInstr.getSideEffectOperand()
218229
)
219230
}
@@ -256,18 +267,18 @@ private predicate operandToInstructionTaintStep(Operand fromOperand, Instruction
256267
outInstr.getPrimaryInstruction() = call
257268
)
258269
)
259-
}
260-
261-
private predicate instructionToInstructionTaintStep(Instruction i1, Instruction i2) {
270+
or
262271
// Flow through pointer dereference
263-
i2.(LoadInstruction).getSourceAddress() = i1
272+
toInstr.(LoadInstruction).getSourceAddressOperand() = fromOperand
264273
or
265274
// Flow through partial reads of arrays and unions
266-
i2.(LoadInstruction).getSourceValueOperand().getAnyDef() = i1 and
267-
not i1.isResultConflated() and
268-
(
269-
i1.getResultType() instanceof ArrayType or
270-
i1.getResultType() instanceof Union
275+
toInstr.(LoadInstruction).getSourceValueOperand() = fromOperand and
276+
exists(Instruction fromInstr | fromInstr = fromOperand.getAnyDef() |
277+
not fromInstr.isResultConflated() and
278+
(
279+
fromInstr.getResultType() instanceof ArrayType or
280+
fromInstr.getResultType() instanceof Union
281+
)
271282
)
272283
or
273284
// Unary instructions tend to preserve enough information in practice that we
@@ -277,63 +288,54 @@ private predicate instructionToInstructionTaintStep(Instruction i1, Instruction
277288
// `FieldAddressInstruction` could cause flow into one field to come out an
278289
// unrelated field. This would happen across function boundaries, where the IR
279290
// would not be able to match loads to stores.
280-
i2.(UnaryInstruction).getUnary() = i1 and
291+
toInstr.(UnaryInstruction).getUnaryOperand() = fromOperand and
281292
(
282-
not i2 instanceof FieldAddressInstruction
293+
not toInstr instanceof FieldAddressInstruction
283294
or
284-
i2.(FieldAddressInstruction).getField().getDeclaringType() instanceof Union
295+
toInstr.(FieldAddressInstruction).getField().getDeclaringType() instanceof Union
285296
)
286297
or
287-
// Flow out of definition-by-reference
288-
i2.(ChiInstruction).getPartial() = i1.(WriteSideEffectInstruction) and
289-
not i2.isResultConflated()
290-
or
291298
// Flow from an element to an array or union that contains it.
292-
i2.(ChiInstruction).getPartial() = i1 and
293-
not i2.isResultConflated() and
294-
exists(Type t | i2.getResultLanguageType().hasType(t, false) |
299+
toInstr.(ChiInstruction).getPartialOperand() = fromOperand and
300+
not toInstr.isResultConflated() and
301+
exists(Type t | toInstr.getResultLanguageType().hasType(t, false) |
295302
t instanceof Union
296303
or
297304
t instanceof ArrayType
298305
)
299306
or
300307
exists(BinaryInstruction bin |
301-
bin = i2 and
302-
predictableInstruction(i2.getAnOperand().getDef()) and
303-
i1 = i2.getAnOperand().getDef()
308+
bin = toInstr and
309+
predictableInstruction(toInstr.getAnOperand().getDef()) and
310+
fromOperand = toInstr.getAnOperand()
304311
)
305312
or
306313
// This is part of the translation of `a[i]`, where we want taint to flow
307314
// from `a`.
308-
i2.(PointerAddInstruction).getLeft() = i1
309-
or
310-
// Until we have from through indirections across calls, we'll take flow out
311-
// of the parameter and into its indirection.
312-
exists(IRFunction f, Parameter parameter |
313-
i1 = getInitializeParameter(f, parameter) and
314-
i2 = getInitializeIndirection(f, parameter)
315-
)
315+
toInstr.(PointerAddInstruction).getLeftOperand() = fromOperand
316316
or
317317
// Until we have flow through indirections across calls, we'll take flow out
318318
// of the indirection and into the argument.
319319
// When we get proper flow through indirections across calls, this code can be
320320
// moved to `adjusedSink` or possibly into the `DataFlow::ExprNode` class.
321321
exists(ReadSideEffectInstruction read |
322-
read.getAnOperand().(SideEffectOperand).getAnyDef() = i1 and
323-
read.getArgumentDef() = i2
322+
read.getSideEffectOperand() = fromOperand and
323+
read.getArgumentDef() = toInstr
324324
)
325-
}
326-
327-
pragma[noinline]
328-
private InitializeIndirectionInstruction getInitializeIndirection(IRFunction f, Parameter p) {
329-
result.getParameter() = p and
330-
result.getEnclosingIRFunction() = f
331-
}
332-
333-
pragma[noinline]
334-
private InitializeParameterInstruction getInitializeParameter(IRFunction f, Parameter p) {
335-
result.getParameter() = p and
336-
result.getEnclosingIRFunction() = f
325+
or
326+
// Until we have from through indirections across calls, we'll take flow out
327+
// of the parameter and into its indirection.
328+
// `InitializeIndirectionInstruction` only has a single operand: the address of the
329+
// value whose indirection we are initializing. When initializing an indirection of a parameter `p`,
330+
// the IR looks like this:
331+
// ```
332+
// m1 = InitializeParameter[p] : &r1
333+
// r2 = Load[p] : r2, m1
334+
// m3 = InitializeIndirection[p] : &r2
335+
// ```
336+
// So by having flow from `r2` to `m3` we're enabling flow from `m1` to `m3`. This relies on the
337+
// `LoadOperand`'s overlap being exact.
338+
toInstr.(InitializeIndirectionInstruction).getAnOperand() = fromOperand
337339
}
338340

339341
/**

cpp/ql/test/query-tests/Security/CWE/CWE-134/semmle/globalVars/UncontrolledFormatStringThroughGlobalVar.expected

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,14 @@ edges
44
| globalVars.c:8:7:8:10 | copy | globalVars.c:27:9:27:12 | copy |
55
| globalVars.c:8:7:8:10 | copy | globalVars.c:30:15:30:18 | copy |
66
| globalVars.c:8:7:8:10 | copy | globalVars.c:30:15:30:18 | copy |
7+
| globalVars.c:8:7:8:10 | copy | globalVars.c:30:15:30:18 | copy |
78
| globalVars.c:8:7:8:10 | copy | globalVars.c:35:11:35:14 | copy |
89
| globalVars.c:9:7:9:11 | copy2 | globalVars.c:38:9:38:13 | copy2 |
910
| globalVars.c:9:7:9:11 | copy2 | globalVars.c:38:9:38:13 | copy2 |
1011
| globalVars.c:9:7:9:11 | copy2 | globalVars.c:38:9:38:13 | copy2 |
1112
| globalVars.c:9:7:9:11 | copy2 | globalVars.c:41:15:41:19 | copy2 |
1213
| globalVars.c:9:7:9:11 | copy2 | globalVars.c:41:15:41:19 | copy2 |
14+
| globalVars.c:9:7:9:11 | copy2 | globalVars.c:41:15:41:19 | copy2 |
1315
| globalVars.c:9:7:9:11 | copy2 | globalVars.c:50:9:50:13 | copy2 |
1416
| globalVars.c:9:7:9:11 | copy2 | globalVars.c:50:9:50:13 | copy2 |
1517
| globalVars.c:9:7:9:11 | copy2 | globalVars.c:50:9:50:13 | copy2 |
@@ -25,9 +27,15 @@ edges
2527
| globalVars.c:24:11:24:14 | argv | globalVars.c:11:22:11:25 | argv |
2628
| globalVars.c:27:9:27:12 | copy | globalVars.c:27:9:27:12 | (const char *)... |
2729
| globalVars.c:27:9:27:12 | copy | globalVars.c:27:9:27:12 | copy |
30+
| globalVars.c:30:15:30:18 | copy | globalVars.c:30:15:30:18 | copy |
31+
| globalVars.c:30:15:30:18 | copy | globalVars.c:30:15:30:18 | copy |
32+
| globalVars.c:30:15:30:18 | copy | globalVars.c:30:15:30:18 | copy |
2833
| globalVars.c:35:11:35:14 | copy | globalVars.c:15:21:15:23 | val |
2934
| globalVars.c:38:9:38:13 | copy2 | globalVars.c:38:9:38:13 | (const char *)... |
3035
| globalVars.c:38:9:38:13 | copy2 | globalVars.c:38:9:38:13 | copy2 |
36+
| globalVars.c:41:15:41:19 | copy2 | globalVars.c:41:15:41:19 | copy2 |
37+
| globalVars.c:41:15:41:19 | copy2 | globalVars.c:41:15:41:19 | copy2 |
38+
| globalVars.c:41:15:41:19 | copy2 | globalVars.c:41:15:41:19 | copy2 |
3139
| globalVars.c:50:9:50:13 | copy2 | globalVars.c:50:9:50:13 | (const char *)... |
3240
| globalVars.c:50:9:50:13 | copy2 | globalVars.c:50:9:50:13 | copy2 |
3341
nodes

0 commit comments

Comments
 (0)