Skip to content

Commit 5cbf498

Browse files
authored
Merge pull request #4302 from MathiasVP/fix-field-conflation-after-4230
C++: Fix field conflation after #4230
2 parents c56d5eb + 873e871 commit 5cbf498

File tree

17 files changed

+485
-149
lines changed

17 files changed

+485
-149
lines changed

cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll

Lines changed: 108 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -211,10 +211,17 @@ private predicate fieldStoreStepNoChi(Node node1, FieldContent f, PostUpdateNode
211211
)
212212
}
213213

214+
private FieldAddressInstruction getFieldInstruction(Instruction instr) {
215+
result = instr or
216+
result = instr.(CopyValueInstruction).getUnary()
217+
}
218+
214219
pragma[noinline]
215-
private predicate getWrittenField(StoreInstruction store, Field f, Class c) {
220+
private predicate getWrittenField(Instruction instr, Field f, Class c) {
216221
exists(FieldAddressInstruction fa |
217-
fa = store.getDestinationAddress() and
222+
fa =
223+
getFieldInstruction([instr.(StoreInstruction).getDestinationAddress(),
224+
instr.(WriteSideEffectInstruction).getDestinationAddress()]) and
218225
f = fa.getField() and
219226
c = f.getDeclaringType()
220227
)
@@ -265,7 +272,23 @@ private predicate arrayStoreStepChi(Node node1, ArrayContent a, PostUpdateNode n
265272
predicate storeStep(Node node1, Content f, PostUpdateNode node2) {
266273
fieldStoreStepNoChi(node1, f, node2) or
267274
fieldStoreStepChi(node1, f, node2) or
268-
arrayStoreStepChi(node1, f, node2)
275+
arrayStoreStepChi(node1, f, node2) or
276+
fieldStoreStepAfterArraySuppression(node1, f, node2)
277+
}
278+
279+
// This predicate pushes the correct `FieldContent` onto the access path when the
280+
// `suppressArrayRead` predicate has popped off an `ArrayContent`.
281+
private predicate fieldStoreStepAfterArraySuppression(
282+
Node node1, FieldContent f, PostUpdateNode node2
283+
) {
284+
exists(BufferMayWriteSideEffectInstruction write, ChiInstruction chi, Class c |
285+
not chi.isResultConflated() and
286+
node1.asInstruction() = chi and
287+
node2.asInstruction() = chi and
288+
chi.getPartial() = write and
289+
getWrittenField(write, f.getAField(), c) and
290+
f.hasOffset(c, _, _)
291+
)
269292
}
270293

271294
bindingset[result, i]
@@ -302,11 +325,88 @@ private predicate fieldReadStep(Node node1, FieldContent f, Node node2) {
302325
)
303326
}
304327

328+
/**
329+
* When a store step happens in a function that looks like an array write such as:
330+
* ```cpp
331+
* void f(int* pa) {
332+
* pa = source();
333+
* }
334+
* ```
335+
* it can be a write to an array, but it can also happen that `f` is called as `f(&a.x)`. If that is
336+
* the case, the `ArrayContent` that was written by the call to `f` should be popped off the access
337+
* path, and a `FieldContent` containing `x` should be pushed instead.
338+
* So this case pops `ArrayContent` off the access path, and the `fieldStoreStepAfterArraySuppression`
339+
* predicate in `storeStep` ensures that we push the right `FieldContent` onto the access path.
340+
*/
341+
predicate suppressArrayRead(Node node1, ArrayContent a, Node node2) {
342+
a = TArrayContent() and
343+
exists(BufferMayWriteSideEffectInstruction write, ChiInstruction chi |
344+
node1.asInstruction() = write and
345+
node2.asInstruction() = chi and
346+
chi.getPartial() = write and
347+
getWrittenField(write, _, _)
348+
)
349+
}
350+
351+
private class ArrayToPointerConvertInstruction extends ConvertInstruction {
352+
ArrayToPointerConvertInstruction() {
353+
this.getUnary().getResultType() instanceof ArrayType and
354+
this.getResultType() instanceof PointerType
355+
}
356+
}
357+
358+
private Instruction skipOneCopyValueInstruction(Instruction instr) {
359+
not instr instanceof CopyValueInstruction and result = instr
360+
or
361+
result = instr.(CopyValueInstruction).getUnary()
362+
}
363+
364+
private Instruction skipCopyValueInstructions(Instruction instr) {
365+
result = skipOneCopyValueInstruction*(instr) and not result instanceof CopyValueInstruction
366+
}
367+
305368
private predicate arrayReadStep(Node node1, ArrayContent a, Node node2) {
306369
a = TArrayContent() and
307-
exists(LoadInstruction load |
370+
// Explicit dereferences such as `*p` or `p[i]` where `p` is a pointer or array.
371+
exists(LoadInstruction load, Instruction address |
372+
load.getSourceValueOperand().isDefinitionInexact() and
308373
node1.asInstruction() = load.getSourceValueOperand().getAnyDef() and
309-
load = node2.asInstruction()
374+
load = node2.asInstruction() and
375+
address = skipCopyValueInstructions(load.getSourceAddress()) and
376+
(
377+
address instanceof LoadInstruction or
378+
address instanceof ArrayToPointerConvertInstruction or
379+
address instanceof PointerOffsetInstruction
380+
)
381+
)
382+
}
383+
384+
/**
385+
* In cases such as:
386+
* ```cpp
387+
* void f(int* pa) {
388+
* *pa = source();
389+
* }
390+
* ...
391+
* int x;
392+
* f(&x);
393+
* use(x);
394+
* ```
395+
* the load on `x` in `use(x)` will exactly overlap with its definition (in this case the definition
396+
* is a `BufferMayWriteSideEffect`). This predicate pops the `ArrayContent` (pushed by the store in `f`)
397+
* from the access path.
398+
*/
399+
private predicate exactReadStep(Node node1, ArrayContent a, Node node2) {
400+
a = TArrayContent() and
401+
exists(BufferMayWriteSideEffectInstruction write, ChiInstruction chi |
402+
not chi.isResultConflated() and
403+
chi.getPartial() = write and
404+
node1.asInstruction() = write and
405+
node2.asInstruction() = chi and
406+
// To distinquish this case from the `arrayReadStep` case we require that the entire variable was
407+
// overwritten by the `BufferMayWriteSideEffectInstruction` (i.e., there is a load that reads the
408+
// entire variable).
409+
exists(LoadInstruction load | load.getSourceValue() = chi)
310410
)
311411
}
312412

@@ -317,7 +417,9 @@ private predicate arrayReadStep(Node node1, ArrayContent a, Node node2) {
317417
*/
318418
predicate readStep(Node node1, Content f, Node node2) {
319419
fieldReadStep(node1, f, node2) or
320-
arrayReadStep(node1, f, node2)
420+
arrayReadStep(node1, f, node2) or
421+
exactReadStep(node1, f, node2) or
422+
suppressArrayRead(node1, f, node2)
321423
}
322424

323425
/**

cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,35 @@ private class ExplicitSingleFieldStoreQualifierNode extends PartialDefinitionNod
389389
}
390390
}
391391

392+
private FieldAddressInstruction getFieldInstruction(Instruction instr) {
393+
result = instr or
394+
result = instr.(CopyValueInstruction).getUnary()
395+
}
396+
397+
/**
398+
* The target of a `fieldStoreStepAfterArraySuppression` store step, which is used to convert
399+
* an `ArrayContent` to a `FieldContent` when the `BufferMayWriteSideEffect` instruction stores
400+
* into a field. See the QLDoc for `suppressArrayRead` for an example of where such a conversion
401+
* is inserted.
402+
*/
403+
private class BufferMayWriteSideEffectFieldStoreQualifierNode extends PartialDefinitionNode {
404+
override ChiInstruction instr;
405+
BufferMayWriteSideEffectInstruction write;
406+
FieldAddressInstruction field;
407+
408+
BufferMayWriteSideEffectFieldStoreQualifierNode() {
409+
not instr.isResultConflated() and
410+
instr.getPartial() = write and
411+
field = getFieldInstruction(write.getDestinationAddress())
412+
}
413+
414+
override Node getPreUpdateNode() { result.asOperand() = instr.getTotalOperand() }
415+
416+
override Expr getDefinedExpr() {
417+
result = field.getObjectAddress().getUnconvertedResultExpression()
418+
}
419+
}
420+
392421
/**
393422
* The `PostUpdateNode` that is the target of a `arrayStoreStepChi` store step. The overriden
394423
* `ChiInstruction` corresponds to the instruction represented by `node2` in `arrayStoreStepChi`.

cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/tainted.expected

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@
9393
| defaulttainttracking.cpp:88:18:88:23 | call to getenv | shared.h:5:23:5:31 | sinkparam |
9494
| defaulttainttracking.cpp:97:27:97:32 | call to getenv | defaulttainttracking.cpp:91:42:91:44 | arg |
9595
| defaulttainttracking.cpp:97:27:97:32 | call to getenv | defaulttainttracking.cpp:92:12:92:14 | arg |
96+
| defaulttainttracking.cpp:97:27:97:32 | call to getenv | defaulttainttracking.cpp:96:11:96:12 | p2 |
9697
| defaulttainttracking.cpp:97:27:97:32 | call to getenv | defaulttainttracking.cpp:97:27:97:32 | call to getenv |
9798
| defaulttainttracking.cpp:97:27:97:32 | call to getenv | defaulttainttracking.cpp:98:10:98:11 | (const char *)... |
9899
| defaulttainttracking.cpp:97:27:97:32 | call to getenv | defaulttainttracking.cpp:98:10:98:11 | p2 |

cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/test_diff.expected

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
| defaulttainttracking.cpp:97:27:97:32 | call to getenv | defaulttainttracking.cpp:91:31:91:33 | ret | AST only |
1717
| defaulttainttracking.cpp:97:27:97:32 | call to getenv | defaulttainttracking.cpp:92:5:92:8 | * ... | AST only |
1818
| defaulttainttracking.cpp:97:27:97:32 | call to getenv | defaulttainttracking.cpp:92:6:92:8 | ret | AST only |
19+
| defaulttainttracking.cpp:97:27:97:32 | call to getenv | defaulttainttracking.cpp:96:11:96:12 | p2 | IR only |
1920
| defaulttainttracking.cpp:97:27:97:32 | call to getenv | defaulttainttracking.cpp:98:10:98:11 | (const char *)... | IR only |
2021
| defaulttainttracking.cpp:97:27:97:32 | call to getenv | defaulttainttracking.cpp:98:10:98:11 | p2 | IR only |
2122
| defaulttainttracking.cpp:97:27:97:32 | call to getenv | shared.h:5:23:5:31 | sinkparam | IR only |

cpp/ql/test/library-tests/dataflow/fields/aliasing.cpp

Lines changed: 95 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,5 +109,99 @@ void taint_a_ptr(int* pa) {
109109
void test_field_conflation_array_content() {
110110
S s;
111111
taint_a_ptr(&s.m1);
112-
sink(s.m2); //$f+:ir
112+
sink(s.m2);
113+
}
114+
115+
struct S_with_pointer {
116+
int m1, m2;
117+
int* data;
118+
};
119+
120+
void pointer_deref(int* xs) {
121+
taint_a_ptr(xs);
122+
sink(xs[0]); // $f-:ast $ir
123+
}
124+
125+
void pointer_deref_sub(int* xs) {
126+
taint_a_ptr(xs - 2);
127+
sink(*(xs - 2)); // $f-:ast $ir
128+
}
129+
130+
void pointer_many_addrof_and_deref(int* xs) {
131+
taint_a_ptr(xs);
132+
sink(*&*&*xs); // $f-:ast $ir
133+
}
134+
135+
void pointer_unary_plus(int* xs) {
136+
taint_a_ptr(+xs);
137+
sink(*+xs); // $f-:ast $ir
138+
}
139+
140+
void pointer_member_index(S_with_pointer s) {
141+
taint_a_ptr(s.data);
142+
// `s.data` is points to all-aliased-memory
143+
sink(s.data[0]); // $f-:ast,ir
144+
}
145+
146+
void member_array_different_field(S_with_pointer* s) {
147+
taint_a_ptr(&s[0].m1);
148+
sink(s[0].m2);
149+
}
150+
151+
struct S_with_array {
152+
int m1, m2;
153+
int data[10];
154+
};
155+
156+
void pointer_member_deref() {
157+
S_with_array s;
158+
taint_a_ptr(s.data);
159+
sink(*s.data); // $ir,ast
160+
}
161+
162+
void array_member_deref() {
163+
S_with_array s;
164+
taint_a_ptr(s.data);
165+
sink(s.data[0]); // $ir,ast
166+
}
167+
168+
struct S2 {
169+
S s;
170+
int m3;
171+
};
172+
173+
void deep_member_field_dot() {
174+
S2 s2;
175+
taint_a_ptr(&s2.s.m1);
176+
sink(s2.s.m1); // $ir,ast
177+
}
178+
179+
void deep_member_field_dot_different_fields() {
180+
S2 s2;
181+
taint_a_ptr(&s2.s.m1);
182+
sink(s2.s.m2);
183+
}
184+
185+
void deep_member_field_dot_2() {
186+
S2 s2;
187+
taint_a_ptr(&s2.s.m1);
188+
S2 s2_2 = s2;
189+
sink(s2_2.s.m1); // $ir,ast
190+
}
191+
192+
void deep_member_field_dot_different_fields_2() {
193+
S2 s2;
194+
taint_a_ptr(&s2.s.m1);
195+
S2 s2_2 = s2;
196+
sink(s2_2.s.m2);
197+
}
198+
199+
void deep_member_field_arrow(S2 *ps2) {
200+
taint_a_ptr(&ps2->s.m1);
201+
sink(ps2->s.m1); // $ir,ast
202+
}
203+
204+
void deep_member_field_arrow_different_fields(S2 *ps2) {
205+
taint_a_ptr(&ps2->s.m1);
206+
sink(ps2->s.m2);
113207
}

cpp/ql/test/library-tests/dataflow/fields/flow-diff.expected

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,10 @@
2121
| aliasing.cpp:79:11:79:20 | call to user_input | aliasing.cpp:80:12:80:13 | m1 | IR only |
2222
| aliasing.cpp:86:10:86:19 | call to user_input | aliasing.cpp:87:12:87:13 | m1 | IR only |
2323
| aliasing.cpp:98:10:98:19 | call to user_input | aliasing.cpp:102:8:102:10 | * ... | IR only |
24-
| aliasing.cpp:106:9:106:18 | call to user_input | aliasing.cpp:112:10:112:11 | m2 | IR only |
24+
| aliasing.cpp:106:9:106:18 | call to user_input | aliasing.cpp:122:8:122:12 | access to array | IR only |
25+
| aliasing.cpp:106:9:106:18 | call to user_input | aliasing.cpp:127:8:127:16 | * ... | IR only |
26+
| aliasing.cpp:106:9:106:18 | call to user_input | aliasing.cpp:132:8:132:14 | * ... | IR only |
27+
| aliasing.cpp:106:9:106:18 | call to user_input | aliasing.cpp:137:8:137:11 | * ... | IR only |
2528
| arrays.cpp:6:12:6:21 | call to user_input | arrays.cpp:8:8:8:13 | access to array | AST only |
2629
| arrays.cpp:15:14:15:23 | call to user_input | arrays.cpp:17:8:17:13 | access to array | AST only |
2730
| arrays.cpp:36:26:36:35 | call to user_input | arrays.cpp:38:24:38:27 | data | AST only |

0 commit comments

Comments
 (0)