Skip to content

Commit 3e2331c

Browse files
committed
Merge branch 'main' of github.com:github/codeql into SharedDataflow_FieldFlow
2 parents 08b51e6 + 5cbf498 commit 3e2331c

File tree

92 files changed

+1943
-606
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

92 files changed

+1943
-606
lines changed

change-notes/1.26/analysis-javascript.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,10 @@
44

55
* Support for the following frameworks and libraries has been improved:
66
- [bluebird](https://www.npmjs.com/package/bluebird)
7+
- [express](https://www.npmjs.com/package/express)
78
- [fast-json-stable-stringify](https://www.npmjs.com/package/fast-json-stable-stringify)
89
- [fast-safe-stringify](https://www.npmjs.com/package/fast-safe-stringify)
10+
- [http](https://nodejs.org/api/http.html)
911
- [javascript-stringify](https://www.npmjs.com/package/javascript-stringify)
1012
- [js-stringify](https://www.npmjs.com/package/js-stringify)
1113
- [json-stable-stringify](https://www.npmjs.com/package/json-stable-stringify)

cpp/ql/src/semmle/code/cpp/controlflow/internal/ConstantExprs.qll

Lines changed: 49 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -279,20 +279,62 @@ private predicate reachableRecursive(ControlFlowNode n) {
279279
reachableRecursive(n.getAPredecessor())
280280
}
281281

282+
/** Holds if `e` is a compile time constant with integer value `val`. */
282283
private predicate compileTimeConstantInt(Expr e, int val) {
283-
val = e.getFullyConverted().getValue().toInt() and
284-
not e instanceof StringLiteral and
285-
not exists(Expr e1 | e1.getConversion() = e) // only values for fully converted expressions
284+
(
285+
// If we have an integer value then we are done.
286+
if exists(e.getValue().toInt())
287+
then val = e.getValue().toInt()
288+
else
289+
// Otherwise, if we are a conversion of another expression with an
290+
// integer value, and that value can be converted into our type,
291+
// then we have that value.
292+
exists(Expr x, int valx |
293+
x.getConversion() = e and
294+
compileTimeConstantInt(x, valx) and
295+
val = convertIntToType(valx, e.getType().getUnspecifiedType())
296+
)
297+
) and
298+
// If our unconverted expression is a string literal `"123"`, then we
299+
// do not have integer value `123`.
300+
not e.getUnconverted() instanceof StringLiteral
286301
}
287302

288-
library class CompileTimeConstantInt extends Expr {
289-
CompileTimeConstantInt() { compileTimeConstantInt(this, _) }
303+
/**
304+
* Get `val` represented as type `t`, if that is possible without
305+
* overflow or underflows.
306+
*/
307+
bindingset[val, t]
308+
private int convertIntToType(int val, IntegralType t) {
309+
if t instanceof BoolType
310+
then if val = 0 then result = 0 else result = 1
311+
else
312+
if t.isUnsigned()
313+
then if val >= 0 and val.bitShiftRight(t.getSize() * 8) = 0 then result = val else none()
314+
else
315+
if val >= 0 and val.bitShiftRight(t.getSize() * 8 - 1) = 0
316+
then result = val
317+
else
318+
if (-(val + 1)).bitShiftRight(t.getSize() * 8 - 1) = 0
319+
then result = val
320+
else none()
321+
}
322+
323+
/**
324+
* INTERNAL: Do not use.
325+
* An expression that has been found to have an integer value at compile
326+
* time.
327+
*/
328+
class CompileTimeConstantInt extends Expr {
329+
int val;
330+
331+
CompileTimeConstantInt() { compileTimeConstantInt(this.getFullyConverted(), val) }
290332

291-
int getIntValue() { compileTimeConstantInt(this, result) }
333+
int getIntValue() { result = val }
292334
}
293335

294336
library class CompileTimeVariableExpr extends Expr {
295-
CompileTimeVariableExpr() { not compileTimeConstantInt(this, _) }
337+
CompileTimeVariableExpr() { not this instanceof CompileTimeConstantInt }
296338
}
297339

298340
/** A helper class for evaluation of expressions. */

cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll

Lines changed: 108 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -211,10 +211,17 @@ private predicate fieldStoreStepNoChi(Node node1, FieldContent f, PostUpdateNode
211211
)
212212
}
213213

214+
private FieldAddressInstruction getFieldInstruction(Instruction instr) {
215+
result = instr or
216+
result = instr.(CopyValueInstruction).getUnary()
217+
}
218+
214219
pragma[noinline]
215-
private predicate getWrittenField(StoreInstruction store, Field f, Class c) {
220+
private predicate getWrittenField(Instruction instr, Field f, Class c) {
216221
exists(FieldAddressInstruction fa |
217-
fa = store.getDestinationAddress() and
222+
fa =
223+
getFieldInstruction([instr.(StoreInstruction).getDestinationAddress(),
224+
instr.(WriteSideEffectInstruction).getDestinationAddress()]) and
218225
f = fa.getField() and
219226
c = f.getDeclaringType()
220227
)
@@ -265,7 +272,23 @@ private predicate arrayStoreStepChi(Node node1, ArrayContent a, PostUpdateNode n
265272
predicate storeStep(Node node1, Content f, PostUpdateNode node2) {
266273
fieldStoreStepNoChi(node1, f, node2) or
267274
fieldStoreStepChi(node1, f, node2) or
268-
arrayStoreStepChi(node1, f, node2)
275+
arrayStoreStepChi(node1, f, node2) or
276+
fieldStoreStepAfterArraySuppression(node1, f, node2)
277+
}
278+
279+
// This predicate pushes the correct `FieldContent` onto the access path when the
280+
// `suppressArrayRead` predicate has popped off an `ArrayContent`.
281+
private predicate fieldStoreStepAfterArraySuppression(
282+
Node node1, FieldContent f, PostUpdateNode node2
283+
) {
284+
exists(BufferMayWriteSideEffectInstruction write, ChiInstruction chi, Class c |
285+
not chi.isResultConflated() and
286+
node1.asInstruction() = chi and
287+
node2.asInstruction() = chi and
288+
chi.getPartial() = write and
289+
getWrittenField(write, f.getAField(), c) and
290+
f.hasOffset(c, _, _)
291+
)
269292
}
270293

271294
bindingset[result, i]
@@ -302,11 +325,88 @@ private predicate fieldReadStep(Node node1, FieldContent f, Node node2) {
302325
)
303326
}
304327

328+
/**
329+
* When a store step happens in a function that looks like an array write such as:
330+
* ```cpp
331+
* void f(int* pa) {
332+
* pa = source();
333+
* }
334+
* ```
335+
* it can be a write to an array, but it can also happen that `f` is called as `f(&a.x)`. If that is
336+
* the case, the `ArrayContent` that was written by the call to `f` should be popped off the access
337+
* path, and a `FieldContent` containing `x` should be pushed instead.
338+
* So this case pops `ArrayContent` off the access path, and the `fieldStoreStepAfterArraySuppression`
339+
* predicate in `storeStep` ensures that we push the right `FieldContent` onto the access path.
340+
*/
341+
predicate suppressArrayRead(Node node1, ArrayContent a, Node node2) {
342+
a = TArrayContent() and
343+
exists(BufferMayWriteSideEffectInstruction write, ChiInstruction chi |
344+
node1.asInstruction() = write and
345+
node2.asInstruction() = chi and
346+
chi.getPartial() = write and
347+
getWrittenField(write, _, _)
348+
)
349+
}
350+
351+
private class ArrayToPointerConvertInstruction extends ConvertInstruction {
352+
ArrayToPointerConvertInstruction() {
353+
this.getUnary().getResultType() instanceof ArrayType and
354+
this.getResultType() instanceof PointerType
355+
}
356+
}
357+
358+
private Instruction skipOneCopyValueInstruction(Instruction instr) {
359+
not instr instanceof CopyValueInstruction and result = instr
360+
or
361+
result = instr.(CopyValueInstruction).getUnary()
362+
}
363+
364+
private Instruction skipCopyValueInstructions(Instruction instr) {
365+
result = skipOneCopyValueInstruction*(instr) and not result instanceof CopyValueInstruction
366+
}
367+
305368
private predicate arrayReadStep(Node node1, ArrayContent a, Node node2) {
306369
a = TArrayContent() and
307-
exists(LoadInstruction load |
370+
// Explicit dereferences such as `*p` or `p[i]` where `p` is a pointer or array.
371+
exists(LoadInstruction load, Instruction address |
372+
load.getSourceValueOperand().isDefinitionInexact() and
308373
node1.asInstruction() = load.getSourceValueOperand().getAnyDef() and
309-
load = node2.asInstruction()
374+
load = node2.asInstruction() and
375+
address = skipCopyValueInstructions(load.getSourceAddress()) and
376+
(
377+
address instanceof LoadInstruction or
378+
address instanceof ArrayToPointerConvertInstruction or
379+
address instanceof PointerOffsetInstruction
380+
)
381+
)
382+
}
383+
384+
/**
385+
* In cases such as:
386+
* ```cpp
387+
* void f(int* pa) {
388+
* *pa = source();
389+
* }
390+
* ...
391+
* int x;
392+
* f(&x);
393+
* use(x);
394+
* ```
395+
* the load on `x` in `use(x)` will exactly overlap with its definition (in this case the definition
396+
* is a `BufferMayWriteSideEffect`). This predicate pops the `ArrayContent` (pushed by the store in `f`)
397+
* from the access path.
398+
*/
399+
private predicate exactReadStep(Node node1, ArrayContent a, Node node2) {
400+
a = TArrayContent() and
401+
exists(BufferMayWriteSideEffectInstruction write, ChiInstruction chi |
402+
not chi.isResultConflated() and
403+
chi.getPartial() = write and
404+
node1.asInstruction() = write and
405+
node2.asInstruction() = chi and
406+
// To distinquish this case from the `arrayReadStep` case we require that the entire variable was
407+
// overwritten by the `BufferMayWriteSideEffectInstruction` (i.e., there is a load that reads the
408+
// entire variable).
409+
exists(LoadInstruction load | load.getSourceValue() = chi)
310410
)
311411
}
312412

@@ -317,7 +417,9 @@ private predicate arrayReadStep(Node node1, ArrayContent a, Node node2) {
317417
*/
318418
predicate readStep(Node node1, Content f, Node node2) {
319419
fieldReadStep(node1, f, node2) or
320-
arrayReadStep(node1, f, node2)
420+
arrayReadStep(node1, f, node2) or
421+
exactReadStep(node1, f, node2) or
422+
suppressArrayRead(node1, f, node2)
321423
}
322424

323425
/**

cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,35 @@ private class ExplicitSingleFieldStoreQualifierNode extends PartialDefinitionNod
389389
}
390390
}
391391

392+
private FieldAddressInstruction getFieldInstruction(Instruction instr) {
393+
result = instr or
394+
result = instr.(CopyValueInstruction).getUnary()
395+
}
396+
397+
/**
398+
* The target of a `fieldStoreStepAfterArraySuppression` store step, which is used to convert
399+
* an `ArrayContent` to a `FieldContent` when the `BufferMayWriteSideEffect` instruction stores
400+
* into a field. See the QLDoc for `suppressArrayRead` for an example of where such a conversion
401+
* is inserted.
402+
*/
403+
private class BufferMayWriteSideEffectFieldStoreQualifierNode extends PartialDefinitionNode {
404+
override ChiInstruction instr;
405+
BufferMayWriteSideEffectInstruction write;
406+
FieldAddressInstruction field;
407+
408+
BufferMayWriteSideEffectFieldStoreQualifierNode() {
409+
not instr.isResultConflated() and
410+
instr.getPartial() = write and
411+
field = getFieldInstruction(write.getDestinationAddress())
412+
}
413+
414+
override Node getPreUpdateNode() { result.asOperand() = instr.getTotalOperand() }
415+
416+
override Expr getDefinedExpr() {
417+
result = field.getObjectAddress().getUnconvertedResultExpression()
418+
}
419+
}
420+
392421
/**
393422
* The `PostUpdateNode` that is the target of a `arrayStoreStepChi` store step. The overriden
394423
* `ChiInstruction` corresponds to the instruction represented by `node2` in `arrayStoreStepChi`.
@@ -717,7 +746,12 @@ private predicate modelFlow(Operand opFrom, Instruction iTo) {
717746
iTo = outNode and
718747
outNode = getSideEffectFor(call, index)
719748
)
720-
// TODO: add write side effects for qualifiers
749+
or
750+
exists(WriteSideEffectInstruction outNode |
751+
modelOut.isQualifierObject() and
752+
iTo = outNode and
753+
outNode = getSideEffectFor(call, -1)
754+
)
721755
) and
722756
(
723757
exists(int index |
@@ -733,7 +767,12 @@ private predicate modelFlow(Operand opFrom, Instruction iTo) {
733767
or
734768
modelIn.isQualifierAddress() and
735769
opFrom = call.getThisArgumentOperand()
736-
// TODO: add read side effects for qualifiers
770+
or
771+
exists(ReadSideEffectInstruction read |
772+
modelIn.isQualifierObject() and
773+
read = getSideEffectFor(call, -1) and
774+
opFrom = read.getSideEffectOperand()
775+
)
737776
)
738777
)
739778
}

cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/ModelUtil.qll

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,24 +9,31 @@ private import semmle.code.cpp.ir.dataflow.DataFlow
99
/**
1010
* Gets the instruction that goes into `input` for `call`.
1111
*/
12-
Instruction callInput(CallInstruction call, FunctionInput input) {
12+
DataFlow::Node callInput(CallInstruction call, FunctionInput input) {
1313
// A positional argument
1414
exists(int index |
15-
result = call.getPositionalArgument(index) and
15+
result.asInstruction() = call.getPositionalArgument(index) and
1616
input.isParameter(index)
1717
)
1818
or
1919
// A value pointed to by a positional argument
2020
exists(ReadSideEffectInstruction read |
21-
result = read and
21+
result.asOperand() = read.getSideEffectOperand() and
2222
read.getPrimaryInstruction() = call and
2323
input.isParameterDeref(read.getIndex())
2424
)
2525
or
2626
// The qualifier pointer
27-
result = call.getThisArgument() and
27+
result.asInstruction() = call.getThisArgument() and
2828
input.isQualifierAddress()
29-
//TODO: qualifier deref
29+
or
30+
// The qualifier object
31+
exists(ReadSideEffectInstruction read |
32+
result.asOperand() = read.getSideEffectOperand() and
33+
read.getPrimaryInstruction() = call and
34+
read.getIndex() = -1 and
35+
input.isQualifierObject()
36+
)
3037
}
3138

3239
/**
@@ -43,5 +50,13 @@ Instruction callOutput(CallInstruction call, FunctionOutput output) {
4350
effect.getPrimaryInstruction() = call and
4451
output.isParameterDeref(effect.getIndex())
4552
)
46-
// TODO: qualifiers, return value dereference
53+
or
54+
// The side effect of a call on the qualifier object
55+
exists(WriteSideEffectInstruction effect |
56+
result = effect and
57+
effect.getPrimaryInstruction() = call and
58+
effect.getIndex() = -1 and
59+
output.isQualifierObject()
60+
)
61+
// TODO: return value dereference
4762
}

cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/TaintTrackingUtil.qll

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,11 @@ predicate localTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
1919
* local data flow steps. That is, `nodeFrom` and `nodeTo` are likely to represent
2020
* different objects.
2121
*/
22+
cached
2223
predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
2324
localInstructionTaintStep(nodeFrom.asInstruction(), nodeTo.asInstruction())
25+
or
26+
modeledTaintStep(nodeFrom, nodeTo)
2427
}
2528

2629
/**
@@ -49,8 +52,6 @@ private predicate localInstructionTaintStep(Instruction nodeFrom, Instruction no
4952
or
5053
nodeTo.(LoadInstruction).getSourceAddress() = nodeFrom
5154
or
52-
modeledInstructionTaintStep(nodeFrom, nodeTo)
53-
or
5455
// Flow through partial reads of arrays and unions
5556
nodeTo.(LoadInstruction).getSourceValueOperand().getAnyDef() = nodeFrom and
5657
not nodeFrom.isResultConflated() and
@@ -109,10 +110,17 @@ predicate defaultTaintSanitizer(DataFlow::Node node) { none() }
109110
* Holds if taint can flow from `instrIn` to `instrOut` through a call to a
110111
* modeled function.
111112
*/
112-
predicate modeledInstructionTaintStep(Instruction instrIn, Instruction instrOut) {
113+
predicate modeledTaintStep(DataFlow::Node nodeIn, DataFlow::Node nodeOut) {
113114
exists(CallInstruction call, TaintFunction func, FunctionInput modelIn, FunctionOutput modelOut |
114-
instrIn = callInput(call, modelIn) and
115-
instrOut = callOutput(call, modelOut) and
115+
(
116+
nodeIn = callInput(call, modelIn)
117+
or
118+
exists(int n |
119+
modelIn.isParameterDeref(n) and
120+
nodeIn = callInput(call, any(InParameter inParam | inParam.getIndex() = n))
121+
)
122+
) and
123+
nodeOut.asInstruction() = callOutput(call, modelOut) and
116124
call.getStaticCallTarget() = func and
117125
func.hasTaintFlow(modelIn, modelOut)
118126
)
@@ -126,8 +134,8 @@ predicate modeledInstructionTaintStep(Instruction instrIn, Instruction instrOut)
126134
CallInstruction call, Function func, FunctionInput modelIn, OutParameterDeref modelMidOut,
127135
int indexMid, InParameter modelMidIn, OutReturnValue modelOut
128136
|
129-
instrIn = callInput(call, modelIn) and
130-
instrOut = callOutput(call, modelOut) and
137+
nodeIn = callInput(call, modelIn) and
138+
nodeOut.asInstruction() = callOutput(call, modelOut) and
131139
call.getStaticCallTarget() = func and
132140
func.(TaintFunction).hasTaintFlow(modelIn, modelMidOut) and
133141
func.(DataFlowFunction).hasDataFlow(modelMidIn, modelOut) and

0 commit comments

Comments
 (0)