Skip to content

Commit aaae5be

Browse files
committed
C++: Add addresses to Expr.isConstant
Before this change, `Expr.isConstant` only was only true for those constant expressions that could be represented as QL values: numbers, Booleans, and string literals. It was not true for string literals converted from arrays to pointers, and it was not true for addresses of variables with static lifetime. The concept of a "constant expression" varies between C and C++ and between versions of the standard, but they all include addresses of data with static lifetime. These are modelled by the new library `AddressConstantExpression.qll`, which is based on the code in `EscapesTree.qll` and modified for its new purpose. I've tested the change for performance on Wireshark and for correctness with the included tests. I've also checked on Wireshark that all static initializers in C files are considered constant, which was not the case before.
1 parent 371c09d commit aaae5be

File tree

5 files changed

+300
-1
lines changed

5 files changed

+300
-1
lines changed

cpp/ql/src/semmle/code/cpp/exprs/Expr.qll

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import semmle.code.cpp.Element
33
private import semmle.code.cpp.Enclosing
44
private import semmle.code.cpp.internal.ResolveClass
5+
private import semmle.code.cpp.internal.AddressConstantExpression
56

67
/**
78
* A C/C++ expression.
@@ -84,7 +85,12 @@ class Expr extends StmtParent, @expr {
8485
string getValueText() { exists(@value v | values(v,_,result) and valuebind(v,underlyingElement(this))) }
8586

8687
/** Holds if this expression has a value that can be determined at compile time. */
87-
predicate isConstant() { valuebind(_,underlyingElement(this)) }
88+
cached
89+
predicate isConstant() {
90+
valuebind(_,underlyingElement(this))
91+
or
92+
addressConstantExpression(this)
93+
}
8894

8995
/**
9096
* Holds if this expression is side-effect free (conservative
Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
private import cpp
2+
private import semmle.code.cpp.dataflow.EscapesTree
3+
4+
predicate addressConstantExpression(Expr e) {
5+
constantAddressPointer(e)
6+
or
7+
constantAddressReference(e)
8+
or
9+
// Special case for function pointers, where `fp == *fp`.
10+
constantAddressLValue(e) and
11+
e.getType() instanceof FunctionPointerType
12+
}
13+
14+
/** Holds if `v` is a constexpr variable initialized to a constant address. */
15+
private predicate addressConstantVariable(Variable v) {
16+
addressConstantExpression(v.getInitializer().getExpr().getFullyConverted()) and
17+
// Here we should also require that `v` is constexpr, but we don't have that
18+
// information in the db. See CPP-314. Instead, we require that the variable
19+
// is not assigned to.
20+
not exists(VariableAccess va | va.getTarget() = v |
21+
// `v` may be assigned to, completely or partially
22+
exists(Expr lvalue | variableAccessedAsValue(va, lvalue) |
23+
lvalue = any(Assignment a).getLValue().getFullyConverted()
24+
or
25+
lvalue = any(CrementOperation c).getOperand().getFullyConverted()
26+
)
27+
)
28+
}
29+
30+
/**
31+
* Holds if `lvalue` is an lvalue whose address is an _address constant
32+
* expression_.
33+
*/
34+
private predicate constantAddressLValue(Expr lvalue) {
35+
lvalue.(VariableAccess).getTarget() = any(Variable v |
36+
v.(Variable).isStatic()
37+
or
38+
v instanceof GlobalOrNamespaceVariable
39+
)
40+
or
41+
// There is no `Conversion` for the implicit conversion from a function type
42+
// to a function _pointer_ type. Instead, the type of a `FunctionAccess`
43+
// tells us how it's going to be used.
44+
lvalue.(FunctionAccess).getType() instanceof RoutineType
45+
or
46+
// String literals have array types and undergo array-to-pointer conversion.
47+
lvalue instanceof StringLiteral
48+
or
49+
// lvalue -> lvalue
50+
exists(Expr prev |
51+
constantAddressLValue(prev) and
52+
lvalueToLvalueStep(prev, lvalue)
53+
)
54+
or
55+
// pointer -> lvalue
56+
exists(Expr prev |
57+
constantAddressPointer(prev) and
58+
pointerToLvalueStep(prev, lvalue)
59+
)
60+
or
61+
// reference -> lvalue
62+
exists(Expr prev |
63+
constantAddressReference(prev) and
64+
referenceToLvalueStep(prev, lvalue)
65+
)
66+
}
67+
68+
/** Holds if `pointer` is an _address constant expression_ of pointer type. */
69+
private predicate constantAddressPointer(Expr pointer) {
70+
// There is no `Conversion` for the implicit conversion from a function type
71+
// to a function _pointer_ type. Instead, the type of a `FunctionAccess`
72+
// tells us how it's going to be used.
73+
pointer.(FunctionAccess).getType() instanceof FunctionPointerType
74+
or
75+
addressConstantVariable(pointer.(VariableAccess).getTarget()) and
76+
pointer.getType().getUnderlyingType() instanceof PointerType
77+
or
78+
// pointer -> pointer
79+
exists(Expr prev |
80+
constantAddressPointer(prev) and
81+
pointerToPointerStep(prev, pointer)
82+
)
83+
or
84+
// lvalue -> pointer
85+
exists(Expr prev |
86+
constantAddressLValue(prev) and
87+
lvalueToPointerStep(prev, pointer)
88+
)
89+
}
90+
91+
/** Holds if `reference` is an _address constant expression_ of reference type. */
92+
private predicate constantAddressReference(Expr reference) {
93+
addressConstantVariable(reference.(VariableAccess).getTarget()) and
94+
reference.getType().getUnderlyingType() instanceof ReferenceType
95+
or
96+
addressConstantVariable(reference.(VariableAccess).getTarget()) and
97+
reference.getType().getUnderlyingType() instanceof FunctionReferenceType // not a ReferenceType
98+
or
99+
// reference -> reference
100+
exists(Expr prev |
101+
constantAddressReference(prev) and
102+
referenceToReferenceStep(prev, reference)
103+
)
104+
or
105+
// lvalue -> reference
106+
exists(Expr prev |
107+
constantAddressLValue(prev) and
108+
lvalueToReferenceStep(prev, reference)
109+
)
110+
}
111+
112+
private predicate lvalueToLvalueStep(Expr lvalueIn, Expr lvalueOut) {
113+
lvalueIn = lvalueOut.(DotFieldAccess).getQualifier().getFullyConverted()
114+
or
115+
lvalueIn.getConversion() = lvalueOut.(ParenthesisExpr)
116+
or
117+
// Special case for function pointers, where `fp == *fp`.
118+
lvalueIn = lvalueOut.(PointerDereferenceExpr).getOperand().getFullyConverted() and
119+
lvalueIn.getType() instanceof FunctionPointerType
120+
}
121+
122+
private predicate pointerToLvalueStep(Expr pointerIn, Expr lvalueOut) {
123+
lvalueOut = any(ArrayExpr ae |
124+
pointerIn = ae.getArrayBase().getFullyConverted() and
125+
hasConstantValue(ae.getArrayOffset().getFullyConverted())
126+
)
127+
or
128+
pointerIn = lvalueOut.(PointerDereferenceExpr).getOperand().getFullyConverted()
129+
or
130+
pointerIn = lvalueOut.(PointerFieldAccess).getQualifier().getFullyConverted()
131+
}
132+
133+
private predicate lvalueToPointerStep(Expr lvalueIn, Expr pointerOut) {
134+
lvalueIn.getConversion() = pointerOut.(ArrayToPointerConversion)
135+
or
136+
lvalueIn = pointerOut.(AddressOfExpr).getOperand().getFullyConverted()
137+
}
138+
139+
private predicate pointerToPointerStep(Expr pointerIn, Expr pointerOut) {
140+
(
141+
pointerOut instanceof PointerAddExpr
142+
or
143+
pointerOut instanceof PointerSubExpr
144+
) and
145+
pointerIn = pointerOut.getAChild().getFullyConverted() and
146+
pointerIn.getType().getUnspecifiedType() instanceof PointerType and
147+
// The pointer arg won't be constant in the sense of `hasConstantValue`, so
148+
// this will have to match the integer argument.
149+
hasConstantValue(pointerOut.getAChild().getFullyConverted())
150+
or
151+
pointerIn = pointerOut.(UnaryPlusExpr).getOperand().getFullyConverted()
152+
or
153+
pointerIn.getConversion() = pointerOut.(Cast)
154+
or
155+
pointerIn.getConversion() = pointerOut.(ParenthesisExpr)
156+
or
157+
pointerOut = any(ConditionalExpr cond |
158+
cond.getCondition().getFullyConverted().getValue().toInt() != 0 and
159+
pointerIn = cond.getThen().getFullyConverted()
160+
or
161+
cond.getCondition().getFullyConverted().getValue().toInt() = 0 and
162+
pointerIn = cond.getElse().getFullyConverted()
163+
)
164+
or
165+
// The comma operator is allowed by C++17 but disallowed by C99. This
166+
// disjunct is a compromise that's chosen for being easy to implement.
167+
pointerOut = any(CommaExpr comma |
168+
hasConstantValue(comma.getLeftOperand()) and
169+
pointerIn = comma.getRightOperand().getFullyConverted()
170+
)
171+
}
172+
173+
private predicate lvalueToReferenceStep(Expr lvalueIn, Expr referenceOut) {
174+
lvalueIn.getConversion() = referenceOut.(ReferenceToExpr)
175+
}
176+
177+
private predicate referenceToLvalueStep(Expr referenceIn, Expr lvalueOut) {
178+
// This probably cannot happen. It would require an expression to be
179+
// converted to a reference and back again without an intermediate variable
180+
// assignment.
181+
referenceIn.getConversion() = lvalueOut.(ReferenceDereferenceExpr)
182+
}
183+
184+
private predicate referenceToReferenceStep(Expr referenceIn, Expr referenceOut) {
185+
referenceIn.getConversion() = referenceOut.(Cast)
186+
or
187+
referenceIn.getConversion() = referenceOut.(ParenthesisExpr)
188+
}
189+
190+
/** Holds if `e` is constant according to the database. */
191+
private predicate hasConstantValue(Expr e) { valuebind(_, underlyingElement(e)) }
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
// semmle-extractor-options: --c++14
2+
3+
const int int_const = 1;
4+
extern const int extern_int_const = 1;
5+
extern const int extern_int_const_noinit;
6+
7+
int int_var;
8+
int int_arr[4];
9+
int int_arr_arr[4][4];
10+
11+
int *const const_ptr = &int_var;
12+
13+
void sideEffect();
14+
using fptr = void (*)();
15+
using fref = void (&)();
16+
17+
18+
19+
// All variables in this function are initialized to constants, as witnessed by
20+
// the `constexpr` annotation that compiles under C++14.
21+
void constantAddresses(int param) {
22+
constexpr int *ptr_int = &int_var;
23+
constexpr int *ptr_deref_chain = &*&int_var;
24+
constexpr int *ptr_array = &int_arr[1] + 1;
25+
constexpr int (*ptr_to_array)[4] = &int_arr_arr[1] + 1;
26+
constexpr int *array2d = &int_arr_arr[1][1] + 1;
27+
constexpr int *const_ints = &int_arr_arr[int_const][extern_int_const];
28+
29+
// Commented out because clang and EDG disagree on whether this is
30+
// constant.
31+
//constexpr int *stmtexpr_int = &int_arr[ ({ 1; }) ];
32+
33+
constexpr int *comma_int = &int_arr[ ((void)0, 1) ];
34+
constexpr int *comma_addr = ((void)0, &int_var);
35+
constexpr int *ternary_true = int_const ? &int_var : &param;
36+
constexpr int *ternary_false = !int_const ? &param : &int_var;
37+
constexpr int *ternary_overflow = (unsigned char)256 ? &param : &int_var;
38+
constexpr int *ternary_ptr_cond = (&int_arr+1) ? &int_var : &param;;
39+
constexpr int *ptr_subtract = &int_arr[&int_arr[1] - &int_arr[0]];
40+
41+
constexpr int *constexpr_va = ptr_subtract + 1;
42+
43+
constexpr int &ref_int = int_var;
44+
constexpr int &ref_array2d = int_arr_arr[1][1];
45+
constexpr int &ref_va = ref_array2d;
46+
constexpr int &ref_va_arith = *(&ref_array2d + 1);
47+
48+
constexpr fptr fp_implicit = sideEffect;
49+
constexpr fptr fp_explicit = &sideEffect;
50+
constexpr fptr fp_chain_addressof = &**&**sideEffect;
51+
constexpr fptr fp_chain_deref = **&**&**sideEffect;
52+
constexpr fptr fp_shortchain_deref = *&sideEffect;
53+
54+
constexpr fref fr_int = sideEffect;
55+
constexpr fref fr_deref = *&sideEffect;
56+
constexpr fref fr_2deref = **sideEffect;
57+
constexpr fref fr_va = fr_int;
58+
59+
constexpr const char *char_ptr = "str";
60+
constexpr const char *char_ptr_1 = "str" + 1;
61+
constexpr char char_arr[] = "str";
62+
}
63+
64+
65+
66+
67+
// All variables in this function are initialized to non-const values. Writing
68+
// `constexpr` in front of any of the variables will be a compile error
69+
// (C++14).
70+
void nonConstantAddresses(const int param, int *const pparam, int &rparam, fref frparam) {
71+
int *int_param = &int_arr[param];
72+
int *int_noinit = &int_arr[extern_int_const_noinit];
73+
74+
int *side_effect_stmtexpr = &int_arr[ ({ sideEffect(); 1; }) ];
75+
int *side_effect_comma_int = &int_arr[ (sideEffect(), 1) ];
76+
int *side_effect_comma_addr = (sideEffect(), &int_var);
77+
int *side_effect_comma_addr2 = ((void)(sideEffect(), 1), &int_var);
78+
int *ternary_int = &int_arr[int_const ? param : 1];
79+
const int *ternary_addr = int_const ? &param : &int_var;
80+
int *va_non_constexpr = pparam;
81+
82+
int *&&ref_to_temporary = &int_var; // reference to temporary is not a const
83+
int &ref_param = rparam;
84+
85+
fref fr_param = frparam;
86+
}

cpp/ql/test/library-tests/constants/addresses/addresses.expected

Whitespace-only changes.
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
import cpp
2+
3+
from Expr e, string msg, LocalVariable var, Function f
4+
where
5+
e = var.getInitializer().getExpr().getFullyConverted() and
6+
var.getFunction() = f and
7+
(
8+
e.isConstant() and
9+
f.getName() = "nonConstantAddresses" and
10+
msg = "misclassified as constant"
11+
or
12+
not e.isConstant() and
13+
f.getName() = "constantAddresses" and
14+
msg = "misclassified as NOT constant"
15+
)
16+
select e, var.getName(), msg

0 commit comments

Comments
 (0)