Skip to content

Commit 757ec97

Browse files
authored
Merge pull request #1251 from zlaski-semmle/zlaski/cpp370
[CPP-370] Non-constant `format` arguments to `printf` and friends
2 parents a4fa298 + bc98a80 commit 757ec97

File tree

10 files changed

+764
-684
lines changed

10 files changed

+764
-684
lines changed

cpp/ql/src/Likely Bugs/Format/NonConstantFormat.qhelp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
<qhelp>
55
<overview>
66
<p>The <code>printf</code> function, related functions like <code>sprintf</code> and <code>fprintf</code>,
7-
and other functions built atop <code>vprintf</code> all accept a format string as their first argument.
7+
and other functions built atop <code>vprintf</code> all accept a format string as one of their arguments.
88
When such format strings are literal constants, it is easy for the programmer (and static analysis tools)
99
to verify that the format specifiers (such as <code>%s</code> and <code>%02x</code>) in the format string
1010
are compatible with the trailing arguments of the function call. When such format strings are not literal

cpp/ql/src/Likely Bugs/Format/NonConstantFormat.ql

Lines changed: 90 additions & 166 deletions
Original file line numberDiff line numberDiff line change
@@ -13,48 +13,37 @@
1313
* security
1414
* external/cwe/cwe-134
1515
*/
16-
import cpp
1716

18-
/**
19-
* Holds if `t` is a character array or pointer, where `charType` is the type of
20-
* characters in `t`.
21-
*/
22-
predicate stringType(Type t, Type charType) {
23-
(
24-
( charType = t.(PointerType).getBaseType() or
25-
charType = t.(ArrayType).getBaseType()
26-
) and (
27-
charType.getUnspecifiedType() instanceof CharType or
28-
charType.getUnspecifiedType() instanceof Wchar_t
29-
)
30-
)
31-
or
32-
stringType(t.getUnderlyingType(), charType)
33-
or
34-
stringType(t.(SpecifiedType).getBaseType(), charType)
35-
}
17+
import semmle.code.cpp.dataflow.TaintTracking
18+
import semmle.code.cpp.commons.Printf
3619

37-
predicate gettextFunction(Function f, int arg) {
20+
// For the following `...gettext` functions, we assume that
21+
// all translations preserve the type and order of `%` specifiers
22+
// (and hence are safe to use as format strings). This
23+
// assumption is hard-coded into the query.
24+
predicate whitelistFunction(Function f, int arg) {
3825
// basic variations of gettext
39-
(f.getName() = "_" and arg = 0) or
40-
(f.getName() = "gettext" and arg = 0) or
41-
(f.getName() = "dgettext" and arg = 1) or
42-
(f.getName() = "dcgettext" and arg = 1) or
26+
f.getName() = "_" and arg = 0
27+
or
28+
f.getName() = "gettext" and arg = 0
29+
or
30+
f.getName() = "dgettext" and arg = 1
31+
or
32+
f.getName() = "dcgettext" and arg = 1
33+
or
4334
// plural variations of gettext that take one format string for singular and another for plural form
44-
(f.getName() = "ngettext" and (arg = 0 or arg = 1)) or
45-
(f.getName() = "dngettext" and (arg = 1 or arg = 2)) or
46-
(f.getName() = "dcngettext" and (arg = 1 or arg = 2))
47-
}
48-
49-
predicate stringArray(Variable arr, AggregateLiteral init) {
50-
arr.getInitializer().getExpr() = init and
51-
stringType(arr.getUnspecifiedType().(ArrayType).getBaseType(), _)
52-
// Ideally, this predicate should also check that no item of `arr` is ever
53-
// reassigned, but such an analysis could get fairly complicated. Instead, we
54-
// just hope that nobody would initialize an array of constants and then
55-
// overwrite some of them with untrusted data.
35+
f.getName() = "ngettext" and
36+
(arg = 0 or arg = 1)
37+
or
38+
f.getName() = "dngettext" and
39+
(arg = 1 or arg = 2)
40+
or
41+
f.getName() = "dcngettext" and
42+
(arg = 1 or arg = 2)
5643
}
5744

45+
// we assume that ALL uses of the `_` macro
46+
// return constant string literals
5847
predicate underscoreMacro(Expr e) {
5948
exists(MacroInvocation mi |
6049
mi.getMacroName() = "_" and
@@ -63,156 +52,91 @@ predicate underscoreMacro(Expr e) {
6352
}
6453

6554
/**
66-
* Holds if a value of character pointer type may flow _directly_ from `src` to
67-
* `dst`.
55+
* Holds if `t` cannot hold a character array, directly or indirectly.
6856
*/
69-
predicate stringFlowStep(Expr src, Expr dst) {
70-
not underscoreMacro(dst)
71-
and
72-
stringType(dst.getType(), _)
73-
and
74-
(
75-
src = dst.(VariableAccess).getTarget().getAnAssignedValue()
57+
predicate cannotContainString(Type t) {
58+
t.getUnspecifiedType() instanceof BuiltInType
59+
or
60+
t.getUnspecifiedType() instanceof IntegralOrEnumType
61+
}
62+
63+
predicate isNonConst(DataFlow::Node node) {
64+
exists(Expr e | e = node.asExpr() |
65+
exists(FunctionCall fc | fc = e.(FunctionCall) |
66+
not (
67+
whitelistFunction(fc.getTarget(), _) or
68+
fc.getTarget().hasDefinition()
69+
)
70+
)
7671
or
77-
src = dst.(ConditionalExpr).getThen()
72+
exists(Parameter p | p = e.(VariableAccess).getTarget().(Parameter) |
73+
p.getFunction().getName() = "main" and p.getType() instanceof PointerType
74+
)
7875
or
79-
src = dst.(ConditionalExpr).getElse()
76+
e instanceof CrementOperation
8077
or
81-
src = dst.(AssignExpr).getRValue()
78+
e instanceof AddressOfExpr
8279
or
83-
src = dst.(CommaExpr).getRightOperand()
80+
e instanceof ReferenceToExpr
8481
or
85-
src = dst.(UnaryPlusExpr).getOperand()
82+
e instanceof AssignPointerAddExpr
8683
or
87-
stringArray(dst.(ArrayExpr).getArrayBase().(VariableAccess).getTarget(),
88-
src.getParent())
84+
e instanceof AssignPointerSubExpr
8985
or
90-
// Follow a parameter to its arguments in all callers.
91-
exists(Parameter p | p = dst.(VariableAccess).getTarget() |
92-
src = p.getFunction().getACallToThisFunction().getArgument(p.getIndex())
93-
)
86+
e instanceof PointerArithmeticOperation
9487
or
95-
// Follow a call to a gettext function without looking at its body even if
96-
// the body is known. This ensures that we report the error in the relevant
97-
// location rather than inside the body of some `_` function.
98-
exists(Function gettext, FunctionCall call, int idx |
99-
gettextFunction(gettext, idx) and
100-
dst = call and
101-
gettext = call.getTarget()
102-
| src = call.getArgument(idx)
103-
)
88+
e instanceof FieldAccess
10489
or
105-
// Follow a call to a non-gettext function through its return statements.
106-
exists(Function f, ReturnStmt retStmt |
107-
f = dst.(FunctionCall).getTarget() and
108-
f = retStmt.getEnclosingFunction() and
109-
not gettextFunction(f, _)
110-
| src = retStmt.getExpr()
90+
e instanceof PointerDereferenceExpr
91+
or
92+
e instanceof AddressOfExpr
93+
or
94+
e instanceof ExprCall
95+
or
96+
e instanceof NewArrayExpr
97+
or
98+
e instanceof AssignExpr
99+
or
100+
exists(Variable v | v = e.(VariableAccess).getTarget() |
101+
v.getType().(ArrayType).getBaseType() instanceof CharType and
102+
exists(AssignExpr ae |
103+
ae.getLValue().(ArrayExpr).getArrayBase().(VariableAccess).getTarget() = v
104+
)
111105
)
112106
)
113-
}
114-
115-
/** Holds if `v` may be written to, other than through `AssignExpr`. */
116-
predicate nonConstVariable(Variable v) {
117-
exists(Type charType |
118-
stringType(v.getType(), charType) and not charType.isConst()
119-
)
120-
or
121-
exists(AssignPointerAddExpr e |
122-
e.getLValue().(VariableAccess).getTarget() = v
123-
)
124-
or
125-
exists(AssignPointerSubExpr e |
126-
e.getLValue().(VariableAccess).getTarget() = v
127-
)
128-
or
129-
exists(CrementOperation e | e.getOperand().(VariableAccess).getTarget() = v)
130107
or
131-
exists(AddressOfExpr e | e.getOperand().(VariableAccess).getTarget() = v)
132-
or
133-
exists(ReferenceToExpr e | e.getExpr().(VariableAccess).getTarget() = v)
108+
node instanceof DataFlow::DefinitionByReferenceNode
134109
}
135110

136-
/**
137-
* Holds if this expression is _directly_ considered non-constant for the
138-
* purpose of this query.
139-
*
140-
* Each case of `Expr` that may have string type should be listed either in
141-
* `nonConst` or `stringFlowStep`. Omitting a case leads to false negatives.
142-
* Having a case in both places leads to unnecessary computation.
143-
*/
144-
predicate nonConst(Expr e) {
145-
nonConstVariable(e.(VariableAccess).getTarget())
146-
or
147-
e instanceof CrementOperation
148-
or
149-
e instanceof AssignPointerAddExpr
150-
or
151-
e instanceof AssignPointerSubExpr
152-
or
153-
e instanceof PointerArithmeticOperation
154-
or
155-
e instanceof FieldAccess
156-
or
157-
e instanceof PointerDereferenceExpr
111+
pragma[noinline]
112+
predicate isSanitizerNode(DataFlow::Node node) {
113+
underscoreMacro(node.asExpr())
158114
or
159-
e instanceof AddressOfExpr
160-
or
161-
e instanceof ExprCall
162-
or
163-
exists(ArrayExpr ae | ae = e |
164-
not stringArray(ae.getArrayBase().(VariableAccess).getTarget(), _)
165-
)
166-
or
167-
e instanceof NewArrayExpr
168-
or
169-
// e is a call to a function whose definition we cannot see. We assume it is
170-
// not constant.
171-
exists(Function f | f = e.(FunctionCall).getTarget() |
172-
not f.hasDefinition()
173-
)
174-
or
175-
// e is a parameter of a function that's never called. If it were called, we
176-
// would instead have followed the call graph in `stringFlowStep`.
177-
exists(Function f
178-
| f = e.(VariableAccess).getTarget().(Parameter).getFunction()
179-
| not exists(f.getACallToThisFunction())
180-
)
115+
cannotContainString(node.getType())
181116
}
182117

183-
predicate formattingFunctionArgument(
184-
FormattingFunction ff, FormattingFunctionCall fc, Expr arg)
185-
{
186-
fc.getTarget() = ff and
187-
fc.getArgument(ff.getFormatParameterIndex()) = arg and
188-
// Don't look for errors inside functions that are themselves formatting
189-
// functions. We expect that the interesting errors will be in their callers.
190-
not fc.getEnclosingFunction() instanceof FormattingFunction
191-
}
118+
class NonConstFlow extends TaintTracking::Configuration {
119+
NonConstFlow() { this = "NonConstFlow" }
192120

193-
// Reflexive-transitive closure of `stringFlow`, restricting the base case to
194-
// only consider destination expressions that are arguments to formatting
195-
// functions.
196-
predicate stringFlow(Expr src, Expr dst) {
197-
formattingFunctionArgument(_, _, dst) and src = dst
198-
or
199-
exists(Expr mid | stringFlowStep(src, mid) and stringFlow(mid, dst))
200-
}
121+
override predicate isSource(DataFlow::Node source) {
122+
isNonConst(source) and
123+
not cannotContainString(source.getType())
124+
}
201125

202-
predicate whitelisted(Expr e) {
203-
gettextFunction(e.(FunctionCall).getTarget(), _)
204-
or
205-
underscoreMacro(e)
206-
}
126+
override predicate isSink(DataFlow::Node sink) {
127+
exists(FormattingFunctionCall fc | sink.asExpr() = fc.getArgument(fc.getFormatParameterIndex()))
128+
}
207129

208-
predicate flowFromNonConst(Expr src, Expr dst) {
209-
stringFlow(src, dst) and
210-
nonConst(src) and
211-
not whitelisted(src)
130+
override predicate isSanitizer(DataFlow::Node node) { isSanitizerNode(node) }
212131
}
213132

214-
from FormattingFunctionCall fc, FormattingFunction ff, Expr format
215-
where formattingFunctionArgument(ff, fc, format) and
216-
flowFromNonConst(_, format) and
217-
not fc.isInMacroExpansion()
218-
select format, "The format string argument to " + ff.getName() + " should be constant to prevent security issues and other potential errors."
133+
from FormattingFunctionCall call, Expr formatString
134+
where
135+
call.getArgument(call.getFormatParameterIndex()) = formatString and
136+
exists(NonConstFlow cf, DataFlow::Node source, DataFlow::Node sink |
137+
cf.hasFlow(source, sink) and
138+
sink.asExpr() = formatString
139+
)
140+
select formatString,
141+
"The format string argument to " + call.getTarget().getName() +
142+
" should be constant to prevent security issues and other potential errors."

0 commit comments

Comments
 (0)