Skip to content

Commit 75ab311

Browse files
authored
Merge pull request #1223 from geoffw0/commentedoutcode
CPP: Detect commented out preprocessor logic
2 parents c9fbbfe + 13ed50f commit 75ab311

File tree

4 files changed

+138
-91
lines changed

4 files changed

+138
-91
lines changed

change-notes/1.21/analysis-cpp.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
| **Query** | **Expected impact** | **Change** |
1313
|----------------------------|------------------------|------------------------------------------------------------------|
14+
| Commented-out code (`cpp/commented-out-code`) | More correct results | Commented out preprocessor code is now detected by this query. |
1415
| Dead code due to goto or break statement (`cpp/dead-code-goto`) | Fewer false positive results | Functions containing preprocessor logic are now excluded from this analysis. |
1516
| Mismatching new/free or malloc/delete (`cpp/new-free-mismatch`) | Fewer false positive results | Fixed an issue where functions were being identified as allocation functions inappropriately. Also affects `cpp/new-array-delete-mismatch` and `cpp/new-delete-array-mismatch`. |
1617
| Overflow in uncontrolled allocation size (`cpp/uncontrolled-allocation-size`) | More correct results | This query has been reworked so that it can find a wider variety of results. |

cpp/ql/src/Documentation/CommentedOutCode.qll

Lines changed: 104 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -2,45 +2,57 @@ import cpp
22

33
/**
44
* Holds if `line` looks like a line of code.
5-
* Matches comment lines ending with '{', '}' or ';' that do not start with '>' or contain '@{' or '@}', but first filters out:
6-
* * HTML entities in common notation (e.g. > and é)
7-
* * HTML entities in decimal notation (e.g. à)
8-
* * HTML entities in hexadecimal notation (e.g. 灟)
9-
* To account for the code generated by protobuf, we also insist that the comment
10-
* does not begin with `optional` or `repeated` and end with a `;`, which would
11-
* normally be a quoted bit of literal `.proto` specification above the associated
12-
* declaration.
13-
* To account for emacs folding markers, we ignore any line containing
14-
* `{{{` or `}}}`.
15-
*
16-
* Finally, some code tends to embed GUIDs in comments, so we also exclude those.
175
*/
186
bindingset[line]
197
private predicate looksLikeCode(string line) {
20-
exists(string trimmed |
21-
trimmed = line.regexpReplaceAll("(?i)(^\\s+|&#?[a-z0-9]{1,31};|\\s+$)", "") |
22-
trimmed.regexpMatch(".*[{};]")
23-
and (
24-
// If this line looks like code because it ends with a closing
25-
// brace that's preceded by something other than whitespace ...
26-
trimmed.regexpMatch(".*.\\}")
27-
implies
28-
// ... then there has to be ") {" (or some variation)
29-
// on the line, suggesting it's a statement like `if`
30-
// or a function declaration. Otherwise it's likely to be a
31-
// benign use of braces such as a JSON example or explanatory
32-
// pseudocode.
33-
trimmed.regexpMatch(".*(\\)|const|volatile|override|final|noexcept|&)\\s*\\{.*")
34-
)
35-
and not trimmed.regexpMatch("(>.*|.*[\\\\@][{}].*|(optional|repeated) .*;|.*(\\{\\{\\{|\\}\\}\\}).*|\\{[-0-9a-zA-Z]+\\})"))
8+
exists(string trimmed |
9+
// trim leading and trailing whitespace, and HTML codes:
10+
// * HTML entities in common notation (e.g. > and é)
11+
// * HTML entities in decimal notation (e.g. à)
12+
// * HTML entities in hexadecimal notation (e.g. 灟)
13+
trimmed = line.regexpReplaceAll("(?i)(^\\s+|&#?[a-z0-9]{1,31};|\\s+$)", "")
14+
|
15+
(
16+
(
17+
// Match comment lines ending with '{', '}' or ';'
18+
trimmed.regexpMatch(".*[{};]") and
19+
(
20+
// If this line looks like code because it ends with a closing
21+
// brace that's preceded by something other than whitespace ...
22+
trimmed.regexpMatch(".*.\\}")
23+
implies
24+
// ... then there has to be ") {" (or some variation)
25+
// on the line, suggesting it's a statement like `if`
26+
// or a function definition. Otherwise it's likely to be a
27+
// benign use of braces such as a JSON example or explanatory
28+
// pseudocode.
29+
trimmed.regexpMatch(".*(\\)|const|volatile|override|final|noexcept|&)\\s*\\{.*")
30+
)
31+
) or (
32+
// Match comment lines that look like preprocessor code
33+
trimmed.regexpMatch("#\\s*(include|define|undef|if|ifdef|ifndef|elif|else|endif|error|pragma)\\b.*")
34+
)
35+
) and (
36+
// Exclude lines that start with '>' or contain '@{' or '@}'.
37+
// To account for the code generated by protobuf, we also insist that the comment
38+
// does not begin with `optional` or `repeated` and end with a `;`, which would
39+
// normally be a quoted bit of literal `.proto` specification above the associated
40+
// declaration.
41+
// To account for emacs folding markers, we ignore any line containing
42+
// `{{{` or `}}}`.
43+
// Finally, some code tends to embed GUIDs in comments, so we also exclude those.
44+
not trimmed
45+
.regexpMatch("(>.*|.*[\\\\@][{}].*|(optional|repeated) .*;|.*(\\{\\{\\{|\\}\\}\\}).*|\\{[-0-9a-zA-Z]+\\})")
46+
)
47+
)
3648
}
3749

3850
/**
3951
* The line of a C++-style comment within its file `f`.
4052
*/
4153
private int lineInFile(CppStyleComment c, File f) {
42-
f = c.getFile() and
43-
result = c.getLocation().getStartLine()
54+
f = c.getFile() and
55+
result = c.getLocation().getStartLine()
4456
}
4557

4658
/**
@@ -53,11 +65,12 @@ private int lineInFile(CppStyleComment c, File f) {
5365
* without line comments would increase the line number without increasing
5466
* the rank and thus force a change of block ID.
5567
*/
56-
private pragma[nomagic] int commentLineBlockID(File f, int line) {
57-
exists(int lineRank |
58-
line = rank[lineRank](lineInFile(_, f)) and
59-
result = line - lineRank
60-
)
68+
pragma[nomagic]
69+
private int commentLineBlockID(File f, int line) {
70+
exists(int lineRank |
71+
line = rank[lineRank](lineInFile(_, f)) and
72+
result = line - lineRank
73+
)
6174
}
6275

6376
/**
@@ -67,80 +80,80 @@ private pragma[nomagic] int commentLineBlockID(File f, int line) {
6780
* for separate runs.
6881
*/
6982
private int commentId(CppStyleComment c, File f, int line) {
70-
result = commentLineBlockID(f, line) and
71-
line = lineInFile(c, f)
83+
result = commentLineBlockID(f, line) and
84+
line = lineInFile(c, f)
7285
}
7386

7487
/**
7588
* A contiguous block of comments.
7689
*/
7790
class CommentBlock extends Comment {
78-
CommentBlock() {
79-
this instanceof CppStyleComment implies
80-
not exists(CppStyleComment pred, File f | lineInFile(pred, f) + 1 = lineInFile(this, f))
81-
}
91+
CommentBlock() {
92+
this instanceof CppStyleComment
93+
implies
94+
not exists(CppStyleComment pred, File f | lineInFile(pred, f) + 1 = lineInFile(this, f))
95+
}
8296

83-
/**
84-
* Gets the `i`th comment associated with this comment block.
85-
*/
86-
Comment getComment(int i) {
87-
(i = 0 and result = this) or
88-
exists(File f, int thisLine, int resultLine |
89-
commentId(this, f, thisLine) = commentId(result, f, resultLine) |
90-
i = resultLine - thisLine
91-
)
92-
}
97+
/**
98+
* Gets the `i`th comment associated with this comment block.
99+
*/
100+
Comment getComment(int i) {
101+
i = 0 and result = this
102+
or
103+
exists(File f, int thisLine, int resultLine |
104+
commentId(this, f, thisLine) = commentId(result, f, resultLine)
105+
|
106+
i = resultLine - thisLine
107+
)
108+
}
93109

94-
Comment lastComment() {
95-
result = this.getComment(max(int i | exists(this.getComment(i))))
96-
}
110+
Comment lastComment() { result = this.getComment(max(int i | exists(this.getComment(i)))) }
97111

98-
string getLine(int i) {
99-
this instanceof CStyleComment and result = this.getContents().regexpCapture("(?s)/\\*+(.*)\\*+/", 1).splitAt("\n", i)
100-
or
101-
this instanceof CppStyleComment and result = this.getComment(i).getContents().suffix(2)
102-
}
112+
string getLine(int i) {
113+
this instanceof CStyleComment and
114+
result = this.getContents().regexpCapture("(?s)/\\*+(.*)\\*+/", 1).splitAt("\n", i)
115+
or
116+
this instanceof CppStyleComment and result = this.getComment(i).getContents().suffix(2)
117+
}
103118

104-
int numLines() {
105-
result = strictcount(int i, string line | line = this.getLine(i) and line.trim() != "")
106-
}
119+
int numLines() {
120+
result = strictcount(int i, string line | line = this.getLine(i) and line.trim() != "")
121+
}
107122

108-
int numCodeLines() {
109-
result = strictcount(int i, string line | line = this.getLine(i) and looksLikeCode(line))
110-
}
123+
int numCodeLines() {
124+
result = strictcount(int i, string line | line = this.getLine(i) and looksLikeCode(line))
125+
}
111126

112-
predicate isDocumentation() {
113-
// If a C-style comment starts each line with a *, then it's
114-
// probably documentation rather than code.
115-
this instanceof CStyleComment and
116-
forex(int i | i in [1 .. this.numLines() - 1]
117-
| this.getLine(i).trim().matches("*%"))
118-
}
127+
predicate isDocumentation() {
128+
// If a C-style comment starts each line with a *, then it's
129+
// probably documentation rather than code.
130+
this instanceof CStyleComment and
131+
forex(int i | i in [1 .. this.numLines() - 1] | this.getLine(i).trim().matches("*%"))
132+
}
119133

120-
predicate isCommentedOutCode() {
121-
not this.isDocumentation() and
122-
this.numCodeLines().(float) / this.numLines().(float) > 0.5
123-
}
134+
predicate isCommentedOutCode() {
135+
not this.isDocumentation() and
136+
this.numCodeLines().(float) / this.numLines().(float) > 0.5
137+
}
124138

125-
/**
126-
* Holds if this element is at the specified location.
127-
* The location spans column `startcolumn` of line `startline` to
128-
* column `endcolumn` of line `endline` in file `filepath`.
129-
* For more information, see
130-
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
131-
*/
132-
predicate hasLocationInfo(string filepath, int startline, int startcolumn, int endline, int endcolumn) {
133-
this.getLocation().hasLocationInfo(filepath, startline, startcolumn, _, _) and
134-
this.lastComment().getLocation().hasLocationInfo(_, _, _, endline, endcolumn)
135-
}
139+
/**
140+
* Holds if this element is at the specified location.
141+
* The location spans column `startcolumn` of line `startline` to
142+
* column `endcolumn` of line `endline` in file `filepath`.
143+
* For more information, see
144+
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
145+
*/
146+
predicate hasLocationInfo(
147+
string filepath, int startline, int startcolumn, int endline, int endcolumn
148+
) {
149+
this.getLocation().hasLocationInfo(filepath, startline, startcolumn, _, _) and
150+
this.lastComment().getLocation().hasLocationInfo(_, _, _, endline, endcolumn)
151+
}
136152
}
137153

138154
/**
139155
* A piece of commented-out code, identified using heuristics
140156
*/
141157
class CommentedOutCode extends CommentBlock {
142-
CommentedOutCode() {
143-
this.isCommentedOutCode()
144-
}
158+
CommentedOutCode() { this.isCommentedOutCode() }
145159
}
146-

cpp/ql/test/query-tests/Documentation/CommentedOutCode/CommentedOutCode.expected

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
11
| test2.cpp:37:1:37:39 | // int myFunction() { return myValue; } | This comment appears to contain commented-out code |
22
| test2.cpp:39:1:39:45 | // int myFunction() const { return myValue; } | This comment appears to contain commented-out code |
33
| test2.cpp:41:1:41:54 | // int myFunction() const noexcept { return myValue; } | This comment appears to contain commented-out code |
4+
| test2.cpp:43:1:43:18 | // #define MYMACRO | This comment appears to contain commented-out code |
5+
| test2.cpp:45:1:45:23 | // #include "include.h" | This comment appears to contain commented-out code |
6+
| test2.cpp:47:1:51:2 | /*\n#ifdef\nvoid myFunction();\n#endif\n*/ | This comment appears to contain commented-out code |
7+
| test2.cpp:59:1:59:24 | // #if(defined(MYMACRO)) | This comment appears to contain commented-out code |
8+
| test2.cpp:63:1:63:15 | // #pragma once | This comment appears to contain commented-out code |
9+
| test2.cpp:65:1:65:17 | // # pragma once | This comment appears to contain commented-out code |
10+
| test2.cpp:67:1:67:19 | /*#error"myerror"*/ | This comment appears to contain commented-out code |
411
| test.c:2:1:2:22 | // commented out code; | This comment appears to contain commented-out code |
512
| test.c:4:1:7:8 | // some; | This comment appears to contain commented-out code |
613
| test.c:9:1:13:8 | // also; | This comment appears to contain commented-out code |

cpp/ql/test/query-tests/Documentation/CommentedOutCode/test2.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,29 @@
3939
// int myFunction() const { return myValue; }
4040

4141
// int myFunction() const noexcept { return myValue; }
42+
43+
// #define MYMACRO
44+
45+
// #include "include.h"
46+
47+
/*
48+
#ifdef
49+
void myFunction();
50+
#endif
51+
*/
52+
53+
// define some constants
54+
55+
// don't #include anything here
56+
57+
// #hashtag
58+
59+
// #if(defined(MYMACRO))
60+
61+
// #iffy
62+
63+
// #pragma once
64+
65+
// # pragma once
66+
67+
/*#error"myerror"*/

0 commit comments

Comments
 (0)