@@ -2,45 +2,57 @@ import cpp
22
33/**
44 * Holds if `line` looks like a line of code.
5- * Matches comment lines ending with '{', '}' or ';' that do not start with '>' or contain '@{' or '@}', but first filters out:
6- * * HTML entities in common notation (e.g. > and é)
7- * * HTML entities in decimal notation (e.g. à)
8- * * HTML entities in hexadecimal notation (e.g. 灟)
9- * To account for the code generated by protobuf, we also insist that the comment
10- * does not begin with `optional` or `repeated` and end with a `;`, which would
11- * normally be a quoted bit of literal `.proto` specification above the associated
12- * declaration.
13- * To account for emacs folding markers, we ignore any line containing
14- * `{{{` or `}}}`.
15- *
16- * Finally, some code tends to embed GUIDs in comments, so we also exclude those.
175 */
186bindingset [ line]
197private predicate looksLikeCode ( string line ) {
20- exists ( string trimmed |
21- trimmed = line .regexpReplaceAll ( "(?i)(^\\s+|&#?[a-z0-9]{1,31};|\\s+$)" , "" ) |
22- trimmed .regexpMatch ( ".*[{};]" )
23- and (
24- // If this line looks like code because it ends with a closing
25- // brace that's preceded by something other than whitespace ...
26- trimmed .regexpMatch ( ".*.\\}" )
27- implies
28- // ... then there has to be ") {" (or some variation)
29- // on the line, suggesting it's a statement like `if`
30- // or a function declaration. Otherwise it's likely to be a
31- // benign use of braces such as a JSON example or explanatory
32- // pseudocode.
33- trimmed .regexpMatch ( ".*(\\)|const|volatile|override|final|noexcept|&)\\s*\\{.*" )
34- )
35- and not trimmed .regexpMatch ( "(>.*|.*[\\\\@][{}].*|(optional|repeated) .*;|.*(\\{\\{\\{|\\}\\}\\}).*|\\{[-0-9a-zA-Z]+\\})" ) )
8+ exists ( string trimmed |
9+ // trim leading and trailing whitespace, and HTML codes:
10+ // * HTML entities in common notation (e.g. > and é)
11+ // * HTML entities in decimal notation (e.g. à)
12+ // * HTML entities in hexadecimal notation (e.g. 灟)
13+ trimmed = line .regexpReplaceAll ( "(?i)(^\\s+|&#?[a-z0-9]{1,31};|\\s+$)" , "" )
14+ |
15+ (
16+ (
17+ // Match comment lines ending with '{', '}' or ';'
18+ trimmed .regexpMatch ( ".*[{};]" ) and
19+ (
20+ // If this line looks like code because it ends with a closing
21+ // brace that's preceded by something other than whitespace ...
22+ trimmed .regexpMatch ( ".*.\\}" )
23+ implies
24+ // ... then there has to be ") {" (or some variation)
25+ // on the line, suggesting it's a statement like `if`
26+ // or a function definition. Otherwise it's likely to be a
27+ // benign use of braces such as a JSON example or explanatory
28+ // pseudocode.
29+ trimmed .regexpMatch ( ".*(\\)|const|volatile|override|final|noexcept|&)\\s*\\{.*" )
30+ )
31+ ) or (
32+ // Match comment lines that look like preprocessor code
33+ trimmed .regexpMatch ( "#\\s*(include|define|undef|if|ifdef|ifndef|elif|else|endif|error|pragma)\\b.*" )
34+ )
35+ ) and (
36+ // Exclude lines that start with '>' or contain '@{' or '@}'.
37+ // To account for the code generated by protobuf, we also insist that the comment
38+ // does not begin with `optional` or `repeated` and end with a `;`, which would
39+ // normally be a quoted bit of literal `.proto` specification above the associated
40+ // declaration.
41+ // To account for emacs folding markers, we ignore any line containing
42+ // `{{{` or `}}}`.
43+ // Finally, some code tends to embed GUIDs in comments, so we also exclude those.
44+ not trimmed
45+ .regexpMatch ( "(>.*|.*[\\\\@][{}].*|(optional|repeated) .*;|.*(\\{\\{\\{|\\}\\}\\}).*|\\{[-0-9a-zA-Z]+\\})" )
46+ )
47+ )
3648}
3749
3850/**
3951 * The line of a C++-style comment within its file `f`.
4052 */
4153private int lineInFile ( CppStyleComment c , File f ) {
42- f = c .getFile ( ) and
43- result = c .getLocation ( ) .getStartLine ( )
54+ f = c .getFile ( ) and
55+ result = c .getLocation ( ) .getStartLine ( )
4456}
4557
4658/**
@@ -53,11 +65,12 @@ private int lineInFile(CppStyleComment c, File f) {
5365 * without line comments would increase the line number without increasing
5466 * the rank and thus force a change of block ID.
5567 */
56- private pragma [ nomagic] int commentLineBlockID ( File f , int line ) {
57- exists ( int lineRank |
58- line = rank [ lineRank ] ( lineInFile ( _, f ) ) and
59- result = line - lineRank
60- )
68+ pragma [ nomagic]
69+ private int commentLineBlockID ( File f , int line ) {
70+ exists ( int lineRank |
71+ line = rank [ lineRank ] ( lineInFile ( _, f ) ) and
72+ result = line - lineRank
73+ )
6174}
6275
6376/**
@@ -67,80 +80,80 @@ private pragma[nomagic] int commentLineBlockID(File f, int line) {
6780 * for separate runs.
6881 */
6982private int commentId ( CppStyleComment c , File f , int line ) {
70- result = commentLineBlockID ( f , line ) and
71- line = lineInFile ( c , f )
83+ result = commentLineBlockID ( f , line ) and
84+ line = lineInFile ( c , f )
7285}
7386
7487/**
7588 * A contiguous block of comments.
7689 */
7790class CommentBlock extends Comment {
78- CommentBlock ( ) {
79- this instanceof CppStyleComment implies
80- not exists ( CppStyleComment pred , File f | lineInFile ( pred , f ) + 1 = lineInFile ( this , f ) )
81- }
91+ CommentBlock ( ) {
92+ this instanceof CppStyleComment
93+ implies
94+ not exists ( CppStyleComment pred , File f | lineInFile ( pred , f ) + 1 = lineInFile ( this , f ) )
95+ }
8296
83- /**
84- * Gets the `i`th comment associated with this comment block.
85- */
86- Comment getComment ( int i ) {
87- ( i = 0 and result = this ) or
88- exists ( File f , int thisLine , int resultLine |
89- commentId ( this , f , thisLine ) = commentId ( result , f , resultLine ) |
90- i = resultLine - thisLine
91- )
92- }
97+ /**
98+ * Gets the `i`th comment associated with this comment block.
99+ */
100+ Comment getComment ( int i ) {
101+ i = 0 and result = this
102+ or
103+ exists ( File f , int thisLine , int resultLine |
104+ commentId ( this , f , thisLine ) = commentId ( result , f , resultLine )
105+ |
106+ i = resultLine - thisLine
107+ )
108+ }
93109
94- Comment lastComment ( ) {
95- result = this .getComment ( max ( int i | exists ( this .getComment ( i ) ) ) )
96- }
110+ Comment lastComment ( ) { result = this .getComment ( max ( int i | exists ( this .getComment ( i ) ) ) ) }
97111
98- string getLine ( int i ) {
99- this instanceof CStyleComment and result = this .getContents ( ) .regexpCapture ( "(?s)/\\*+(.*)\\*+/" , 1 ) .splitAt ( "\n" , i )
100- or
101- this instanceof CppStyleComment and result = this .getComment ( i ) .getContents ( ) .suffix ( 2 )
102- }
112+ string getLine ( int i ) {
113+ this instanceof CStyleComment and
114+ result = this .getContents ( ) .regexpCapture ( "(?s)/\\*+(.*)\\*+/" , 1 ) .splitAt ( "\n" , i )
115+ or
116+ this instanceof CppStyleComment and result = this .getComment ( i ) .getContents ( ) .suffix ( 2 )
117+ }
103118
104- int numLines ( ) {
105- result = strictcount ( int i , string line | line = this .getLine ( i ) and line .trim ( ) != "" )
106- }
119+ int numLines ( ) {
120+ result = strictcount ( int i , string line | line = this .getLine ( i ) and line .trim ( ) != "" )
121+ }
107122
108- int numCodeLines ( ) {
109- result = strictcount ( int i , string line | line = this .getLine ( i ) and looksLikeCode ( line ) )
110- }
123+ int numCodeLines ( ) {
124+ result = strictcount ( int i , string line | line = this .getLine ( i ) and looksLikeCode ( line ) )
125+ }
111126
112- predicate isDocumentation ( ) {
113- // If a C-style comment starts each line with a *, then it's
114- // probably documentation rather than code.
115- this instanceof CStyleComment and
116- forex ( int i | i in [ 1 .. this .numLines ( ) - 1 ]
117- | this .getLine ( i ) .trim ( ) .matches ( "*%" ) )
118- }
127+ predicate isDocumentation ( ) {
128+ // If a C-style comment starts each line with a *, then it's
129+ // probably documentation rather than code.
130+ this instanceof CStyleComment and
131+ forex ( int i | i in [ 1 .. this .numLines ( ) - 1 ] | this .getLine ( i ) .trim ( ) .matches ( "*%" ) )
132+ }
119133
120- predicate isCommentedOutCode ( ) {
121- not this .isDocumentation ( ) and
122- this .numCodeLines ( ) .( float ) / this .numLines ( ) .( float ) > 0.5
123- }
134+ predicate isCommentedOutCode ( ) {
135+ not this .isDocumentation ( ) and
136+ this .numCodeLines ( ) .( float ) / this .numLines ( ) .( float ) > 0.5
137+ }
124138
125- /**
126- * Holds if this element is at the specified location.
127- * The location spans column `startcolumn` of line `startline` to
128- * column `endcolumn` of line `endline` in file `filepath`.
129- * For more information, see
130- * [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
131- */
132- predicate hasLocationInfo ( string filepath , int startline , int startcolumn , int endline , int endcolumn ) {
133- this .getLocation ( ) .hasLocationInfo ( filepath , startline , startcolumn , _, _) and
134- this .lastComment ( ) .getLocation ( ) .hasLocationInfo ( _, _, _, endline , endcolumn )
135- }
139+ /**
140+ * Holds if this element is at the specified location.
141+ * The location spans column `startcolumn` of line `startline` to
142+ * column `endcolumn` of line `endline` in file `filepath`.
143+ * For more information, see
144+ * [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
145+ */
146+ predicate hasLocationInfo (
147+ string filepath , int startline , int startcolumn , int endline , int endcolumn
148+ ) {
149+ this .getLocation ( ) .hasLocationInfo ( filepath , startline , startcolumn , _, _) and
150+ this .lastComment ( ) .getLocation ( ) .hasLocationInfo ( _, _, _, endline , endcolumn )
151+ }
136152}
137153
138154/**
139155 * A piece of commented-out code, identified using heuristics
140156 */
141157class CommentedOutCode extends CommentBlock {
142- CommentedOutCode ( ) {
143- this .isCommentedOutCode ( )
144- }
158+ CommentedOutCode ( ) { this .isCommentedOutCode ( ) }
145159}
146-
0 commit comments