@@ -2,24 +2,17 @@ import cpp
22
33/**
44 * Holds if `line` looks like a line of code.
5- * Matches comment lines ending with '{', '}' or ';' that do not start with '>' or contain '@{' or '@}', but first filters out:
6- * * HTML entities in common notation (e.g. &gt; and &eacute;)
7- * * HTML entities in decimal notation (e.g. &#224;)
8- * * HTML entities in hexadecimal notation (e.g. &#x705F;)
9- * To account for the code generated by protobuf, we also insist that the comment
10- * does not begin with `optional` or `repeated` and end with a `;`, which would
11- * normally be a quoted bit of literal `.proto` specification above the associated
12- * declaration.
13- * To account for emacs folding markers, we ignore any line containing
14- * `{{{` or `}}}`.
15- *
16- * Finally, some code tends to embed GUIDs in comments, so we also exclude those.
175 */
186bindingset [ line]
197private predicate looksLikeCode ( string line ) {
208 exists ( string trimmed |
9+ // trim leading and trailing whitespace, and HTML codes:
10+ // * HTML entities in common notation (e.g. &gt; and &eacute;)
11+ // * HTML entities in decimal notation (e.g. &#224;)
12+ // * HTML entities in hexadecimal notation (e.g. &#x705F;)
2113 trimmed = line .regexpReplaceAll ( "(?i)(^\\s+|&#?[a-z0-9]{1,31};|\\s+$)" , "" )
2214 |
15+ // Match comment lines ending with '{', '}' or ';'
2316 trimmed .regexpMatch ( ".*[{};]" ) and
2417 (
2518 // If this line looks like code because it ends with a closing
@@ -32,9 +25,18 @@ private predicate looksLikeCode(string line) {
3225 // benign use of braces such as a JSON example or explanatory
3326 // pseudocode.
3427 trimmed .regexpMatch ( ".*(\\)|const|volatile|override|final|noexcept|&)\\s*\\{.*" )
35- ) and
36- not trimmed
28+ ) and (
29+ // Exclude lines that start with '>' or contain '@{', '@}', '\{' or '\}'.
30+ // To account for the code generated by protobuf, we also insist that the comment
31+ // does not begin with `optional` or `repeated` and end with a `;`, which would
32+ // normally be a quoted bit of literal `.proto` specification above the associated
33+ // declaration.
34+ // To account for emacs folding markers, we ignore any line containing
35+ // `{{{` or `}}}`.
36+ // Finally, some code tends to embed GUIDs in comments, so we also exclude those.
37+ not trimmed
3738 .regexpMatch ( "(>.*|.*[\\\\@][{}].*|(optional|repeated) .*;|.*(\\{\\{\\{|\\}\\}\\}).*|\\{[-0-9a-zA-Z]+\\})" )
39+ )
3840 )
3941}
4042
0 commit comments