@@ -3,11 +3,45 @@ import semmle.code.cpp.File
33import semmle.code.cpp.Preprocessor
44
55/**
6- * Holds if `c` is a comment which is usually seen in autogenerated files.
7- * For example, comments containing 'autogenerated' or ' generated by' .
6+ * Holds if the comment text `comment` indicates that it might be in an auto-generated file, for
7+ * example because it contains the text "auto-generated by".
88 */
9- predicate isAutogeneratedComment ( Comment c ) {
10- c .getContents ( ) .regexpMatch ( "(?si).*(?:auto[ -]?generated|generated (?:by|file)|changes made in this file will be lost).*" )
9+ private bindingset [ comment] predicate autogeneratedComment ( string comment ) { // `comment` must be bound by the caller
10+ // Flags on the final pattern: ?s = include newlines in anything (`.`),
11+ // ?i = ignore case.
12+ exists ( string cond | // `cond` is a shorthand pattern; `generated`, `!` and ` ` in it are expanded further down
13+ cond =
14+ // generated by (not mid-sentence): prefixed with `^`, or with `!` (a separator, expanded below)
15+ "(^ generated by[^a-z])|" +
16+ "(! generated by[^a-z])|" +
17+
18+ // generated file
19+ "(generated file)|" +
20+
21+ // file [is/was/has been] generated
22+ "(file( is| was| has been)? generated)|" +
23+
24+ // changes made in this file will be lost
25+ "(changes made in this file will be lost)|" +
26+
27+ // do not edit/modify (same `^` / `!` prefixes as for `generated by`)
28+ "(^ do(n't|nt| not) (hand-?)?(edit|modify))|" +
29+ "(! do(n't|nt| not) (hand-?)?(edit|modify))" and
30+
31+ comment .regexpMatch ( "(?si).*(" + // the expanded `cond` is wrapped as `.*( ... ).*`
32+ // replace `generated` with a regexp that also catches things like
33+ // `auto-generated`. The inserted text contains no `!` or space, so the
34+ cond .replaceAll ( "generated" , "(auto[\\w-]*[\\s/\\*\\r\\n]*)?generated" ) // later steps leave it alone
35+
36+ // replace `!` with a regexp for end-of-sentence / separator characters.
37+ .replaceAll ( "!" , "[\\.\\?\\!\\-\\;\\,]" )
38+
39+ // replace ` ` with a regexp for one or more whitespace characters
40+ // (including newlines and `/*`). Done last, so it applies to every space left in `cond`.
41+ .replaceAll ( " " , "[\\s/\\*\\r\\n]+" ) +
42+ ").*"
43+ )
44+ )
1145}
1246
1347/**
@@ -25,6 +59,48 @@ predicate hasPragmaDifferentFile(File f) {
2559 )
2660}
2761
62+ /**
63+ * Gets the line where the first comment in file `f` begins, or 5 if no comment begins before
64+ * line 5. This allows us to skip past any preprocessor logic or similar code before the first comment.
65+ */
66+ private int fileFirstComment ( File f ) {
67+ result = min ( int line | // smallest qualifying line; always at most 5
68+ exists ( Comment c |
69+ c .getFile ( ) = f and
70+ c .getLocation ( ) .getStartLine ( ) = line and
71+ line < 5
72+ ) or line = 5 // fallback keeps the range non-empty: `min` over no values has no result at all
73+ )
74+ }
75+
76+ /**
77+ * Gets the line where the initial comments of file `f` end. This is just before the
78+ * first bit of code, excluding anything skipped over by `fileFirstComment`.
79+ */
80+ private int fileHeaderLimit ( File f ) {
81+ exists ( int fc |
82+ fc = fileFirstComment ( f ) and // no result for `f` when `fileFirstComment ( f )` has none
83+ result = min ( int line | // the smallest of the candidate lines below
84+ exists ( DeclarationEntry de , Location l | // candidate: the line before a declaration entry in `f`
85+ l = de .getLocation ( ) and
86+ l .getFile ( ) = f and
87+ line = l .getStartLine ( ) - 1 and
88+ line > fc // only candidates after the first-comment line
89+ ) or exists ( PreprocessorDirective pd , Location l | // candidate: the line before a preprocessor directive in `f`
90+ l = pd .getLocation ( ) and
91+ l .getFile ( ) = f and
92+ line = l .getStartLine ( ) - 1 and
93+ line > fc
94+ ) or exists ( NamespaceDeclarationEntry nde , Location l | // candidate: the line before a namespace declaration entry in `f`
95+ l = nde .getLocation ( ) and
96+ l .getFile ( ) = f and
97+ line = l .getStartLine ( ) - 1 and
98+ line > fc
99+ ) or line = f .getMetrics ( ) .getNumberOfLines ( ) // fallback candidate: the number of lines from the file's metrics (not constrained by `fc`)
100+ )
101+ )
102+ }
103+
28104/**
29105 * Holds if the file is probably an autogenerated file.
30106 *
@@ -36,12 +112,13 @@ predicate hasPragmaDifferentFile(File f) {
36112 */
37113class AutogeneratedFile extends File {
38114 cached AutogeneratedFile ( ) {
39- exists ( int limit , int head |
40- head <= 5 and
41- limit = max ( int line | locations_default ( _, underlyingElement ( this ) , head , _, line , _) ) + 5
42- |
43- exists ( Comment c | c .getFile ( ) = this and c .getLocation ( ) .getStartLine ( ) <= limit and isAutogeneratedComment ( c ) )
44- )
45- or hasPragmaDifferentFile ( this )
115+ autogeneratedComment ( // first criterion: the header comment text, tested as one string
116+ strictconcat ( Comment c | // joins the selected comments' contents; has no result when none are selected
117+ c .getFile ( ) = this and
118+ c .getLocation ( ) .getStartLine ( ) <= fileHeaderLimit ( this ) | // only comments starting at or before the header limit
119+ c .getContents ( ) order by c .getLocation ( ) .getStartLine ( ) // concatenated in start-line order
120+ )
121+ ) or
122+ hasPragmaDifferentFile ( this ) // second criterion, as an alternative to the comment test
46123 }
47124}
0 commit comments