1919import java .util .LinkedHashSet ;
2020import java .util .List ;
2121import java .util .Set ;
22+ import java .util .concurrent .ExecutorService ;
23+ import java .util .concurrent .Executors ;
24+ import java .util .concurrent .TimeUnit ;
2225import java .util .stream .Stream ;
2326
2427import com .semmle .js .extractor .ExtractorConfig .SourceType ;
159162 * </p>
160163 *
161164 * <p>
162- * Finally, the environment variables <code>LGTM_TRAP_CACHE</code> and
163- * <code>LGTM_TRAP_CACHE_BOUND</code> can optionally be used to specify the location and size
164- * of a trap cache to be used during extraction.
165+ * To customise the actual extraction (as opposed to determining which files to extract),
166+ * the following environment variables are available:
165167 * </p>
168+ * <ul>
169+ * <li><code>LGTM_INDEX_THREADS</code> determines how many threads are used for parallel extraction
170+ * of JavaScript files (TypeScript files cannot currently be extracted in parallel). If left
171+ * unspecified, the extractor uses as many threads as there are cores.</li>
172+ * <li><code>LGTM_TRAP_CACHE</code> and <code>LGTM_TRAP_CACHE_BOUND</code> can be used to specify the
173+ * location and size of a trap cache to be used during extraction.</li>
174+ * </ul>
166175 */
167176public class AutoBuild {
168177 private final ExtractorOutputConfig outputConfig ;
169178 private final ITrapCache trapCache ;
170- private Set <Path > includes = new LinkedHashSet <>();
171- private Set <Path > excludes = new LinkedHashSet <>();
179+ private final Set <Path > includes = new LinkedHashSet <>();
180+ private final Set <Path > excludes = new LinkedHashSet <>();
172181 private ProjectLayout filters ;
173182 private final Path LGTM_SRC , SEMMLE_DIST ;
174183 private final TypeScriptMode typeScriptMode ;
175184 private final String defaultEncoding ;
176- private ExtractorState extractorState ;
177- private long timedLogMessageStart = 0 ;
185+ private ExecutorService threadPool ;
178186
179187 public AutoBuild () {
180188 this .LGTM_SRC = toRealPath (getPathFromEnvVar ("LGTM_SRC" ));
@@ -183,7 +191,6 @@ public AutoBuild() {
183191 this .trapCache = mkTrapCache ();
184192 this .typeScriptMode = getEnumFromEnvVar ("LGTM_INDEX_TYPESCRIPT" , TypeScriptMode .class , TypeScriptMode .BASIC );
185193 this .defaultEncoding = getEnvVar ("LGTM_INDEX_DEFAULT_ENCODING" );
186- this .extractorState = new ExtractorState ();
187194 setupMatchers ();
188195 }
189196
@@ -375,8 +382,36 @@ private boolean addPathPattern(Set<Path> patterns, Path base, String pattern) {
375382 * Perform extraction.
376383 */
377384 public void run () throws IOException {
378- extractExterns ();
379- extractSource ();
385+ startThreadPool ();
386+ try {
387+ extractSource ();
388+ extractExterns ();
389+ } finally {
390+ shutdownThreadPool ();
391+ }
392+ }
393+
394+ private void startThreadPool () {
395+ int defaultNumThreads = Runtime .getRuntime ().availableProcessors ();
396+ int numThreads = Env .systemEnv ().getInt ("LGTM_INDEX_THREADS" , defaultNumThreads );
397+ if (numThreads > 1 ) {
398+ System .out .println ("Parallel extraction with " + numThreads + " threads." );
399+ threadPool = Executors .newFixedThreadPool (numThreads );
400+ } else {
401+ System .out .println ("Single-threaded extraction." );
402+ threadPool = null ;
403+ }
404+ }
405+
406+ private void shutdownThreadPool () {
407+ if (threadPool != null ) {
408+ threadPool .shutdown ();
409+ try {
410+ threadPool .awaitTermination (365 , TimeUnit .DAYS );
411+ } catch (InterruptedException e ) {
412+ Exceptions .ignore (e , "Awaiting termination is not essential." );
413+ }
414+ }
380415 }
381416
382417 /**
@@ -414,12 +449,12 @@ public File lookup(String source, ExtractorConfig config, FileType type) {
414449 }
415450 }
416451
417- FileExtractor extractor = new FileExtractor (config , outputConfig , trapCache , extractorState );
452+ FileExtractor extractor = new FileExtractor (config , outputConfig , trapCache );
418453 FileVisitor <? super Path > visitor = new SimpleFileVisitor <Path >() {
419454 @ Override
420455 public FileVisitResult visitFile (Path file , BasicFileAttributes attrs ) throws IOException {
421456 if (".js" .equals (FileUtil .extension (file .toString ())))
422- extract (extractor , file );
457+ extract (extractor , file , null );
423458 return super .visitFile (file , attrs );
424459 }
425460 };
@@ -436,10 +471,91 @@ private void extractSource() throws IOException {
436471 config = config .withTypeScriptMode (typeScriptMode );
437472 if (defaultEncoding != null )
438473 config = config .withDefaultEncoding (defaultEncoding );
439- FileExtractor extractor = new FileExtractor (config , outputConfig , trapCache , extractorState );
474+ FileExtractor extractor = new FileExtractor (config , outputConfig , trapCache );
475+
476+ Set <Path > filesToExtract = new LinkedHashSet <>();
477+ List <Path > tsconfigFiles = new ArrayList <>();
478+ findFilesToExtract (extractor , filesToExtract , tsconfigFiles );
479+
480+ // extract TypeScript projects and files
481+ Set <Path > extractedFiles = extractTypeScript (extractor , filesToExtract , tsconfigFiles );
482+
483+ // extract remaining files
484+ for (Path f : filesToExtract ) {
485+ if (extractedFiles .add (f )) {
486+ extract (extractor , f , null );
487+ }
488+ }
489+ }
490+
491+ private Set <Path > extractTypeScript (FileExtractor extractor , Set <Path > files , List <Path > tsconfig ) {
492+ Set <Path > extractedFiles = new LinkedHashSet <>();
493+
494+ if (hasTypeScriptFiles (files ) || !tsconfig .isEmpty ()) {
495+ ExtractorState extractorState = new ExtractorState ();
496+ TypeScriptParser tsParser = extractorState .getTypeScriptParser ();
497+ verifyTypeScriptInstallation (extractorState );
498+
499+ // Extract TypeScript projects
500+ for (Path projectPath : tsconfig ) {
501+ File projectFile = projectPath .toFile ();
502+ long start = logBeginProcess ("Opening project " + projectFile );
503+ ParsedProject project = tsParser .openProject (projectFile );
504+ logEndProcess (start , "Done opening project " + projectFile );
505+ // Extract all files belonging to this project which are also matched
506+ // by our include/exclude filters.
507+ List <File > typeScriptFiles = new ArrayList <File >();
508+ for (File sourceFile : project .getSourceFiles ()) {
509+ Path sourcePath = sourceFile .toPath ();
510+ if (!files .contains (normalizePath (sourcePath )))
511+ continue ;
512+ if (!extractedFiles .contains (sourcePath )) {
513+ typeScriptFiles .add (sourcePath .toFile ());
514+ }
515+ }
516+ extractTypeScriptFiles (typeScriptFiles , extractedFiles , extractor , extractorState );
517+ tsParser .closeProject (projectFile );
518+ }
519+
520+ // Extract all the types discovered when extracting the ASTs.
521+ if (!tsconfig .isEmpty ()) {
522+ TypeTable typeTable = tsParser .getTypeTable ();
523+ extractTypeTable (tsconfig .iterator ().next (), typeTable );
524+ }
525+
526+ // Extract remaining TypeScript files.
527+ List <File > remainingTypeScriptFiles = new ArrayList <File >();
528+ for (Path f : files ) {
529+ if (!extractedFiles .contains (f ) && FileType .forFileExtension (f .toFile ()) == FileType .TYPESCRIPT ) {
530+ remainingTypeScriptFiles .add (f .toFile ());
531+ }
532+ }
533+ if (!remainingTypeScriptFiles .isEmpty ()) {
534+ extractTypeScriptFiles (remainingTypeScriptFiles , extractedFiles , extractor , extractorState );
535+ }
536+
537+ // The TypeScript compiler instance is no longer needed.
538+ tsParser .killProcess ();
539+ }
540+
541+ return extractedFiles ;
542+ }
543+
544+ private boolean hasTypeScriptFiles (Set <Path > filesToExtract ) {
545+ for (Path file : filesToExtract ) {
546+ // Check if there are any files with the TypeScript extension.
547+ // Do not use FileType.forFile as it involves I/O for file header checks,
548+ // and files with a bad header have already been excluded.
549+ if (FileType .forFileExtension (file .toFile ()) == FileType .TYPESCRIPT )
550+ return true ;
551+ }
552+ return false ;
553+ }
554+
555+ private void findFilesToExtract (FileExtractor extractor ,
556+ final Set <Path > filesToExtract , final List <Path > tsconfigFiles )
557+ throws IOException {
440558 Path [] currentRoot = new Path [1 ];
441- final Set <Path > filesToExtract = new LinkedHashSet <>();
442- final List <Path > tsconfigFiles = new ArrayList <>();
443559 FileVisitor <? super Path > visitor = new SimpleFileVisitor <Path >() {
444560 private boolean isFileIncluded (Path file ) {
445561 // normalise path for matching
@@ -481,87 +597,23 @@ public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) th
481597 currentRoot [0 ] = root ;
482598 Files .walkFileTree (currentRoot [0 ], visitor );
483599 }
484-
485- // If there are any .ts files, verify that TypeScript is installed.
486- TypeScriptParser tsParser = extractorState .getTypeScriptParser ();
487- boolean hasTypeScriptFiles = false ;
488- for (Path file : filesToExtract ) {
489- // Check if there are any files with the TypeScript extension.
490- // Do not use FileType.forFile as it involves I/O for file header checks,
491- // and files with a bad header have already been excluded.
492- if (FileType .forFileExtension (file .toFile ()) == FileType .TYPESCRIPT ) {
493- hasTypeScriptFiles = true ;
494- break ;
495- }
496- }
497- if (hasTypeScriptFiles || !tsconfigFiles .isEmpty ()) {
498- verifyTypeScriptInstallation ();
499- }
500-
501- // Extract TypeScript projects
502- Set <Path > extractedFiles = new LinkedHashSet <>();
503- for (Path projectPath : tsconfigFiles ) {
504- File projectFile = projectPath .toFile ();
505- logBeginProcess ("Opening project " + projectFile );
506- ParsedProject project = tsParser .openProject (projectFile );
507- logEndProcess ();
508- // Extract all files belonging to this project which are also matched
509- // by our include/exclude filters.
510- List <File > typeScriptFiles = new ArrayList <File >();
511- for (File sourceFile : project .getSourceFiles ()) {
512- Path sourcePath = sourceFile .toPath ();
513- if (!filesToExtract .contains (normalizePath (sourcePath )))
514- continue ;
515- if (!extractedFiles .contains (sourcePath )) {
516- typeScriptFiles .add (sourcePath .toFile ());
517- }
518- }
519- extractTypeScriptFiles (typeScriptFiles , extractedFiles , extractor );
520- tsParser .closeProject (projectFile );
521- }
522-
523- if (!tsconfigFiles .isEmpty ()) {
524- // Extract all the types discovered when extracting the ASTs.
525- TypeTable typeTable = tsParser .getTypeTable ();
526- extractTypeTable (tsconfigFiles .iterator ().next (), typeTable );
527- }
528-
529- // Extract remaining TypeScript files.
530- List <File > remainingTypeScriptFiles = new ArrayList <File >();
531- for (Path f : filesToExtract ) {
532- if (!extractedFiles .contains (f ) && FileType .forFileExtension (f .toFile ()) == FileType .TYPESCRIPT ) {
533- remainingTypeScriptFiles .add (f .toFile ());
534- }
535- }
536- if (!remainingTypeScriptFiles .isEmpty ()) {
537- extractTypeScriptFiles (remainingTypeScriptFiles , extractedFiles , extractor );
538- }
539-
540- // The TypeScript compiler instance is no longer needed.
541- tsParser .killProcess ();
542-
543- // Extract non-TypeScript files
544- for (Path f : filesToExtract ) {
545- if (extractedFiles .add (f )) {
546- extract (extractor , f );
547- }
548- }
549600 }
550601
551602 /**
552603 * Verifies that Node.js and the TypeScript compiler are installed and can be
553604 * found.
554605 */
555- public void verifyTypeScriptInstallation () {
606+ public void verifyTypeScriptInstallation (ExtractorState extractorState ) {
556607 extractorState .getTypeScriptParser ().verifyInstallation (true );
557608 }
558609
559- public void extractTypeScriptFiles (List <File > files , Set <Path > extractedFiles , FileExtractor extractor ) throws IOException {
610+ public void extractTypeScriptFiles (List <File > files , Set <Path > extractedFiles ,
611+ FileExtractor extractor , ExtractorState extractorState ) {
560612 extractorState .getTypeScriptParser ().prepareFiles (files );
561613 for (File f : files ) {
562614 Path path = f .toPath ();
563615 extractedFiles .add (path );
564- extract (extractor , f .toPath ());
616+ extract (extractor , f .toPath (), extractorState );
565617 }
566618 }
567619
@@ -596,35 +648,51 @@ private SourceType getSourceType() {
596648 }
597649
598650 /**
599- * Extract a single file.
651+ * Extract a single file using the given extractor and state.
652+ *
653+ * If the state is {@code null}, the extraction job will be submitted to the {@link #threadPool},
654+ * otherwise extraction will happen on the main thread.
600655 */
601- protected void extract (FileExtractor extractor , Path file ) throws IOException {
656+ protected void extract (FileExtractor extractor , Path file , ExtractorState state ) {
657+ if (state == null && threadPool != null )
658+ threadPool .submit (() -> doExtract (extractor , file , state ));
659+ else
660+ doExtract (extractor , file , state );
661+ }
662+
663+ private void doExtract (FileExtractor extractor , Path file , ExtractorState state ) {
602664 File f = file .toFile ();
603665 if (!f .exists ()) {
604666 warn ("Skipping " + file + ", which does not exist." );
605667 return ;
606668 }
607669
608- logBeginProcess ("Extracting " + file );
609- extractor .extract (f );
610- logEndProcess ();
670+ try {
671+ long start = logBeginProcess ("Extracting " + file );
672+ extractor .extract (f , state );
673+ logEndProcess (start , "Done extracting " + file );
674+ } catch (Throwable t ) {
675+ System .err .println ("Exception while extracting " + file + "." );
676+ t .printStackTrace (System .err );
677+ System .exit (1 );
678+ }
611679 }
612680
613681 private void warn (String msg ) {
614682 System .err .println (msg );
615683 System .err .flush ();
616684 }
617685
618- private void logBeginProcess (String message ) {
619- System .out .print (message + "..." );
620- System .out .flush ();
621- this .timedLogMessageStart = System .nanoTime ();
686+ private long logBeginProcess (String message ) {
687+ System .out .println (message );
688+ return System .nanoTime ();
622689 }
623690
624- private void logEndProcess () {
691+ private void logEndProcess (long timedLogMessageStart , String message ) {
625692 long end = System .nanoTime ();
626- int milliseconds = (int ) ((end - this .timedLogMessageStart ) / 1000000 );
627- System .out .println (" done (" + milliseconds + " ms)" );
693+ int milliseconds = (int ) ((end - timedLogMessageStart ) / 1_000_000 );
694+ System .out .println (message + " (" + milliseconds + " ms)" );
695+ System .out .flush ();
628696 }
629697
630698 public static void main (String [] args ) {
0 commit comments