Skip to content

Commit dd84b60

Browse files
authored
Merge pull request #794 from xiemaisi/js/parallel-extraction
Approved by asger-semmle, esben-semmle
2 parents 261cd36 + 740acc1 commit dd84b60

File tree

6 files changed

+212
-139
lines changed

6 files changed

+212
-139
lines changed

change-notes/1.20/extractor-javascript.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
## Changes to code extraction
2020

21+
* Extraction of JavaScript files (but not TypeScript files) on LGTM is now parallelized. By default, the extractor uses as many threads as there are processors, but this can be overridden by setting the `LGTM_INDEX_THREADS` environment variable. In particular, setting `LGTM_INDEX_THREADS` to 1 disables parallel extraction.
2122
* The extractor now supports additional [Flow](https://flow.org/) syntax.
2223
* The extractor now supports [Nullish Coalescing](https://github.com/tc39/proposal-nullish-coalescing) expressions.
2324
* The extractor now supports [TypeScript 3.2](https://www.typescriptlang.org/docs/handbook/release-notes/typescript-3-2.html).

javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java

Lines changed: 163 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@
1919
import java.util.LinkedHashSet;
2020
import java.util.List;
2121
import java.util.Set;
22+
import java.util.concurrent.ExecutorService;
23+
import java.util.concurrent.Executors;
24+
import java.util.concurrent.TimeUnit;
2225
import java.util.stream.Stream;
2326

2427
import com.semmle.js.extractor.ExtractorConfig.SourceType;
@@ -159,22 +162,27 @@
159162
* </p>
160163
*
161164
* <p>
162-
* Finally, the environment variables <code>LGTM_TRAP_CACHE</code> and
163-
* <code>LGTM_TRAP_CACHE_BOUND</code> can optionally be used to specify the location and size
164-
* of a trap cache to be used during extraction.
165+
* To customise the actual extraction (as opposed to determining which files to extract),
166+
* the following environment variables are available:
165167
* </p>
168+
* <ul>
169+
* <li><code>LGTM_INDEX_THREADS</code> determines how many threads are used for parallel extraction
170+
* of JavaScript files (TypeScript files cannot currently be extracted in parallel). If left
171+
* unspecified, the extractor uses as many threads as there are cores.</li>
172+
* <li><code>LGTM_TRAP_CACHE</code> and <code>LGTM_TRAP_CACHE_BOUND</code> can be used to specify the
173+
* location and size of a trap cache to be used during extraction.</li>
174+
* </ul>
166175
*/
167176
public class AutoBuild {
168177
private final ExtractorOutputConfig outputConfig;
169178
private final ITrapCache trapCache;
170-
private Set<Path> includes = new LinkedHashSet<>();
171-
private Set<Path> excludes = new LinkedHashSet<>();
179+
private final Set<Path> includes = new LinkedHashSet<>();
180+
private final Set<Path> excludes = new LinkedHashSet<>();
172181
private ProjectLayout filters;
173182
private final Path LGTM_SRC, SEMMLE_DIST;
174183
private final TypeScriptMode typeScriptMode;
175184
private final String defaultEncoding;
176-
private ExtractorState extractorState;
177-
private long timedLogMessageStart = 0;
185+
private ExecutorService threadPool;
178186

179187
public AutoBuild() {
180188
this.LGTM_SRC = toRealPath(getPathFromEnvVar("LGTM_SRC"));
@@ -183,7 +191,6 @@ public AutoBuild() {
183191
this.trapCache = mkTrapCache();
184192
this.typeScriptMode = getEnumFromEnvVar("LGTM_INDEX_TYPESCRIPT", TypeScriptMode.class, TypeScriptMode.BASIC);
185193
this.defaultEncoding = getEnvVar("LGTM_INDEX_DEFAULT_ENCODING");
186-
this.extractorState = new ExtractorState();
187194
setupMatchers();
188195
}
189196

@@ -375,8 +382,36 @@ private boolean addPathPattern(Set<Path> patterns, Path base, String pattern) {
375382
* Perform extraction.
376383
*/
377384
public void run() throws IOException {
378-
extractExterns();
379-
extractSource();
385+
startThreadPool();
386+
try {
387+
extractSource();
388+
extractExterns();
389+
} finally {
390+
shutdownThreadPool();
391+
}
392+
}
393+
394+
private void startThreadPool() {
395+
int defaultNumThreads = Runtime.getRuntime().availableProcessors();
396+
int numThreads = Env.systemEnv().getInt("LGTM_INDEX_THREADS", defaultNumThreads);
397+
if (numThreads > 1) {
398+
System.out.println("Parallel extraction with " + numThreads + " threads.");
399+
threadPool = Executors.newFixedThreadPool(numThreads);
400+
} else {
401+
System.out.println("Single-threaded extraction.");
402+
threadPool = null;
403+
}
404+
}
405+
406+
private void shutdownThreadPool() {
407+
if (threadPool != null) {
408+
threadPool.shutdown();
409+
try {
410+
threadPool.awaitTermination(365, TimeUnit.DAYS);
411+
} catch (InterruptedException e) {
412+
Exceptions.ignore(e, "Awaiting termination is not essential.");
413+
}
414+
}
380415
}
381416

382417
/**
@@ -414,12 +449,12 @@ public File lookup(String source, ExtractorConfig config, FileType type) {
414449
}
415450
}
416451

417-
FileExtractor extractor = new FileExtractor(config, outputConfig, trapCache, extractorState);
452+
FileExtractor extractor = new FileExtractor(config, outputConfig, trapCache);
418453
FileVisitor<? super Path> visitor = new SimpleFileVisitor<Path>() {
419454
@Override
420455
public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
421456
if (".js".equals(FileUtil.extension(file.toString())))
422-
extract(extractor, file);
457+
extract(extractor, file, null);
423458
return super.visitFile(file, attrs);
424459
}
425460
};
@@ -436,10 +471,91 @@ private void extractSource() throws IOException {
436471
config = config.withTypeScriptMode(typeScriptMode);
437472
if (defaultEncoding != null)
438473
config = config.withDefaultEncoding(defaultEncoding);
439-
FileExtractor extractor = new FileExtractor(config, outputConfig, trapCache, extractorState);
474+
FileExtractor extractor = new FileExtractor(config, outputConfig, trapCache);
475+
476+
Set<Path> filesToExtract = new LinkedHashSet<>();
477+
List<Path> tsconfigFiles = new ArrayList<>();
478+
findFilesToExtract(extractor, filesToExtract, tsconfigFiles);
479+
480+
// extract TypeScript projects and files
481+
Set<Path> extractedFiles = extractTypeScript(extractor, filesToExtract, tsconfigFiles);
482+
483+
// extract remaining files
484+
for (Path f : filesToExtract) {
485+
if (extractedFiles.add(f)) {
486+
extract(extractor, f, null);
487+
}
488+
}
489+
}
490+
491+
private Set<Path> extractTypeScript(FileExtractor extractor, Set<Path> files, List<Path> tsconfig) {
492+
Set<Path> extractedFiles = new LinkedHashSet<>();
493+
494+
if (hasTypeScriptFiles(files) || !tsconfig.isEmpty()) {
495+
ExtractorState extractorState = new ExtractorState();
496+
TypeScriptParser tsParser = extractorState.getTypeScriptParser();
497+
verifyTypeScriptInstallation(extractorState);
498+
499+
// Extract TypeScript projects
500+
for (Path projectPath : tsconfig) {
501+
File projectFile = projectPath.toFile();
502+
long start = logBeginProcess("Opening project " + projectFile);
503+
ParsedProject project = tsParser.openProject(projectFile);
504+
logEndProcess(start, "Done opening project " + projectFile);
505+
// Extract all files belonging to this project which are also matched
506+
// by our include/exclude filters.
507+
List<File> typeScriptFiles = new ArrayList<File>();
508+
for (File sourceFile : project.getSourceFiles()) {
509+
Path sourcePath = sourceFile.toPath();
510+
if (!files.contains(normalizePath(sourcePath)))
511+
continue;
512+
if (!extractedFiles.contains(sourcePath)) {
513+
typeScriptFiles.add(sourcePath.toFile());
514+
}
515+
}
516+
extractTypeScriptFiles(typeScriptFiles, extractedFiles, extractor, extractorState);
517+
tsParser.closeProject(projectFile);
518+
}
519+
520+
// Extract all the types discovered when extracting the ASTs.
521+
if (!tsconfig.isEmpty()) {
522+
TypeTable typeTable = tsParser.getTypeTable();
523+
extractTypeTable(tsconfig.iterator().next(), typeTable);
524+
}
525+
526+
// Extract remaining TypeScript files.
527+
List<File> remainingTypeScriptFiles = new ArrayList<File>();
528+
for (Path f : files) {
529+
if (!extractedFiles.contains(f) && FileType.forFileExtension(f.toFile()) == FileType.TYPESCRIPT) {
530+
remainingTypeScriptFiles.add(f.toFile());
531+
}
532+
}
533+
if (!remainingTypeScriptFiles.isEmpty()) {
534+
extractTypeScriptFiles(remainingTypeScriptFiles, extractedFiles, extractor, extractorState);
535+
}
536+
537+
// The TypeScript compiler instance is no longer needed.
538+
tsParser.killProcess();
539+
}
540+
541+
return extractedFiles;
542+
}
543+
544+
private boolean hasTypeScriptFiles(Set<Path> filesToExtract) {
545+
for (Path file : filesToExtract) {
546+
// Check if there are any files with the TypeScript extension.
547+
// Do not use FileType.forFile as it involves I/O for file header checks,
548+
// and files with a bad header have already been excluded.
549+
if (FileType.forFileExtension(file.toFile()) == FileType.TYPESCRIPT)
550+
return true;
551+
}
552+
return false;
553+
}
554+
555+
private void findFilesToExtract(FileExtractor extractor,
556+
final Set<Path> filesToExtract, final List<Path> tsconfigFiles)
557+
throws IOException {
440558
Path[] currentRoot = new Path[1];
441-
final Set<Path> filesToExtract = new LinkedHashSet<>();
442-
final List<Path> tsconfigFiles = new ArrayList<>();
443559
FileVisitor<? super Path> visitor = new SimpleFileVisitor<Path>() {
444560
private boolean isFileIncluded(Path file) {
445561
// normalise path for matching
@@ -481,87 +597,23 @@ public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) th
481597
currentRoot[0] = root;
482598
Files.walkFileTree(currentRoot[0], visitor);
483599
}
484-
485-
// If there are any .ts files, verify that TypeScript is installed.
486-
TypeScriptParser tsParser = extractorState.getTypeScriptParser();
487-
boolean hasTypeScriptFiles = false;
488-
for (Path file : filesToExtract) {
489-
// Check if there are any files with the TypeScript extension.
490-
// Do not use FileType.forFile as it involves I/O for file header checks,
491-
// and files with a bad header have already been excluded.
492-
if (FileType.forFileExtension(file.toFile()) == FileType.TYPESCRIPT) {
493-
hasTypeScriptFiles = true;
494-
break;
495-
}
496-
}
497-
if (hasTypeScriptFiles || !tsconfigFiles.isEmpty()) {
498-
verifyTypeScriptInstallation();
499-
}
500-
501-
// Extract TypeScript projects
502-
Set<Path> extractedFiles = new LinkedHashSet<>();
503-
for (Path projectPath : tsconfigFiles) {
504-
File projectFile = projectPath.toFile();
505-
logBeginProcess("Opening project " + projectFile);
506-
ParsedProject project = tsParser.openProject(projectFile);
507-
logEndProcess();
508-
// Extract all files belonging to this project which are also matched
509-
// by our include/exclude filters.
510-
List<File> typeScriptFiles = new ArrayList<File>();
511-
for (File sourceFile : project.getSourceFiles()) {
512-
Path sourcePath = sourceFile.toPath();
513-
if (!filesToExtract.contains(normalizePath(sourcePath)))
514-
continue;
515-
if (!extractedFiles.contains(sourcePath)) {
516-
typeScriptFiles.add(sourcePath.toFile());
517-
}
518-
}
519-
extractTypeScriptFiles(typeScriptFiles, extractedFiles, extractor);
520-
tsParser.closeProject(projectFile);
521-
}
522-
523-
if (!tsconfigFiles.isEmpty()) {
524-
// Extract all the types discovered when extracting the ASTs.
525-
TypeTable typeTable = tsParser.getTypeTable();
526-
extractTypeTable(tsconfigFiles.iterator().next(), typeTable);
527-
}
528-
529-
// Extract remaining TypeScript files.
530-
List<File> remainingTypeScriptFiles = new ArrayList<File>();
531-
for (Path f : filesToExtract) {
532-
if (!extractedFiles.contains(f) && FileType.forFileExtension(f.toFile()) == FileType.TYPESCRIPT) {
533-
remainingTypeScriptFiles.add(f.toFile());
534-
}
535-
}
536-
if (!remainingTypeScriptFiles.isEmpty()) {
537-
extractTypeScriptFiles(remainingTypeScriptFiles, extractedFiles, extractor);
538-
}
539-
540-
// The TypeScript compiler instance is no longer needed.
541-
tsParser.killProcess();
542-
543-
// Extract non-TypeScript files
544-
for (Path f : filesToExtract) {
545-
if (extractedFiles.add(f)) {
546-
extract(extractor, f);
547-
}
548-
}
549600
}
550601

551602
/**
552603
* Verifies that Node.js and the TypeScript compiler are installed and can be
553604
* found.
554605
*/
555-
public void verifyTypeScriptInstallation() {
606+
public void verifyTypeScriptInstallation(ExtractorState extractorState) {
556607
extractorState.getTypeScriptParser().verifyInstallation(true);
557608
}
558609

559-
public void extractTypeScriptFiles(List<File> files, Set<Path> extractedFiles, FileExtractor extractor) throws IOException {
610+
public void extractTypeScriptFiles(List<File> files, Set<Path> extractedFiles,
611+
FileExtractor extractor, ExtractorState extractorState) {
560612
extractorState.getTypeScriptParser().prepareFiles(files);
561613
for (File f : files) {
562614
Path path = f.toPath();
563615
extractedFiles.add(path);
564-
extract(extractor, f.toPath());
616+
extract(extractor, f.toPath(), extractorState);
565617
}
566618
}
567619

@@ -596,35 +648,51 @@ private SourceType getSourceType() {
596648
}
597649

598650
/**
599-
* Extract a single file.
651+
* Extract a single file using the given extractor and state.
652+
*
653+
* If the state is {@code null} and a thread pool has been started, the extraction job will be submitted to the {@link #threadPool};
654+
* otherwise extraction will happen synchronously on the calling thread.
600655
*/
601-
protected void extract(FileExtractor extractor, Path file) throws IOException {
656+
protected void extract(FileExtractor extractor, Path file, ExtractorState state) {
657+
if (state == null && threadPool != null)
658+
threadPool.submit(() -> doExtract(extractor, file, state));
659+
else
660+
doExtract(extractor, file, state);
661+
}
662+
663+
private void doExtract(FileExtractor extractor, Path file, ExtractorState state) {
602664
File f = file.toFile();
603665
if (!f.exists()) {
604666
warn("Skipping " + file + ", which does not exist.");
605667
return;
606668
}
607669

608-
logBeginProcess("Extracting " + file);
609-
extractor.extract(f);
610-
logEndProcess();
670+
try {
671+
long start = logBeginProcess("Extracting " + file);
672+
extractor.extract(f, state);
673+
logEndProcess(start, "Done extracting " + file);
674+
} catch (Throwable t) {
675+
System.err.println("Exception while extracting " + file + ".");
676+
t.printStackTrace(System.err);
677+
System.exit(1);
678+
}
611679
}
612680

613681
private void warn(String msg) {
614682
System.err.println(msg);
615683
System.err.flush();
616684
}
617685

618-
private void logBeginProcess(String message) {
619-
System.out.print(message + "...");
620-
System.out.flush();
621-
this.timedLogMessageStart = System.nanoTime();
686+
private long logBeginProcess(String message) {
687+
System.out.println(message);
688+
return System.nanoTime();
622689
}
623690

624-
private void logEndProcess() {
691+
private void logEndProcess(long timedLogMessageStart, String message) {
625692
long end = System.nanoTime();
626-
int milliseconds = (int) ((end - this.timedLogMessageStart) / 1000000);
627-
System.out.println(" done (" + milliseconds + " ms)");
693+
int milliseconds = (int) ((end - timedLogMessageStart) / 1_000_000);
694+
System.out.println(message + " (" + milliseconds + " ms)");
695+
System.out.flush();
628696
}
629697

630698
public static void main(String[] args) {

0 commit comments

Comments
 (0)