github
diff --git a/javascript/extractor/src/com/semmle/js/extractor/ASTExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/ASTExtractor.java
Lines changed: 7 additions & 0 deletions
diff --git a/javascript/extractor/src/com/semmle/js/extractor/CFGExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/CFGExtractor.java
Lines changed: 5 additions & 0 deletions
diff --git a/javascript/extractor/src/com/semmle/js/extractor/ExtractionMetrics.java b/javascript/extractor/src/com/semmle/js/extractor/ExtractionMetrics.java
Lines changed: 191 additions & 0 deletions
diff --git a/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java
Lines changed: 26 additions & 7 deletions
diff --git a/javascript/extractor/src/com/semmle/js/extractor/HTMLExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/HTMLExtractor.java
Lines changed: 6 additions & 1 deletion
@@ -98,6 +98,7 @@
 import com.semmle.js.ast.jsx.JSXNamespacedName;
 import com.semmle.js.ast.jsx.JSXOpeningElement;
 import com.semmle.js.ast.jsx.JSXSpreadAttribute;
+import com.semmle.js.extractor.ExtractionMetrics.ExtractionPhase;
 import com.semmle.js.extractor.ExtractorConfig.Platform;
 import com.semmle.js.extractor.ExtractorConfig.SourceType;
 import com.semmle.js.extractor.ScopeManager.DeclKind;
@@ -192,6 +193,10 @@ public ScopeManager getScopeManager() {
     return scopeManager;
   }
 
+  /** Returns the extraction metrics held by the underlying lexical extractor. */
+  public ExtractionMetrics getMetrics() {
+    ExtractionMetrics metrics = lexicalExtractor.getMetrics();
+    return metrics;
+  }
+
   /**
    * The binding semantics for an identifier.
    *
@@ -1945,9 +1950,11 @@ public Label visit(XMLDotDotExpression nd, Context c) {
   }
 
   public void extract(Node root, Platform platform, SourceType sourceType, int toplevelKind) {
+    // Time the whole AST extraction of this toplevel as one metrics phase.
+    lexicalExtractor.getMetrics().startPhase(ExtractionPhase.ASTExtractor_extract);
-    trapwriter.addTuple("toplevels", toplevelLabel, toplevelKind);
-    locationManager.emitNodeLocation(root, toplevelLabel);
-
-    root.accept(new V(platform, sourceType), null);
+    try {
+      trapwriter.addTuple("toplevels", toplevelLabel, toplevelKind);
+      locationManager.emitNodeLocation(root, toplevelLabel);
+
+      root.accept(new V(platform, sourceType), null);
+    } finally {
+      // Stop the phase even if the traversal throws, so that the phase stack stays balanced
+      // and the enclosing phases can still be stopped consistently.
+      lexicalExtractor.getMetrics().stopPhase(ExtractionPhase.ASTExtractor_extract);
+    }
   }
 }
@@ -93,6 +93,7 @@
 import com.semmle.js.ast.jsx.JSXNamespacedName;
 import com.semmle.js.ast.jsx.JSXOpeningElement;
 import com.semmle.js.ast.jsx.JSXSpreadAttribute;
+import com.semmle.js.extractor.ExtractionMetrics.ExtractionPhase;
 import com.semmle.ts.ast.DecoratorList;
 import com.semmle.ts.ast.EnumDeclaration;
 import com.semmle.ts.ast.EnumMember;
@@ -171,11 +172,13 @@ public class CFGExtractor {
   private final TrapWriter trapwriter;
   private final Label toplevelLabel;
   private final LocationManager locationManager;
+  private final ExtractionMetrics metrics;
 
   public CFGExtractor(ASTExtractor astExtractor) {
+    // All collaborators, including the metrics, are taken from the given AST extractor.
     this.trapwriter = astExtractor.getTrapwriter();
     this.toplevelLabel = astExtractor.getToplevelLabel();
     this.locationManager = astExtractor.getLocationManager();
+    this.metrics = astExtractor.getMetrics();
   }
 
   @SuppressWarnings("unchecked")
@@ -1955,6 +1958,8 @@ public Void visit(XMLDotDotExpression nd, SuccessorInfo c) {
   }
 
   public void extract(Node nd) {
+    // Time the whole CFG extraction of this node as one metrics phase.
+    metrics.startPhase(ExtractionPhase.CFGExtractor_extract);
-    nd.accept(new V(), new SimpleSuccessorInfo(null));
+    try {
+      nd.accept(new V(), new SimpleSuccessorInfo(null));
+    } finally {
+      // Stop the phase even if the traversal throws, so that the phase stack stays balanced.
+      metrics.stopPhase(ExtractionPhase.CFGExtractor_extract);
+    }
   }
 }
@@ -0,0 +1,191 @@
+package com.semmle.js.extractor;
+
+import com.semmle.util.exception.Exceptions;
+import com.semmle.util.files.FileUtil;
+import com.semmle.util.trap.TrapWriter;
+import com.semmle.util.trap.TrapWriter.Label;
+import com.semmle.util.trap.pathtransformers.PathTransformer;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.OutputStreamWriter;
+import java.lang.management.ManagementFactory;
+import java.lang.management.ThreadMXBean;
+import java.nio.charset.Charset;
+import java.util.Stack;
+import java.util.zip.GZIPOutputStream;
+
+/**
+ * Metrics for the (single-threaded) extraction of a single file.
+ *
+ * <p>Phases are timed with a stack: {@link #startPhase} pushes a phase and {@link #stopPhase} pops
+ * it. The time elapsed between two consecutive start/stop calls is attributed to the phase that
+ * was on top of the stack during that interval, so time spent in a nested phase is not counted
+ * towards its enclosing phase.
+ */
+public class ExtractionMetrics {
+  /**
+   * The phase of the extraction that should be measured time for.
+   *
+   * <p>Convention: the enum names have the format <code>{ClassName}_{MethodName}</code>, and should
+   * identify the methods they correspond to.
+   */
+  public enum ExtractionPhase {
+    ASTExtractor_extract(0),
+    CFGExtractor_extract(1),
+    FileExtractor_extractContents(2),
+    JSExtractor_extract(3),
+    JSParser_parse(4),
+    LexicalExtractor_extractLines(5),
+    LexicalExtractor_extractTokens(6),
+    TypeScriptASTConverter_convertAST(7),
+    TypeScriptParser_talkToParserWrapper(8);
+
+    /**
+     * The id used in the database for the time spent performing this phase of the extraction. It
+     * is also used as the index into the per-phase timing arrays, so ids must stay within <code>
+     * [0, values().length)</code>.
+     */
+    final int dbschemeId;
+
+    ExtractionPhase(int dbschemeId) {
+      this.dbschemeId = dbschemeId;
+    }
+  }
+
+  /** The cache file, if any. */
+  private File cacheFile;
+
+  /** True iff the extraction of this file reuses an existing trap cache file. */
+  private boolean canReuseCacheFile;
+
+  /** The cumulative CPU-time spent in each extraction phase so far, indexed by dbscheme id. */
+  private final long[] cpuTimes = new long[ExtractionPhase.values().length];
+
+  /** The label for the file that is being extracted. */
+  private Label fileLabel;
+
+  /** The number of characters in the file that is being extracted. */
+  private int length;
+
+  /** The CPU-time reading taken the previous time a measurement was performed. */
+  private long previousCpuTime;
+
+  /** The wallclock-time reading taken the previous time a measurement was performed. */
+  private long previousWallclockTime;
+
+  /** The extraction phase stack; the top element is the phase currently being timed. */
+  private final Stack<ExtractionPhase> stack = new Stack<>();
+
+  /** The thread management bean, used for measuring the CPU-time of the current thread. */
+  private final ThreadMXBean thread = ManagementFactory.getThreadMXBean();
+
+  /**
+   * The cumulative wallclock-time spent in each extraction phase so far, indexed by dbscheme id.
+   */
+  private final long[] wallclockTimes = new long[ExtractionPhase.values().length];
+
+  /**
+   * True iff extraction metrics could not be obtained for this file (due to an unforeseen error
+   * that should not prevent the ordinary extraction from succeeding).
+   */
+  private boolean timingsFailed;
+
+  /**
+   * Appends these metrics to a trap file. Note that this makes the resulting trap file content
+   * non-deterministic.
+   *
+   * <p>The file is opened in append mode and the metrics are written through a fresh gzip stream.
+   * Any exception raised while writing is ignored, since metrics must not make the ordinary
+   * extraction fail. The per-phase timings are omitted if timing failed or if some phase is still
+   * running.
+   *
+   * @param trapFileToAppendTo the trap file to append to; nothing is written if it is {@code null}
+   */
+  public void appendToTrapFile(File trapFileToAppendTo) {
+    if (trapFileToAppendTo == null) {
+      return;
+    }
+
+    BufferedWriter out = null;
+    FileOutputStream fos = null;
+    GZIPOutputStream gzip = null;
+    TrapWriter trapwriter = null;
+    try {
+      fos = new FileOutputStream(trapFileToAppendTo, true);
+      gzip = new GZIPOutputStream(fos);
+      out = new BufferedWriter(new OutputStreamWriter(gzip, Charset.forName("UTF-8")));
+
+      trapwriter = new TrapWriter(out, PathTransformer.std());
+      trapwriter.addTuple(
+          "extraction_data",
+          fileLabel,
+          cacheFile != null ? cacheFile.getAbsolutePath() : "",
+          canReuseCacheFile,
+          length);
+
+      // A non-empty stack means some startPhase was never matched by a stopPhase.
+      if (!stack.isEmpty()) {
+        failTimings(
+            String.format(
+                "Could not properly record extraction times for %s. (stack = %s)",
+                fileLabel, stack.toString()));
+      }
+      if (!timingsFailed) {
+        for (ExtractionPhase phase : ExtractionPhase.values()) {
+          int id = phase.dbschemeId;
+          // kind 0 carries the CPU time, kind 1 the wallclock time
+          trapwriter.addTuple("extraction_time", fileLabel, id, 0, (float) cpuTimes[id]);
+          trapwriter.addTuple("extraction_time", fileLabel, id, 1, (float) wallclockTimes[id]);
+        }
+      }
+      FileUtil.close(trapwriter);
+    } catch (Exception e) {
+      // Close the outermost wrapper first: closing the raw file stream before the gzip stream
+      // would prevent the gzip trailer from being written.
+      FileUtil.close(trapwriter);
+      FileUtil.close(out);
+      FileUtil.close(gzip);
+      FileUtil.close(fos);
+      Exceptions.ignore(e, "Ignoring exception for extraction metrics writing");
+    }
+  }
+
+  /**
+   * Reports a timing problem on standard error and marks the timings of this file as unusable.
+   *
+   * @param msg the already formatted message; it is printed verbatim followed by a line break
+   */
+  private void failTimings(String msg) {
+    // Do not use printf here: msg is not a format string and may contain '%'.
+    System.err.println(msg);
+    System.err.flush();
+    this.timingsFailed = true;
+  }
+
+  /**
+   * Attributes the time elapsed since the previous measurement to the phase on top of the stack
+   * (if any), and restarts the running clock.
+   */
+  private void incrementCurrentTimer() {
+    long nowWallclock = System.nanoTime();
+    long nowCpu = thread.getCurrentThreadCpuTime();
+
+    if (!stack.isEmpty()) {
+      // increment by the time elapsed
+      int current = stack.peek().dbschemeId;
+      wallclockTimes[current] += nowWallclock - previousWallclockTime;
+      cpuTimes[current] += nowCpu - previousCpuTime;
+    }
+
+    // update the running clock
+    previousWallclockTime = nowWallclock;
+    previousCpuTime = nowCpu;
+  }
+
+  /** Sets the cache file for this extraction; may be {@code null} if there is none. */
+  public void setCacheFile(File cacheFile) {
+    this.cacheFile = cacheFile;
+  }
+
+  /** Sets whether the extraction of this file reuses an existing trap cache file. */
+  public void setCanReuseCacheFile(boolean canReuseCacheFile) {
+    this.canReuseCacheFile = canReuseCacheFile;
+  }
+
+  /** Sets the label for the file that is being extracted. */
+  public void setFileLabel(Label fileLabel) {
+    this.fileLabel = fileLabel;
+  }
+
+  /** Sets the number of characters in the file that is being extracted. */
+  public void setLength(int length) {
+    this.length = length;
+  }
+
+  /**
+   * Starts timing the given phase. The time elapsed so far is attributed to the phase that was
+   * running until now (if any), which is resumed once the new phase is stopped.
+   */
+  public void startPhase(ExtractionPhase event) {
+    incrementCurrentTimer();
+    stack.push(event);
+  }
+
+  /**
+   * Stops timing the given phase, which must be the phase that is currently running. Otherwise
+   * the timings of this file are marked as failed and the phase stack is left untouched.
+   *
+   * @param event the phase to stop; technically not needed, but useful for documentation and
+   *     sanity checking
+   */
+  public void stopPhase(ExtractionPhase event) {
+    if (stack.isEmpty()) {
+      failTimings(
+          String.format(
+              "Inconsistent extraction time recording: trying to stop timer %s, but no timer is running",
+              event));
+      return;
+    }
+    if (stack.peek() != event) {
+      failTimings(
+          String.format(
+              "Inconsistent extraction time recording: trying to stop timer %s, but current timer is: %s",
+              event, stack.peek()));
+      return;
+    }
+    incrementCurrentTimer();
+    stack.pop();
+  }
+}
@@ -1,5 +1,6 @@
 package com.semmle.js.extractor;
 
+import com.semmle.js.extractor.ExtractionMetrics.ExtractionPhase;
 import com.semmle.js.extractor.trapcache.CachingTrapWriter;
 import com.semmle.js.extractor.trapcache.ITrapCache;
 import com.semmle.util.data.StringUtil;
@@ -386,13 +387,15 @@ public boolean supports(File f) {
 
   /** @return the number of lines of code extracted, or {@code null} if the file was cached */
   public Integer extract(File f, ExtractorState state) throws IOException {
+
     // populate source archive
     String source = new WholeIO(config.getDefaultEncoding()).strictread(f);
     outputConfig.getSourceArchive().add(f, source);
 
     // extract language-independent bits
     TrapWriter trapwriter = outputConfig.getTrapWriterFactory().mkTrapWriter(f);
     Label fileLabel = trapwriter.populateFile(f);
+
     LocationManager locationManager = new LocationManager(f, trapwriter, fileLabel);
     locationManager.emitFileLocation(fileLabel, 0, 0, 0, 0);
 
@@ -424,23 +427,37 @@ public Integer extract(File f, ExtractorState state) throws IOException {
   private Integer extractContents(
       File f, Label fileLabel, String source, LocationManager locationManager, ExtractorState state)
       throws IOException {
+    ExtractionMetrics metrics = new ExtractionMetrics();
+    metrics.startPhase(ExtractionPhase.FileExtractor_extractContents);
+    metrics.setLength(source.length());
+    metrics.setFileLabel(fileLabel);
     TrapWriter trapwriter = locationManager.getTrapWriter();
     FileType fileType = getFileType(f);
 
     File cacheFile = null, // the cache file for this extraction
         resultFile = null; // the final result TRAP file for this extraction
 
-    // check whether we can perform caching
-    if (bumpIdCounter(trapwriter) && fileType.isTrapCachingAllowed()) {
+    if (bumpIdCounter(trapwriter)) {
       resultFile = outputConfig.getTrapWriterFactory().getTrapFileFor(f);
-      if (resultFile != null) cacheFile = trapCache.lookup(source, config, fileType);
     }
+    // check whether we can perform caching
+    if (resultFile != null && fileType.isTrapCachingAllowed()) {
+      cacheFile = trapCache.lookup(source, config, fileType);
+    }
+
+    boolean canUseCacheFile = cacheFile != null;
+    boolean canReuseCacheFile = canUseCacheFile && cacheFile.exists();
 
-    if (cacheFile != null) {
+    metrics.setCacheFile(cacheFile);
+    metrics.setCanReuseCacheFile(canReuseCacheFile);
+
+    if (canUseCacheFile) {
       FileUtil.close(trapwriter);
 
-      if (cacheFile.exists()) {
+      if (canReuseCacheFile) {
         FileUtil.append(cacheFile, resultFile);
+        metrics.stopPhase(ExtractionPhase.FileExtractor_extractContents);
+        metrics.appendToTrapFile(resultFile);
         return null;
       }
 
@@ -457,7 +474,8 @@ private Integer extractContents(
     try {
       IExtractor extractor = fileType.mkExtractor(config, state);
       TextualExtractor textualExtractor =
-          new TextualExtractor(trapwriter, locationManager, source, config.getExtractLines());
+          new TextualExtractor(
+              trapwriter, locationManager, source, config.getExtractLines(), metrics);
       LoCInfo loc = extractor.extract(textualExtractor);
       int numLines = textualExtractor.getNumLines();
       int linesOfCode = loc.getLinesOfCode(), linesOfComments = loc.getLinesOfComments();
@@ -468,8 +486,9 @@ private Integer extractContents(
     } finally {
       if (!successful && trapwriter instanceof CachingTrapWriter)
         ((CachingTrapWriter) trapwriter).discard();
-
       FileUtil.close(trapwriter);
+      metrics.stopPhase(ExtractionPhase.FileExtractor_extractContents);
+      metrics.appendToTrapFile(resultFile);
     }
   }
 
 
@@ -191,7 +191,12 @@ private LoCInfo extractSnippet(
     JSExtractor extractor = new JSExtractor(config);
     try {
       TextualExtractor tx =
-          new TextualExtractor(trapwriter, scriptLocationManager, source, config.getExtractLines());
+          new TextualExtractor(
+              trapwriter,
+              scriptLocationManager,
+              source,
+              config.getExtractLines(),
+              textualExtractor.getMetrics());
       return extractor.extract(tx, source, toplevelKind, scopeManager).snd();
     } catch (ParseError e) {
       e.setPosition(scriptLocationManager.translatePosition(e.getPosition()));
Original file line number	Diff line number	Diff line change
`@@ -98,6 +98,7 @@`
`98`	`98`	`import com.semmle.js.ast.jsx.JSXNamespacedName;`
`99`	`99`	`import com.semmle.js.ast.jsx.JSXOpeningElement;`
`100`	`100`	`import com.semmle.js.ast.jsx.JSXSpreadAttribute;`
	`101`	`+import com.semmle.js.extractor.ExtractionMetrics.ExtractionPhase;`
`101`	`102`	`import com.semmle.js.extractor.ExtractorConfig.Platform;`
`102`	`103`	`import com.semmle.js.extractor.ExtractorConfig.SourceType;`
`103`	`104`	`import com.semmle.js.extractor.ScopeManager.DeclKind;`
`@@ -192,6 +193,10 @@ public ScopeManager getScopeManager() {`
`192`	`193`	`return scopeManager;`
`193`	`194`	`}`
`194`	`195`
	`196`	`+ public ExtractionMetrics getMetrics() {`
	`197`	`+ return lexicalExtractor.getMetrics();`
	`198`	`+ }`
	`199`	`+`
`195`	`200`	`/**`
`196`	`201`	`* The binding semantics for an identifier.`
`197`	`202`	`*`
`@@ -1945,9 +1950,11 @@ public Label visit(XMLDotDotExpression nd, Context c) {`
`1945`	`1950`	`}`
`1946`	`1951`
`1947`	`1952`	`public void extract(Node root, Platform platform, SourceType sourceType, int toplevelKind) {`
	`1953`	`+ lexicalExtractor.getMetrics().startPhase(ExtractionPhase.ASTExtractor_extract);`
`1948`	`1954`	`trapwriter.addTuple("toplevels", toplevelLabel, toplevelKind);`
`1949`	`1955`	`locationManager.emitNodeLocation(root, toplevelLabel);`
`1950`	`1956`
`1951`	`1957`	`root.accept(new V(platform, sourceType), null);`
	`1958`	`+ lexicalExtractor.getMetrics().stopPhase(ExtractionPhase.ASTExtractor_extract);`
`1952`	`1959`	`}`
`1953`	`1960`	`}`