Skip to content

Commit 9fb61d5

Browse files
authored
Merge pull request #1371 from xiemaisi/js/index-xml
Approved by asger-semmle
2 parents ead59ba + 7f8f126 commit 9fb61d5

File tree

2 files changed

+116
-12
lines changed

2 files changed

+116
-12
lines changed

javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java

Lines changed: 55 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import com.semmle.ts.extractor.TypeExtractor;
1111
import com.semmle.ts.extractor.TypeTable;
1212
import com.semmle.util.data.StringUtil;
13+
import com.semmle.util.exception.CatastrophicError;
1314
import com.semmle.util.exception.Exceptions;
1415
import com.semmle.util.exception.ResourceError;
1516
import com.semmle.util.exception.UserError;
@@ -23,6 +24,7 @@
2324
import java.io.File;
2425
import java.io.IOException;
2526
import java.io.Reader;
27+
import java.lang.ProcessBuilder.Redirect;
2628
import java.net.URI;
2729
import java.net.URISyntaxException;
2830
import java.nio.charset.StandardCharsets;
@@ -68,7 +70,9 @@
6870
* patterns that can be used to refine the list of files to include and exclude
6971
* <li><code>LGTM_INDEX_TYPESCRIPT</code>: whether to extract TypeScript
7072
* <li><code>LGTM_INDEX_FILETYPES</code>: a newline-separated list of ".extension:filetype" pairs
71-
* specifying which {@link FileType} to use for the given extension
73+
* specifying which {@link FileType} to use for the given extension; the additional file
74+
* type <code>XML</code> is also supported
75+
* <li><code>LGTM_INDEX_XML_MODE</code>: whether to extract XML files
7276
* <li><code>LGTM_THREADS</code>: the maximum number of files to extract in parallel
7377
* <li><code>LGTM_TRAP_CACHE</code>: the path of a directory to use for trap caching
7478
* <li><code>LGTM_TRAP_CACHE_BOUND</code>: the size to bound the trap cache to
@@ -158,6 +162,12 @@
158162
* <p>The file type as which a file is extracted can be customised via the <code>
159163
* LGTM_INDEX_FILETYPES</code> environment variable explained above.
160164
*
165+
* <p>If <code>LGTM_INDEX_XML_MODE</code> is set to <code>ALL</code>, then all files with extension
166+
* <code>.xml</code> under <code>LGTM_SRC</code> are extracted as XML (in addition to any files
167+
* whose file type is specified to be <code>XML</code> via <code>LGTM_INDEX_FILETYPES</code>).
168+
* Currently XML extraction does not respect inclusion and exclusion filters, but this is a bug,
169+
* not a feature, and hence will change eventually.
170+
*
161171
* <p>Note that all these customisations only apply to <code>LGTM_SRC</code>. Extraction of externs
162172
* is not customisable.
163173
*
@@ -178,6 +188,7 @@ public class AutoBuild {
178188
private final Map<String, FileType> fileTypes = new LinkedHashMap<>();
179189
private final Set<Path> includes = new LinkedHashSet<>();
180190
private final Set<Path> excludes = new LinkedHashSet<>();
191+
private final Set<String> xmlExtensions = new LinkedHashSet<>();
181192
private ProjectLayout filters;
182193
private final Path LGTM_SRC, SEMMLE_DIST;
183194
private final TypeScriptMode typeScriptMode;
@@ -193,6 +204,7 @@ public AutoBuild() {
193204
getEnumFromEnvVar("LGTM_INDEX_TYPESCRIPT", TypeScriptMode.class, TypeScriptMode.FULL);
194205
this.defaultEncoding = getEnvVar("LGTM_INDEX_DEFAULT_ENCODING");
195206
setupFileTypes();
207+
setupXmlMode();
196208
setupMatchers();
197209
}
198210

@@ -272,14 +284,30 @@ private void setupFileTypes() {
272284
String extension = fields[0].trim();
273285
String fileType = fields[1].trim();
274286
try {
275-
fileTypes.put(extension, FileType.valueOf(StringUtil.uc(fileType)));
287+
fileType = StringUtil.uc(fileType);
288+
if ("XML".equals(fileType)) {
289+
if (extension.length() < 2)
290+
throw new UserError("Invalid extension '" + extension + "'.");
291+
xmlExtensions.add(extension.substring(1));
292+
} else {
293+
fileTypes.put(extension, FileType.valueOf(fileType));
294+
}
276295
} catch (IllegalArgumentException e) {
277296
Exceptions.ignore(e, "We construct a better error message.");
278297
throw new UserError("Invalid file type '" + fileType + "'.");
279298
}
280299
}
281300
}
282301

302+
private void setupXmlMode() {
303+
String xmlMode = getEnvVar("LGTM_INDEX_XML_MODE", "DISABLED");
304+
xmlMode = StringUtil.uc(xmlMode.trim());
305+
if ("ALL".equals(xmlMode))
306+
xmlExtensions.add("xml");
307+
else if (!"DISABLED".equals(xmlMode))
308+
throw new UserError("Invalid XML mode '" + xmlMode + "' (should be either ALL or DISABLED).");
309+
}
310+
283311
/** Set up include and exclude matchers based on environment variables. */
284312
private void setupMatchers() {
285313
setupIncludesAndExcludes();
@@ -402,6 +430,7 @@ public void run() throws IOException {
402430
try {
403431
extractSource();
404432
extractExterns();
433+
extractXml();
405434
} finally {
406435
shutdownThreadPool();
407436
}
@@ -733,10 +762,33 @@ private void logEndProcess(long timedLogMessageStart, String message) {
733762
System.out.flush();
734763
}
735764

765+
public Set<String> getXmlExtensions() {
766+
return xmlExtensions;
767+
}
768+
769+
protected void extractXml() throws IOException {
770+
if (xmlExtensions.isEmpty())
771+
return;
772+
List<String> cmd = new ArrayList<>();
773+
cmd.add("odasa");
774+
cmd.add("index");
775+
cmd.add("--xml");
776+
cmd.add("--extensions");
777+
cmd.addAll(xmlExtensions);
778+
ProcessBuilder pb = new ProcessBuilder(cmd);
779+
try {
780+
pb.redirectError(Redirect.INHERIT);
781+
pb.redirectOutput(Redirect.INHERIT);
782+
pb.start().waitFor();
783+
} catch (InterruptedException e) {
784+
throw new CatastrophicError(e);
785+
}
786+
}
787+
736788
public static void main(String[] args) {
737789
try {
738790
new AutoBuild().run();
739-
} catch (IOException | UserError e) {
791+
} catch (IOException | UserError | CatastrophicError e) {
740792
System.err.println(e.toString());
741793
System.exit(1);
742794
}

javascript/extractor/src/com/semmle/js/extractor/test/AutoBuildTests.java

Lines changed: 61 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,38 @@
11
package com.semmle.js.extractor.test;
22

3-
import com.semmle.js.extractor.AutoBuild;
4-
import com.semmle.js.extractor.ExtractorState;
5-
import com.semmle.js.extractor.FileExtractor;
6-
import com.semmle.js.extractor.FileExtractor.FileType;
7-
import com.semmle.util.data.StringUtil;
8-
import com.semmle.util.exception.UserError;
9-
import com.semmle.util.files.FileUtil8;
10-
import com.semmle.util.process.Env;
113
import java.io.File;
124
import java.io.IOException;
135
import java.nio.charset.StandardCharsets;
6+
import java.nio.file.FileVisitResult;
147
import java.nio.file.Files;
158
import java.nio.file.Path;
169
import java.nio.file.Paths;
10+
import java.nio.file.SimpleFileVisitor;
11+
import java.nio.file.attribute.BasicFileAttributes;
1712
import java.nio.file.attribute.DosFileAttributeView;
1813
import java.util.ArrayList;
1914
import java.util.LinkedHashMap;
2015
import java.util.LinkedHashSet;
2116
import java.util.List;
2217
import java.util.Map;
2318
import java.util.Set;
19+
2420
import org.junit.After;
2521
import org.junit.Assert;
2622
import org.junit.Assume;
2723
import org.junit.Before;
2824
import org.junit.Test;
2925

26+
import com.semmle.js.extractor.AutoBuild;
27+
import com.semmle.js.extractor.ExtractorState;
28+
import com.semmle.js.extractor.FileExtractor;
29+
import com.semmle.js.extractor.FileExtractor.FileType;
30+
import com.semmle.util.data.StringUtil;
31+
import com.semmle.util.exception.UserError;
32+
import com.semmle.util.files.FileUtil;
33+
import com.semmle.util.files.FileUtil8;
34+
import com.semmle.util.process.Env;
35+
3036
public class AutoBuildTests {
3137
private Path SEMMLE_DIST, LGTM_SRC;
3238
private Set<String> expected;
@@ -123,6 +129,20 @@ public void extractTypeScriptFiles(
123129
actual.add(f.toString());
124130
}
125131
}
132+
133+
@Override
134+
protected void extractXml() throws IOException {
135+
Files.walkFileTree(LGTM_SRC, new SimpleFileVisitor<Path>(){
136+
@Override
137+
public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
138+
throws IOException {
139+
String ext = FileUtil.extension(file);
140+
if (!ext.isEmpty() && getXmlExtensions().contains(ext.substring(1)))
141+
actual.add(file.toString());
142+
return FileVisitResult.CONTINUE;
143+
}
144+
});
145+
}
126146
}.run();
127147
String expectedString = StringUtil.glue("\n", expected.stream().sorted().toArray());
128148
String actualString = StringUtil.glue("\n", actual.stream().sorted().toArray());
@@ -488,7 +508,7 @@ public void invalidFileType() throws IOException {
488508
runTest();
489509
Assert.fail("expected UserError");
490510
} catch (UserError ue) {
491-
Assert.assertEquals("Invalid file type 'javascript'.", ue.getMessage());
511+
Assert.assertEquals("Invalid file type 'JAVASCRIPT'.", ue.getMessage());
492512
}
493513
}
494514

@@ -499,4 +519,36 @@ public void includeYaml() throws IOException {
499519
addFile(true, LGTM_SRC, "tst.raml");
500520
runTest();
501521
}
522+
523+
@Test
524+
public void dontIncludeXmlByDefault() throws IOException {
525+
addFile(false, LGTM_SRC, "tst.xml");
526+
addFile(false, LGTM_SRC, "tst.qhelp");
527+
runTest();
528+
}
529+
530+
@Test
531+
public void includeXml() throws IOException {
532+
envVars.put("LGTM_INDEX_XML_MODE", "all");
533+
addFile(true, LGTM_SRC, "tst.xml");
534+
addFile(false, LGTM_SRC, "tst.qhelp");
535+
runTest();
536+
}
537+
538+
@Test
539+
public void qhelpAsXml() throws IOException {
540+
envVars.put("LGTM_INDEX_FILETYPES", ".qhelp:xml");
541+
addFile(false, LGTM_SRC, "tst.xml");
542+
addFile(true, LGTM_SRC, "tst.qhelp");
543+
runTest();
544+
}
545+
546+
@Test
547+
public void qhelpAsXmlAndAllXml() throws IOException {
548+
envVars.put("LGTM_INDEX_XML_MODE", "all");
549+
envVars.put("LGTM_INDEX_FILETYPES", ".qhelp:xml");
550+
addFile(true, LGTM_SRC, "tst.xml");
551+
addFile(true, LGTM_SRC, "tst.qhelp");
552+
runTest();
553+
}
502554
}

0 commit comments

Comments
 (0)