Skip to content

Commit 844abc5

Browse files
authored
Merge pull request #4108 from erik-krogh/packType
Approved by asgerf
2 parents dc62cd1 + b0d4e79 commit 844abc5

File tree

21 files changed

+275
-83
lines changed

21 files changed

+275
-83
lines changed

javascript/extractor/src/com/semmle/js/extractor/ExtractorState.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
package com.semmle.js.extractor;
22

3+
import java.io.File;
34
import java.nio.file.Path;
45
import java.util.concurrent.ConcurrentHashMap;
6+
import java.util.concurrent.ConcurrentMap;
7+
import java.util.Optional;
58

69
import com.semmle.js.parser.TypeScriptParser;
710

@@ -23,6 +26,8 @@ public class ExtractorState {
2326

2427
private final ConcurrentHashMap<Path, FileSnippet> snippets = new ConcurrentHashMap<>();
2528

29+
private static final ConcurrentMap<File, Optional<String>> packageTypeCache = new ConcurrentHashMap<>();
30+
2631
public TypeScriptParser getTypeScriptParser() {
2732
return typeScriptParser;
2833
}
@@ -36,12 +41,22 @@ public ConcurrentHashMap<Path, FileSnippet> getSnippets() {
3641
return snippets;
3742
}
3843

44+
/**
45+
* Returns a cache for the "type" field in `package.json` files.
46+
*
47+
* <p>The map is thread-safe and may be mutated by the caller.
48+
*/
49+
public ConcurrentMap<File, Optional<String>> getPackageTypeCache() {
50+
return this.packageTypeCache;
51+
}
52+
3953
/**
4054
* Makes this semantically equivalent to a fresh state, but may internally retain shared resources
4155
* that are expensive to reacquire.
4256
*/
4357
public void reset() {
4458
typeScriptParser.reset();
4559
snippets.clear();
60+
packageTypeCache.clear();
4661
}
4762
}

javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ public String toString() {
119119
JS(".js", ".jsx", ".mjs", ".cjs", ".es6", ".es") {
120120
@Override
121121
public IExtractor mkExtractor(ExtractorConfig config, ExtractorState state) {
122-
return new ScriptExtractor(config);
122+
return new ScriptExtractor(config, state);
123123
}
124124

125125
@Override

javascript/extractor/src/com/semmle/js/extractor/Main.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ public class Main {
4343
* A version identifier that should be updated every time the extractor changes in such a way that
4444
* it may produce different tuples for the same file under the same {@link ExtractorConfig}.
4545
*/
46-
public static final String EXTRACTOR_VERSION = "2020-08-20-2";
46+
public static final String EXTRACTOR_VERSION = "2020-08-24";
4747

4848
public static final Pattern NEWLINE = Pattern.compile("\n");
4949

javascript/extractor/src/com/semmle/js/extractor/ScriptExtractor.java

Lines changed: 66 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,16 @@
11
package com.semmle.js.extractor;
22

3+
import java.io.BufferedReader;
4+
import java.io.File;
5+
import java.io.FileNotFoundException;
6+
import java.io.FileReader;
7+
import java.io.IOException;
8+
import java.util.concurrent.ConcurrentMap;
9+
import java.util.Optional;
10+
11+
import com.google.gson.Gson;
12+
import com.google.gson.JsonSyntaxException;
13+
314
import com.semmle.js.extractor.ExtractorConfig.Platform;
415
import com.semmle.js.extractor.ExtractorConfig.SourceType;
516
import com.semmle.js.parser.ParseError;
@@ -9,19 +20,24 @@
920
/** Extract a stand-alone JavaScript script. */
1021
public class ScriptExtractor implements IExtractor {
1122
private ExtractorConfig config;
23+
private ConcurrentMap<File, Optional<String>> packageTypeCache;
1224

13-
public ScriptExtractor(ExtractorConfig config) {
25+
public ScriptExtractor(ExtractorConfig config, ExtractorState state) {
1426
this.config = config;
27+
this.packageTypeCache = state.getPackageTypeCache();
1528
}
1629

17-
/** True if files with the given extension should always be treated as modules. */
18-
private boolean isAlwaysModule(String extension) {
19-
return extension.equals(".mjs") || extension.equals(".es6") || extension.equals(".es");
30+
/** True if files with the given extension and type (from package.json) should always be treated as ES2015 modules. */
31+
private boolean isAlwaysModule(String extension, String packageType) {
32+
if (extension.equals(".mjs") || extension.equals(".es6") || extension.equals(".es")) {
33+
return true;
34+
}
35+
return "module".equals(packageType) && extension.equals(".js");
2036
}
2137

22-
/** True if files with the given extension should always be treated as CommonJS modules. */
23-
private boolean isAlwaysCommonJSModule(String extension) {
24-
return extension.equals(".cjs");
38+
/** True if files with the given extension and type (from package.json) should always be treated as CommonJS modules. */
39+
private boolean isAlwaysCommonJSModule(String extension, String packageType) {
40+
return extension.equals(".cjs") || (extension.equals(".js") && "commonjs".equals(packageType));
2541
}
2642

2743
@Override
@@ -49,13 +65,16 @@ public LoCInfo extract(TextualExtractor textualExtractor) {
4965
locationManager.setStart(2, 1);
5066
}
5167

52-
// Some file extensions are interpreted as modules by default.
68+
String packageType = getPackageType(locationManager.getSourceFile().getParentFile());
69+
String extension = locationManager.getSourceFileExtension();
70+
71+
// Some files are interpreted as modules by default.
5372
if (config.getSourceType() == SourceType.AUTO) {
54-
if (isAlwaysModule(locationManager.getSourceFileExtension())) {
73+
if (isAlwaysModule(extension, packageType)) {
5574
config = config.withSourceType(SourceType.MODULE);
5675
}
57-
if (isAlwaysCommonJSModule(locationManager.getSourceFileExtension())) {
58-
config = config.withSourceType(SourceType.COMMONJS_MODULE);
76+
if (isAlwaysCommonJSModule(extension, packageType)) {
77+
config = config.withSourceType(SourceType.COMMONJS_MODULE).withPlatform(Platform.NODE);
5978
}
6079
}
6180

@@ -78,4 +97,40 @@ public LoCInfo extract(TextualExtractor textualExtractor) {
7897

7998
return loc;
8099
}
100+
101+
/**
102+
* A minimal model of `package.json` files that can be used to read the "type" field.
103+
*/
104+
private static class PackageJSON {
105+
String type;
106+
}
107+
108+
/**
109+
* Returns the "type" field from the nearest `package.json` file (searching up the file hierarchy).
110+
*/
111+
private String getPackageType(File folder) {
112+
if (folder == null || !folder.isDirectory()) {
113+
return null;
114+
}
115+
if (packageTypeCache.containsKey(folder)) {
116+
return packageTypeCache.get(folder).orElse(null);
117+
}
118+
File file = new File(folder, "package.json");
119+
if (file.isDirectory()) {
120+
return null;
121+
}
122+
if (!file.exists()) {
123+
String result = getPackageType(folder.getParentFile());
124+
packageTypeCache.put(folder, Optional.ofNullable(result));
125+
return result;
126+
}
127+
try {
128+
BufferedReader reader = new BufferedReader(new FileReader(file));
129+
String result = new Gson().fromJson(reader, PackageJSON.class).type;
130+
packageTypeCache.put(folder, Optional.ofNullable(result));
131+
return result;
132+
} catch (IOException | JsonSyntaxException e) {
133+
return null;
134+
}
135+
}
81136
}

javascript/extractor/tests/extensions/output/trap/tst4.cjs.trap

Lines changed: 77 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -60,82 +60,89 @@ toplevels(#20001,0)
6060
#20020=@"loc,{#10000},1,1,2,0"
6161
locations_default(#20020,#10000,1,1,2,0)
6262
hasLocation(#20001,#20020)
63-
#20021=@"module;{#10000},1,1"
64-
scopes(#20021,3)
65-
scopenodes(#20001,#20021)
66-
scopenesting(#20021,#20000)
67-
#20022=@"var;{require};{#20021}"
68-
variables(#20022,"require",#20021)
69-
#20023=@"var;{module};{#20021}"
70-
variables(#20023,"module",#20021)
71-
#20024=@"var;{exports};{#20021}"
72-
variables(#20024,"exports",#20021)
73-
#20025=@"var;{__filename};{#20021}"
74-
variables(#20025,"__filename",#20021)
75-
#20026=@"var;{__dirname};{#20021}"
76-
variables(#20026,"__dirname",#20021)
77-
#20027=@"var;{arguments};{#20021}"
78-
variables(#20027,"arguments",#20021)
63+
#20021=@"var;{global};{#20000}"
64+
variables(#20021,"global",#20000)
65+
#20022=@"var;{process};{#20000}"
66+
variables(#20022,"process",#20000)
67+
#20023=@"var;{console};{#20000}"
68+
variables(#20023,"console",#20000)
69+
#20024=@"var;{Buffer};{#20000}"
70+
variables(#20024,"Buffer",#20000)
71+
#20025=@"module;{#10000},1,1"
72+
scopes(#20025,3)
73+
scopenodes(#20001,#20025)
74+
scopenesting(#20025,#20000)
75+
#20026=@"var;{require};{#20025}"
76+
variables(#20026,"require",#20025)
77+
#20027=@"var;{module};{#20025}"
78+
variables(#20027,"module",#20025)
79+
#20028=@"var;{exports};{#20025}"
80+
variables(#20028,"exports",#20025)
81+
#20029=@"var;{__filename};{#20025}"
82+
variables(#20029,"__filename",#20025)
83+
#20030=@"var;{__dirname};{#20025}"
84+
variables(#20030,"__dirname",#20025)
85+
#20031=@"var;{arguments};{#20025}"
86+
variables(#20031,"arguments",#20025)
7987
isModule(#20001)
80-
#20028=*
81-
stmts(#20028,2,#20001,0,"console ... onJS"");")
82-
hasLocation(#20028,#20003)
83-
stmtContainers(#20028,#20001)
84-
#20029=*
85-
exprs(#20029,13,#20028,0,"console ... monJS"")")
86-
#20030=@"loc,{#10000},1,1,1,29"
87-
locations_default(#20030,#10000,1,1,1,29)
88-
hasLocation(#20029,#20030)
89-
enclosingStmt(#20029,#20028)
90-
exprContainers(#20029,#20001)
91-
#20031=*
92-
exprs(#20031,14,#20029,-1,"console.log")
93-
#20032=@"loc,{#10000},1,1,1,11"
94-
locations_default(#20032,#10000,1,1,1,11)
95-
hasLocation(#20031,#20032)
96-
enclosingStmt(#20031,#20028)
97-
exprContainers(#20031,#20001)
88+
#20032=*
89+
stmts(#20032,2,#20001,0,"console ... onJS"");")
90+
hasLocation(#20032,#20003)
91+
stmtContainers(#20032,#20001)
9892
#20033=*
99-
exprs(#20033,79,#20031,0,"console")
100-
hasLocation(#20033,#20005)
101-
enclosingStmt(#20033,#20028)
93+
exprs(#20033,13,#20032,0,"console ... monJS"")")
94+
#20034=@"loc,{#10000},1,1,1,29"
95+
locations_default(#20034,#10000,1,1,1,29)
96+
hasLocation(#20033,#20034)
97+
enclosingStmt(#20033,#20032)
10298
exprContainers(#20033,#20001)
103-
literals("console","console",#20033)
104-
#20034=@"var;{console};{#20000}"
105-
variables(#20034,"console",#20000)
106-
bind(#20033,#20034)
10799
#20035=*
108-
exprs(#20035,0,#20031,1,"log")
109-
hasLocation(#20035,#20009)
110-
enclosingStmt(#20035,#20028)
100+
exprs(#20035,14,#20033,-1,"console.log")
101+
#20036=@"loc,{#10000},1,1,1,11"
102+
locations_default(#20036,#10000,1,1,1,11)
103+
hasLocation(#20035,#20036)
104+
enclosingStmt(#20035,#20032)
111105
exprContainers(#20035,#20001)
112-
literals("log","log",#20035)
113-
#20036=*
114-
exprs(#20036,4,#20029,0,"""Hello CommonJS""")
115-
hasLocation(#20036,#20013)
116-
enclosingStmt(#20036,#20028)
117-
exprContainers(#20036,#20001)
118-
literals("Hello CommonJS","""Hello CommonJS""",#20036)
119106
#20037=*
120-
regexpterm(#20037,14,#20036,0,"Hello CommonJS")
121-
#20038=@"loc,{#10000},1,14,1,27"
122-
locations_default(#20038,#10000,1,14,1,27)
123-
hasLocation(#20037,#20038)
124-
regexpConstValue(#20037,"Hello CommonJS")
107+
exprs(#20037,79,#20035,0,"console")
108+
hasLocation(#20037,#20005)
109+
enclosingStmt(#20037,#20032)
110+
exprContainers(#20037,#20001)
111+
literals("console","console",#20037)
112+
bind(#20037,#20023)
113+
#20038=*
114+
exprs(#20038,0,#20035,1,"log")
115+
hasLocation(#20038,#20009)
116+
enclosingStmt(#20038,#20032)
117+
exprContainers(#20038,#20001)
118+
literals("log","log",#20038)
125119
#20039=*
126-
entry_cfg_node(#20039,#20001)
127-
#20040=@"loc,{#10000},1,1,1,0"
128-
locations_default(#20040,#10000,1,1,1,0)
129-
hasLocation(#20039,#20040)
130-
#20041=*
131-
exit_cfg_node(#20041,#20001)
132-
hasLocation(#20041,#20019)
133-
successor(#20028,#20033)
134-
successor(#20036,#20029)
135-
successor(#20035,#20031)
136-
successor(#20033,#20035)
137-
successor(#20031,#20036)
138-
successor(#20029,#20041)
139-
successor(#20039,#20028)
120+
exprs(#20039,4,#20033,0,"""Hello CommonJS""")
121+
hasLocation(#20039,#20013)
122+
enclosingStmt(#20039,#20032)
123+
exprContainers(#20039,#20001)
124+
literals("Hello CommonJS","""Hello CommonJS""",#20039)
125+
#20040=*
126+
regexpterm(#20040,14,#20039,0,"Hello CommonJS")
127+
#20041=@"loc,{#10000},1,14,1,27"
128+
locations_default(#20041,#10000,1,14,1,27)
129+
hasLocation(#20040,#20041)
130+
regexpConstValue(#20040,"Hello CommonJS")
131+
#20042=*
132+
entry_cfg_node(#20042,#20001)
133+
#20043=@"loc,{#10000},1,1,1,0"
134+
locations_default(#20043,#10000,1,1,1,0)
135+
hasLocation(#20042,#20043)
136+
#20044=*
137+
exit_cfg_node(#20044,#20001)
138+
hasLocation(#20044,#20019)
139+
successor(#20032,#20037)
140+
successor(#20039,#20033)
141+
successor(#20038,#20035)
142+
successor(#20037,#20038)
143+
successor(#20035,#20039)
144+
successor(#20033,#20044)
145+
successor(#20042,#20032)
146+
isNodejs(#20001)
140147
numlines(#10000,1,1,0)
141148
filetype(#10000,"javascript")
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
// I'm invalid JSON
2+
{
3+
"type": "foo"

javascript/extractor/tests/moduleTypes1/input/tst.js

Whitespace-only changes.
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
#10000=@"/package.json;sourcefile"
2+
files(#10000,"/package.json","package","json",0)
3+
#10001=@"/;folder"
4+
folders(#10001,"/","")
5+
containerparent(#10001,#10000)
6+
#10002=@"loc,{#10000},0,0,0,0"
7+
locations_default(#10002,#10000,0,0,0,0)
8+
hasLocation(#10000,#10002)
9+
#20000=*
10+
json_errors(#20000,"Error: Unexpected token")
11+
#20001=@"loc,{#10000},3,1,3,1"
12+
locations_default(#20001,#10000,3,1,3,1)
13+
hasLocation(#20000,#20001)
14+
numlines(#10000,3,0,0)
15+
filetype(#10000,"json")
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#10000=@"/tst.js;sourcefile"
2+
files(#10000,"/tst.js","tst","js",0)
3+
#10001=@"/;folder"
4+
folders(#10001,"/","")
5+
containerparent(#10001,#10000)
6+
#10002=@"loc,{#10000},0,0,0,0"
7+
locations_default(#10002,#10000,0,0,0,0)
8+
hasLocation(#10000,#10002)
9+
#20000=@"global_scope"
10+
scopes(#20000,0)
11+
#20001=@"script;{#10000},1,1"
12+
numlines(#20001,0,0,0)
13+
#20002=*
14+
tokeninfo(#20002,0,#20001,0,"")
15+
#20003=@"loc,{#10000},1,1,1,0"
16+
locations_default(#20003,#10000,1,1,1,0)
17+
hasLocation(#20002,#20003)
18+
toplevels(#20001,0)
19+
hasLocation(#20001,#20003)
20+
#20004=*
21+
entry_cfg_node(#20004,#20001)
22+
hasLocation(#20004,#20003)
23+
#20005=*
24+
exit_cfg_node(#20005,#20001)
25+
hasLocation(#20005,#20003)
26+
successor(#20004,#20005)
27+
numlines(#10000,0,0,0)
28+
filetype(#10000,"javascript")
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
"type": 123
3+
}

0 commit comments

Comments
 (0)