Skip to content

Commit b5ec26d

Browse files
author
Sauyon Lee
authored
Merge pull request #4744 from github/sauyon/html-refactor
JavaScript: Factor out HTML extractor
2 parents bc340e2 + 17e450f commit b5ec26d

File tree

12 files changed

+527
-746
lines changed

12 files changed

+527
-746
lines changed

javascript/extractor/src/com/semmle/js/extractor/ExtractorConfig.java

Lines changed: 5 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
import java.util.LinkedHashSet;
1010
import java.util.Set;
1111

12+
import com.semmle.extractor.html.HtmlPopulator;
1213
import com.semmle.js.parser.JcornWrapper;
1314
import com.semmle.util.data.StringUtil;
1415
import com.semmle.util.exception.UserError;
@@ -146,42 +147,6 @@ public Set<String> getPredefinedGlobals() {
146147
}
147148
}
148149

149-
/** How to handle HTML files. */
150-
public static enum HTMLHandling {
151-
/** Only extract embedded scripts, not the HTML itself. */
152-
SCRIPTS(false, false),
153-
/** Only extract elements and embedded scripts, not text. */
154-
ELEMENTS(true, false),
155-
/** Extract elements, embedded scripts, and text. */
156-
ALL(true, true);
157-
158-
private final boolean extractElements;
159-
160-
private final boolean extractText;
161-
162-
private HTMLHandling(boolean extractElements, boolean extractText) {
163-
this.extractElements = extractElements;
164-
this.extractText = extractText;
165-
}
166-
167-
public boolean extractElements() {
168-
return extractElements;
169-
}
170-
171-
public boolean extractText() {
172-
return extractText;
173-
}
174-
175-
public boolean extractComments() {
176-
return extractElements;
177-
}
178-
179-
@Override
180-
public String toString() {
181-
return StringUtil.lc(name());
182-
}
183-
}
184-
185150
/** Which language version is the source code parsed as? */
186151
private ECMAVersion ecmaVersion;
187152

@@ -213,7 +178,7 @@ public String toString() {
213178
private boolean tolerateParseErrors;
214179

215180
/** How should HTML files be extracted? */
216-
private HTMLHandling htmlHandling;
181+
private HtmlPopulator.Config htmlHandling;
217182

218183
/**
219184
* Which {@link FileExtractor.FileType} should this code be parsed as?
@@ -244,7 +209,7 @@ public ExtractorConfig(boolean experimental) {
244209
this.platform = Platform.AUTO;
245210
this.jsx = true;
246211
this.sourceType = SourceType.AUTO;
247-
this.htmlHandling = HTMLHandling.ELEMENTS;
212+
this.htmlHandling = HtmlPopulator.Config.ELEMENTS;
248213
this.tolerateParseErrors = true;
249214
if (experimental) {
250215
this.mozExtensions = true;
@@ -403,11 +368,11 @@ public ExtractorConfig withJsx(boolean jsx) {
403368
return res;
404369
}
405370

406-
public HTMLHandling getHtmlHandling() {
371+
public HtmlPopulator.Config getHtmlHandling() {
407372
return htmlHandling;
408373
}
409374

410-
public ExtractorConfig withHtmlHandling(HTMLHandling htmlHandling) {
375+
public ExtractorConfig withHtmlHandling(HtmlPopulator.Config htmlHandling) {
411376
ExtractorConfig res = new ExtractorConfig(this);
412377
res.htmlHandling = htmlHandling;
413378
return res;

0 commit comments

Comments
 (0)