Skip to content

Commit 8183635

Browse files
committed
add global filter to process all fields
1 parent f1fc302 commit 8183635

File tree

2 files changed

+41
-0
lines changed

2 files changed

+41
-0
lines changed

src/main/java/im/nll/data/extractor/Extractors.java

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ public class Extractors {
2525
private List<String> htmlList;
2626
private Map<String, List<Extractor>> extractorsMap = new LinkedHashMap<>();
2727
// Filter lists keyed by String (presumably the field name — confirm); LinkedHashMap keeps insertion order.
private Map<String, List<Filter>> filtersMap = new LinkedHashMap<>();
28+
private List<Filter> globalFilter = new LinkedList<>();
2829
private String prevField;
2930

3031
public Extractors(String html) {
@@ -193,6 +194,17 @@ public Extractors filter(Filter filter) {
193194
return this;
194195
}
195196

197+
/**
198+
* Registers a filter that is applied to every extracted value, before any field-specific filter.
199+
*
200+
* @param filter filter to append to the global filter list
201+
* @return this instance, so calls can be chained
202+
*/
203+
public Extractors filterAll(Filter filter) {
204+
globalFilter.add(filter);
205+
return this;
206+
}
207+
196208

197209
/**
198210
* split html use listable extractor
@@ -244,6 +256,7 @@ public String asString() {
244256
for (Extractor extractor : extractors) {
245257
result = extractor.extract(result);
246258
}
259+
result = filterGlobal(result);
247260
result = filter(DEFAULT_FIELD, result);
248261
}
249262
return result;
@@ -292,6 +305,7 @@ public List<String> asStringList(String separator) {
292305
for (Extractor extractor : extractors) {
293306
result = extractor.extract(result);
294307
}
308+
result = filterGlobal(result);
295309
result = filter(name, result);
296310
stringBuffer.append(result).append(separator);
297311
}
@@ -421,6 +435,7 @@ private <T> T extractBean(String html, Class<T> clazz) {
421435
for (Extractor extractor : extractors) {
422436
result = extractor.extract(result);
423437
}
438+
result = filterGlobal(result);
424439
result = filter(name, result);
425440
try {
426441
Reflect.on(entity).set(name, result);
@@ -440,6 +455,8 @@ private Map<String, String> extractMap(String html) {
440455
for (Extractor extractor : extractors) {
441456
result = extractor.extract(result);
442457
}
458+
// apply the global filters before the field-specific filters
459+
result = filterGlobal(result);
443460
result = filter(name, result);
444461
try {
445462
map.put(name, result);
@@ -458,6 +475,13 @@ private String filter(String name, String result) {
458475
return result;
459476
}
460477

478+
/**
 * Runs a value through every filter in {@code globalFilter}, in list order,
 * feeding each filter's output into the next one.
 *
 * @param result value to filter
 * @return the value after all global filters have run; the input itself when no
 *         global filter is registered
 */
private String filterGlobal(String result) {
479+
for (Filter filter : globalFilter) {
480+
result = filter.process(result);
481+
}
482+
return result;
483+
}
484+
461485
//------------ internal --------------//
462486

463487

src/test/java/im/nll/data/extractor/ExtractorsTest.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,23 @@ public void testToBeanListFilter() throws Exception {
369369
Assert.assertEquals(second.getUrl(), "url:https://www.ruby-lang.org");
370370
}
371371

372+
/**
 * Checks that a filter registered with filterAll runs before the per-field filters:
 * every expected value has the field prefix ("type:", "name:", "url:") in front of
 * the "all-" prefix added by the global filter.
 */
@Test
373+
public void testToBeanListFilterAll() throws Exception {
374+
List<Language> languages = Extractors.on(listHtml)
375+
.filterAll(value -> "all-" + value)
376+
.split(xpath("//tr[@class='item']"))
377+
.extract("type", xpath("//td[1]/text()")).filter(value -> "type:" + value)
378+
.extract("name", xpath("//td[2]/text()")).filter(value -> "name:" + value)
379+
.extract("url", xpath("//td[3]/text()")).filter(value -> "url:" + value)
380+
.asBeanList(Language.class);
381+
Assert.assertNotNull(languages);
382+
Language second = languages.get(1);
383+
Assert.assertEquals(languages.size(), 3);
384+
Assert.assertEquals(second.getType(), "type:all-dynamic");
385+
Assert.assertEquals(second.getName(), "name:all-Ruby");
386+
Assert.assertEquals(second.getUrl(), "url:all-https://www.ruby-lang.org");
387+
}
388+
372389
@Test
373390
public void testToBeanListByJson() throws Exception {
374391
List<Book> books = Extractors.on(jsonString).split(json("$..book.*"))

0 commit comments

Comments
 (0)