Skip to content

Commit 8514ea5

Browse files
committed
add extractRules test
1 parent 76d6afd commit 8514ea5

File tree

1 file changed

+54
-0
lines changed

1 file changed

+54
-0
lines changed
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
package im.nll.data.extractor.rule;
2+
3+
import com.google.common.base.Charsets;
4+
import com.google.common.io.Resources;
5+
import im.nll.data.extractor.Extractors;
6+
import org.junit.Assert;
7+
import org.junit.Before;
8+
import org.junit.Test;
9+
10+
import java.io.IOException;
11+
import java.util.LinkedHashMap;
12+
import java.util.List;
13+
import java.util.Map;
14+
15+
/**
16+
* @author <a href="mailto:fivesmallq@gmail.com">fivesmallq</a>
17+
* @version Revision: 1.0
18+
* @date 16/4/7 下午6:42
19+
*/
20+
public class ExtractRulesTest {
21+
private String listHtml2;
22+
23+
@Before
24+
public void before() {
25+
try {
26+
listHtml2 = Resources.toString(Resources.getResource("list2.html"), Charsets.UTF_8);
27+
} catch (IOException e) {
28+
e.printStackTrace();
29+
}
30+
}
31+
32+
@Test
33+
public void test() {
34+
Map<String, String> fields = new LinkedHashMap<>();
35+
fields.put("url", "selector:a.attr(href)");
36+
fields.put("title", "selector:a");
37+
fields.put("date", "selector:span.fr");
38+
ExtractRules rules = ExtractRules
39+
.newRules("selector:dd.x_ct1.html")
40+
.fields(fields);
41+
42+
List<Map<String, String>> datas = Extractors
43+
.on(listHtml2)
44+
.split(rules.getSplit().getExtractor())
45+
.extract(rules.getExtractRules())
46+
.asMapList();
47+
Assert.assertEquals(10, datas.size());
48+
Map<String, String> data = datas.get(2);
49+
Assert.assertEquals("http://infect.dxy.cn/article/486885", data.get("url"));
50+
Assert.assertEquals("5 个小测验:教你轻松应对艰难梭菌感染", data.get("title"));
51+
Assert.assertEquals("2016.03.10", data.get("date"));
52+
53+
}
54+
}

0 commit comments

Comments
 (0)