Skip to content

Commit 26eac85

Browse files
committed
Clean up web searcher code
1 parent ce2bffd commit 26eac85

File tree

6 files changed: +420 additions, -400 deletions (lines changed)

6 files changed: +420 additions, -400 deletions (lines changed)
Lines changed: 37 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
12
package org.scijava.search.web;
23

34
import java.util.ArrayList;
@@ -6,47 +7,44 @@
67
import org.scijava.search.Searcher;
78

89
/**
9-
* The AbstractWebSearcher contains convenience function to manage
10-
* search results of all Searchers browing the web.
11-
*
12-
*
13-
* Author: Robert Haase (http://haesleinhuepf.net) at MPI CBG (http://mpi-cbg.de)
14-
* December 2017
10+
* The AbstractWebSearcher contains convenience functions to manage search
11+
* results of all {@link Searcher} plugins browsing the web.
12+
*
13+
* @author Robert Haase (MPI-CBG)
1514
*/
16-
public abstract class AbstractWebSearcher implements Searcher
17-
{
18-
private String title;
19-
private ArrayList<SearchResult> searchResults = new ArrayList<SearchResult>();
20-
21-
/**
22-
*
23-
* @param title Name of the search engine
24-
*/
25-
public AbstractWebSearcher(String title) {
26-
this.title = title;
27-
}
28-
29-
@Override public String title()
30-
{
31-
return title;
32-
}
33-
34-
/**
35-
*
36-
* @param name Resulting website title / name
37-
* @param iconPath path to an image representing the results
38-
* @param url URL of the found website
39-
* @param details some text from the website representing its content
40-
*/
41-
protected void addResult(String name, String iconPath, String url, String details) {
15+
public abstract class AbstractWebSearcher implements Searcher {
16+
17+
private final String title;
18+
private final ArrayList<SearchResult> searchResults = new ArrayList<>();
19+
20+
/**
21+
* @param title Name of the search engine
22+
*/
23+
public AbstractWebSearcher(final String title) {
24+
this.title = title;
25+
}
26+
27+
@Override
28+
public String title() {
29+
return title;
30+
}
31+
32+
/**
33+
* @param name Resulting website title / name
34+
* @param iconPath path to an image representing the results
35+
* @param url URL of the found website
36+
* @param details some text from the website representing its content
37+
*/
38+
protected void addResult(final String name, final String iconPath,
39+
final String url, final String details)
40+
{
4241
searchResults.add(new WebSearchResult(name, //
43-
iconPath == null || iconPath.isEmpty() ? "/icons/world_link.png" : iconPath, url, details));
44-
}
45-
46-
public ArrayList<SearchResult> getSearchResults()
47-
{
48-
return searchResults;
49-
}
42+
iconPath == null || iconPath.isEmpty() ? "/icons/world_link.png"
43+
: iconPath, url, details));
44+
}
5045

46+
public ArrayList<SearchResult> getSearchResults() {
47+
return searchResults;
48+
}
5149

5250
}
Lines changed: 119 additions & 111 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
1-
package org.scijava.search.web;
21

2+
package org.scijava.search.web;
33

44
import java.io.IOException;
55
import java.net.URL;
@@ -17,118 +17,126 @@
1717
import org.xml.sax.SAXException;
1818

1919
/**
20-
* Searcher plugin for the Bio Imaging Search Engine (http://biii.eu/search)
20+
* A searcher for the <a href="http://biii.eu/search">Bio-Imaging Search
21+
* Engine</a>.
2122
*
22-
* @author Robert Haase, http://github.com/haesleinhuepfv
23+
* @author Robert Haase (MPI-CBG)
2324
*/
2425
//@Plugin(type = Searcher.class, name = "BISE")
25-
public class BISESearcher extends AbstractWebSearcher
26-
{
27-
28-
public BISESearcher() {
29-
super("BISE");
30-
}
31-
32-
@Override public List<SearchResult> search(String text,
33-
boolean fuzzy)
34-
{
35-
try {
36-
//URL url = new URL("file:///c:/structure/temp/biii.eu_search2.html");
37-
URL url = new URL("http://biii.eu/search?search_api_fulltext=" + URLEncoder.encode(text) + "&source=imagej");
38-
39-
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
40-
DocumentBuilder db = dbf.newDocumentBuilder();
41-
Document doc = db.parse(url.openStream());
42-
43-
parse(doc.getDocumentElement());
44-
saveLastItem();
45-
46-
47-
} catch (IOException ex) {
48-
ex.printStackTrace();
49-
} catch (ParserConfigurationException e) {
50-
e.printStackTrace();
51-
} catch (SAXException e) {
52-
e.printStackTrace();
53-
}
54-
return getSearchResults();
55-
}
56-
57-
String currentHeading;
58-
String currentLink;
59-
60-
private void parseHeading(Node node) {
61-
62-
if (node.getTextContent() != null && node.getTextContent().trim().length() > 0) {
63-
currentHeading = node.getTextContent();
64-
}
65-
if (node.getAttributes() != null) {
66-
Node href = node.getAttributes().getNamedItem("href");
67-
if (href != null) {
68-
currentLink = "http://biii.eu" + href.getNodeValue();
69-
}
70-
}
71-
72-
NodeList nodeList = node.getChildNodes();
73-
for (int i = 0; i < nodeList.getLength(); i++) {
74-
Node childNode = nodeList.item(i);
75-
76-
parseHeading(childNode);
77-
}
78-
}
79-
80-
String currentContent;
81-
82-
private void parseContent(Node node) {
83-
if (node.getTextContent() != null) {
84-
currentContent = node.getTextContent();
85-
}
86-
87-
NodeList nodeList = node.getChildNodes();
88-
for (int i = 0; i < nodeList.getLength(); i++) {
89-
Node childNode = nodeList.item(i);
90-
91-
parse(childNode);
92-
}
93-
}
94-
95-
private void saveLastItem() {
96-
if (currentHeading != null && currentHeading.length() > 0) {
97-
98-
addResult(currentHeading, "", currentLink, currentContent);
99-
100-
}
101-
currentHeading = "";
102-
currentLink = "";
103-
currentContent = "";
104-
}
105-
106-
private void parse(Node node) {
107-
if (node.getNodeName().equals("div")) {
108-
Node item = node.getAttributes() == null ? null : node.getAttributes().getNamedItem("class");
109-
if (item != null && item.getNodeValue().equals("views-field views-field-title")) {
110-
111-
if (currentHeading != null) {
112-
saveLastItem();
113-
}
114-
parseHeading(node);
115-
116-
return;
117-
}
118-
if (item != null && item.getNodeValue().equals("views-field views-field-search-api-excerpt")) {
119-
parseContent(node);
120-
return;
121-
}
122-
}
123-
124-
125-
NodeList nodeList = node.getChildNodes();
126-
for (int i = 0; i < nodeList.getLength(); i++) {
127-
Node childNode = nodeList.item(i);
128-
129-
parse(childNode);
130-
}
131-
132-
}
26+
public class BISESearcher extends AbstractWebSearcher {
27+
28+
public BISESearcher() {
29+
super("BISE");
30+
}
31+
32+
@Override
33+
public List<SearchResult> search(final String text, final boolean fuzzy) {
34+
try {
35+
// URL url = new URL("file:///c:/structure/temp/biii.eu_search2.html");
36+
final URL url = new URL("http://biii.eu/search?search_api_fulltext=" +
37+
URLEncoder.encode(text) + "&source=imagej");
38+
39+
final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
40+
final DocumentBuilder db = dbf.newDocumentBuilder();
41+
final Document doc = db.parse(url.openStream());
42+
43+
parse(doc.getDocumentElement());
44+
saveLastItem();
45+
46+
}
47+
catch (final IOException ex) {
48+
ex.printStackTrace();
49+
}
50+
catch (final ParserConfigurationException e) {
51+
e.printStackTrace();
52+
}
53+
catch (final SAXException e) {
54+
e.printStackTrace();
55+
}
56+
return getSearchResults();
57+
}
58+
59+
String currentHeading;
60+
String currentLink;
61+
62+
private void parseHeading(final Node node) {
63+
64+
if (node.getTextContent() != null && node.getTextContent().trim()
65+
.length() > 0)
66+
{
67+
currentHeading = node.getTextContent();
68+
}
69+
if (node.getAttributes() != null) {
70+
final Node href = node.getAttributes().getNamedItem("href");
71+
if (href != null) {
72+
currentLink = "http://biii.eu" + href.getNodeValue();
73+
}
74+
}
75+
76+
final NodeList nodeList = node.getChildNodes();
77+
for (int i = 0; i < nodeList.getLength(); i++) {
78+
final Node childNode = nodeList.item(i);
79+
80+
parseHeading(childNode);
81+
}
82+
}
83+
84+
String currentContent;
85+
86+
private void parseContent(final Node node) {
87+
if (node.getTextContent() != null) {
88+
currentContent = node.getTextContent();
89+
}
90+
91+
final NodeList nodeList = node.getChildNodes();
92+
for (int i = 0; i < nodeList.getLength(); i++) {
93+
final Node childNode = nodeList.item(i);
94+
95+
parse(childNode);
96+
}
97+
}
98+
99+
private void saveLastItem() {
100+
if (currentHeading != null && currentHeading.length() > 0) {
101+
102+
addResult(currentHeading, "", currentLink, currentContent);
103+
104+
}
105+
currentHeading = "";
106+
currentLink = "";
107+
currentContent = "";
108+
}
109+
110+
private void parse(final Node node) {
111+
if (node.getNodeName().equals("div")) {
112+
final Node item = node.getAttributes() == null ? null : node
113+
.getAttributes().getNamedItem("class");
114+
if (item != null && item.getNodeValue().equals(
115+
"views-field views-field-title"))
116+
{
117+
118+
if (currentHeading != null) {
119+
saveLastItem();
120+
}
121+
parseHeading(node);
122+
123+
return;
124+
}
125+
if (item != null && item.getNodeValue().equals(
126+
"views-field views-field-search-api-excerpt"))
127+
{
128+
parseContent(node);
129+
return;
130+
}
131+
}
132+
133+
final NodeList nodeList = node.getChildNodes();
134+
for (int i = 0; i < nodeList.getLength(); i++) {
135+
final Node childNode = nodeList.item(i);
136+
137+
parse(childNode);
138+
}
139+
140+
}
133141

134142
}

0 commit comments

Comments
 (0)