Skip to content
This repository was archived by the owner on Apr 22, 2020. It is now read-only.

Commit b27d32d

Browse files
author
Mark Needham
committed
overlap function
1 parent 9c91b16 commit b27d32d

File tree

2 files changed

+48
-7
lines changed

2 files changed

+48
-7
lines changed

algo/src/main/java/org/neo4j/graphalgo/similarity/Similarities.java

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,18 @@
11
/**
22
* Copyright (c) 2017 "Neo4j, Inc." <http://neo4j.com>
3-
*
3+
* <p>
44
* This file is part of Neo4j Graph Algorithms <http://github.com/neo4j-contrib/neo4j-graph-algorithms>.
5-
*
5+
* <p>
66
* Neo4j Graph Algorithms is free software: you can redistribute it and/or modify
77
* it under the terms of the GNU General Public License as published by
88
* the Free Software Foundation, either version 3 of the License, or
99
* (at your option) any later version.
10-
*
10+
* <p>
1111
* This program is distributed in the hope that it will be useful,
1212
* but WITHOUT ANY WARRANTY; without even the implied warranty of
1313
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1414
* GNU General Public License for more details.
15-
*
15+
* <p>
1616
* You should have received a copy of the GNU General Public License
1717
* along with this program. If not, see <http://www.gnu.org/licenses/>.
1818
*/
@@ -38,7 +38,7 @@ public double jaccardSimilarity(@Name("vector1") List<Number> vector1, @Name("ve
3838
int intersection = intersectionSet.size();
3939

4040
long denominator = vector1.size() + vector2.size() - intersection;
41-
return denominator == 0 ? 0 : (double)intersection / denominator;
41+
return denominator == 0 ? 0 : (double) intersection / denominator;
4242
}
4343

4444
@UserFunction("algo.similarity.cosine")
@@ -92,4 +92,18 @@ public double euclideanDistance(@Name("vector1") List<Number> vector1, @Name("ve
9292
public double euclideanSimilarity(@Name("vector1") List<Number> vector1, @Name("vector2") List<Number> vector2) {
9393
return 1.0d / (1 + euclideanDistance(vector1, vector2));
9494
}
95-
}
95+
96+
@UserFunction("algo.similarity.overlap")
97+
@Description("algo.similarity.overlap([vector1], [vector2]) " +
98+
"given two collection vectors, calculate overlap similarity")
99+
public double overlapSimilarity(@Name("vector1") List<Number> vector1, @Name("vector2") List<Number> vector2) {
100+
if (vector1 == null || vector2 == null) return 0;
101+
102+
HashSet<Number> intersectionSet = new HashSet<>(vector1);
103+
intersectionSet.retainAll(vector2);
104+
int intersection = intersectionSet.size();
105+
106+
long denominator = Math.min(vector1.size(), vector2.size());
107+
return denominator == 0 ? 0 : (double) intersection / denominator;
108+
}
109+
}

tests/src/test/java/org/neo4j/graphalgo/algo/similarity/SimilaritiesTest.java

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,33 @@ public void testJaccardSimilarity() throws Exception {
188188
assertEquals(bobSim, result.next().get("jaccardSim"));
189189
assertEquals(jimSim, result.next().get("jaccardSim"));
190190
}
191+
192+
@Test
193+
public void testOverlapSimilarity() throws Exception {
194+
String controlQuery =
195+
"MATCH (p1:Employee)-[:HAS_SKILL]->(sk)<-[:HAS_SKILL]-(p2)\n" +
196+
"WITH p1,p2,size((p1)-[:HAS_SKILL]->()) as d1, size((p2)-[:HAS_SKILL]->()) as d2, count(distinct sk) as intersection\n" +
197+
"WITH p1.name as name1, p2.name as name2, toFloat(intersection) / CASE WHEN d1 > d2 THEN d2 ELSE d1 END as overlapSim\n" +
198+
"ORDER BY name1,name2\n" +
199+
"RETURN name1,name2, toString(toInteger(overlapSim*10000)/10000.0) as overlapSim";
200+
String bobSim;
201+
String jimSim;
202+
try (Transaction tx = db.beginTx()) {
203+
Result result = db.execute(controlQuery);
204+
bobSim = (String) result.next().get("overlapSim");
205+
jimSim = (String) result.next().get("overlapSim");
206+
}
207+
208+
Result result = db.execute(
209+
"MATCH (p1:Employee),(p2:Employee) WHERE p1 <> p2\n" +
210+
"WITH p1, [(p1)-[:HAS_SKILL]->(sk) | id(sk)] as v1, p2, [(p2)-[:HAS_SKILL]->(sk) | id(sk)] as v2\n" +
211+
"WITH p1.name as name1, p2.name as name2, algo.similarity.overlap(v1, v2) as overlapSim ORDER BY name1,name2\n" +
212+
"RETURN name1, name2, toString(toInteger(overlapSim*10000)/10000.0) as overlapSim");
213+
214+
assertEquals(bobSim, result.next().get("overlapSim"));
215+
assertEquals(jimSim, result.next().get("overlapSim"));
216+
}
217+
191218
@Test
192219
public void testEuclideanSimilarity() throws Exception {
193220
String controlQuery =
@@ -217,4 +244,4 @@ public void testEuclideanSimilarity() throws Exception {
217244
assertEquals(bobSim, result.next().get("euclidSim"));
218245
assertEquals(jimSim, result.next().get("euclidSim"));
219246
}
220-
}
247+
}

0 commit comments

Comments
 (0)