11/**
22 * Copyright (c) 2017 "Neo4j, Inc." <http://neo4j.com>
3- *
3+ * <p>
44 * This file is part of Neo4j Graph Algorithms <http://github.com/neo4j-contrib/neo4j-graph-algorithms>.
5- *
5+ * <p>
66 * Neo4j Graph Algorithms is free software: you can redistribute it and/or modify
77 * it under the terms of the GNU General Public License as published by
88 * the Free Software Foundation, either version 3 of the License, or
99 * (at your option) any later version.
10- *
10+ * <p>
1111 * This program is distributed in the hope that it will be useful,
1212 * but WITHOUT ANY WARRANTY; without even the implied warranty of
1313 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1414 * GNU General Public License for more details.
15- *
15+ * <p>
1616 * You should have received a copy of the GNU General Public License
1717 * along with this program. If not, see <http://www.gnu.org/licenses/>.
1818 */
@@ -38,7 +38,7 @@ public double jaccardSimilarity(@Name("vector1") List<Number> vector1, @Name("ve
3838 int intersection = intersectionSet .size ();
3939
4040 long denominator = vector1 .size () + vector2 .size () - intersection ;
41- return denominator == 0 ? 0 : (double )intersection / denominator ;
41+ return denominator == 0 ? 0 : (double ) intersection / denominator ;
4242 }
4343
4444 @ UserFunction ("algo.similarity.cosine" )
@@ -92,4 +92,18 @@ public double euclideanDistance(@Name("vector1") List<Number> vector1, @Name("ve
9292 public double euclideanSimilarity (@ Name ("vector1" ) List <Number > vector1 , @ Name ("vector2" ) List <Number > vector2 ) {
9393 return 1.0d / (1 + euclideanDistance (vector1 , vector2 ));
9494 }
95- }
95+
96+ @ UserFunction ("algo.similarity.overlap" )
97+ @ Description ("algo.similarity.overlap([vector1], [vector2]) " +
98+ "given two collection vectors, calculate overlap similarity" )
99+ public double overlapSimilarity (@ Name ("vector1" ) List <Number > vector1 , @ Name ("vector2" ) List <Number > vector2 ) {
100+ if (vector1 == null || vector2 == null ) return 0 ;
101+
102+ HashSet <Number > intersectionSet = new HashSet <>(vector1 );
103+ intersectionSet .retainAll (vector2 );
104+ int intersection = intersectionSet .size ();
105+
106+ long denominator = Math .min (vector1 .size (), vector2 .size ());
107+ return denominator == 0 ? 0 : (double ) intersection / denominator ;
108+ }
109+ }
0 commit comments