Skip to content

Commit 86ebc0e

Browse files
committed
first cut
1 parent 21f99fc commit 86ebc0e

File tree

3 files changed

+191
-4
lines changed

3 files changed

+191
-4
lines changed

REQUIRE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
julia 0.6
2+
Clustering

src/CommunityDetection.jl

Lines changed: 100 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,103 @@
1+
__precompile__(true)
12
module CommunityDetection
3+
using LightGraphs
4+
import Clustering: kmeans
25

3-
# package code goes here
6+
export community_detection_nback, community_detection_bethe
47

5-
end # module
8+
"""
9+
community_detection_nback(g::AbstractGraph, k::Int)
10+
11+
Return an array, indexed by vertex, containing commmunity assignments for
12+
graph `g` detecting `k` communities.
13+
Community detection is performed using the spectral properties of the
14+
non-backtracking matrix of `g`.
15+
16+
### References
17+
- [Krzakala et al.](http://www.pnas.org/content/110/52/20935.short)
18+
"""
19+
function community_detection_nback(g::AbstractGraph, k::Int)
20+
#TODO insert check on connected_components
21+
ϕ = real(nonbacktrack_embedding(g, k))
22+
if k==2
23+
c = community_detection_threshold(g, ϕ[1,:])
24+
else
25+
c = kmeans(ϕ, k).assignments
26+
end
27+
return c
28+
end
29+
30+
function community_detection_threshold(g::AbstractGraph, coords::AbstractArray)
31+
# TODO use a more intelligent method to set the threshold
32+
# 0 based thresholds are highly sensitive to errors.
33+
c = ones(Int, nv(g))
34+
# idx = sortperm(λ, lt=(x,y)-> abs(x) > abs(y))[2:k] #the second eigenvector is the relevant one
35+
for i=1:nv(g)
36+
c[i] = coords[i] > 0 ? 1 : 2
37+
end
38+
return c
39+
end
40+
41+
42+
"""
43+
nonbacktrack_embedding(g::AbstractGraph, k::Int)
44+
45+
Perform spectral embedding of the non-backtracking matrix of `g`. Return
46+
a matrix ϕ where ϕ[:,i] are the coordinates for vertex i.
47+
48+
### Implementation Notes
49+
Does not explicitly construct the `non_backtracking_matrix`.
50+
See `Nonbacktracking` for details.
51+
52+
### References
53+
- [Krzakala et al.](http://www.pnas.org/content/110/52/20935.short).
54+
"""
55+
function nonbacktrack_embedding(g::AbstractGraph, k::Int)
56+
B = Nonbacktracking(g)
57+
λ, eigv, conv = eigs(B, nev=k+1, v0=ones(Float64, B.m))
58+
ϕ = zeros(Complex64, nv(g), k-1)
59+
# TODO decide what to do with the stationary distribution ϕ[:,1]
60+
# this code just throws it away in favor of eigv[:,2:k+1].
61+
# we might also use the degree distribution to scale these vectors as is
62+
# common with the laplacian/adjacency methods.
63+
for n=1:k-1
64+
v= eigv[:,n+1]
65+
ϕ[:,n] = contract(B, v)
66+
end
67+
return ϕ'
68+
end
69+
70+
71+
72+
"""
73+
community_detection_bethe(g::AbstractGraph, k=-1; kmax=15)
74+
75+
Perform detection for `k` communities using the spectral properties of the
76+
Bethe Hessian matrix associated to `g`.
77+
If `k` is omitted or less than `1`, the optimal number of communities
78+
will be automatically selected. In this case the maximum number of
79+
detectable communities is given by `kmax`.
80+
Return a vector containing the vertex assignments.
81+
82+
### References
83+
- [Saade et al.](http://papers.nips.cc/paper/5520-spectral-clustering-of-graphs-with-the-bethe-hessian)
84+
"""
85+
function community_detection_bethe(g::AbstractGraph, k::Int=-1; kmax::Int=15)
86+
A = adjacency_matrix(g)
87+
D = diagm(degree(g))
88+
r = (sum(degree(g)) / nv(g))^0.5
89+
90+
Hr = (r^2-1)*eye(nv(g))-r*A+D;
91+
# Hmr = (r^2-1)*eye(nv(g))+r*A+D;
92+
k >= 1 && (kmax = k)
93+
λ, eigv = eigs(Hr, which=:SR, nev=min(kmax, nv(g)))
94+
q = findlast(x -> x<0, λ)
95+
k > q && warn("Using eigenvectors with positive eigenvalues,
96+
some communities could be meaningless. Try to reduce `k`.")
97+
k < 1 && (k = q)
98+
k < 1 && return fill(1, nv(g))
99+
labels = kmeans(eigv[:,2:k]', k).assignments
100+
return labels
101+
end
102+
103+
end #module

test/runtests.jl

Lines changed: 90 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,93 @@
11
using CommunityDetection
2+
using LightGraphs
23
using Base.Test
34

4-
# write your own tests here
5-
@test 1 == 2
5+
""" Spectral embedding of the non-backtracking matrix of `g`
6+
(see [Krzakala et al.](http://www.pnas.org/content/110/52/20935.short)).
7+
8+
`g`: input Graph
9+
`k`: number of dimensions in which to embed
10+
11+
return : a matrix ϕ where ϕ[:,i] are the coordinates for vertex i.
12+
"""
13+
function nonbacktrack_embedding_dense(g::AbstractGraph, k::Int)
14+
B, edgeid = non_backtracking_matrix(g)
15+
λ,eigv,conv = eigs(B, nev=k+1, v0=ones(Float64, size(B,1)))
16+
ϕ = zeros(Complex64, k-1, nv(g))
17+
# TODO decide what to do with the stationary distribution ϕ[:,1]
18+
# this code just throws it away in favor of eigv[:,2:k+1].
19+
# we might also use the degree distribution to scale these vectors as is
20+
# common with the laplacian/adjacency methods.
21+
for n=1:k-1
22+
v= eigv[:,n+1]
23+
for i=1:nv(g)
24+
for j in neighbors(g, i)
25+
u = edgeid[Edge(j,i)]
26+
ϕ[n,i] += v[u]
27+
end
28+
end
29+
end
30+
return ϕ
31+
end
32+
33+
n = 10; k = 5
34+
pg = PathGraph(n)
35+
ϕ1 = CommunityDetection.nonbacktrack_embedding(pg, k)'
36+
37+
nbt = Nonbacktracking(pg)
38+
B, emap = non_backtracking_matrix(pg)
39+
Bs = sparse(nbt)
40+
@test sparse(B) == Bs
41+
42+
# check that matvec works
43+
x = ones(Float64, nbt.m)
44+
y = nbt * x
45+
z = B * x
46+
@test norm(y-z) < 1e-8
47+
48+
#check that matmat works and full(nbt) == B
49+
@test norm(nbt*eye(nbt.m) - B) < 1e-8
50+
51+
#check that we can use the implicit matvec in nonbacktrack_embedding
52+
@test size(y) == size(x)
53+
ϕ2 = nonbacktrack_embedding_dense(pg, k)'
54+
@test size(ϕ2) == size(ϕ1)
55+
56+
#check that this recovers communities in the path of cliques
57+
n=10
58+
g10 = CompleteGraph(n)
59+
z = copy(g10)
60+
for k=2:5
61+
z = blkdiag(z, g10)
62+
add_edge!(z, (k-1)*n, k*n)
63+
64+
c = community_detection_nback(z, k)
65+
@test sort(union(c)) == [1:k;]
66+
a = collect(n:n:k*n)
67+
@test length(c[a]) == length(unique(c[a]))
68+
for i=1:k
69+
for j=(i-1)*n+1:i*n
70+
@test c[j] == c[i*n]
71+
end
72+
end
73+
74+
c = community_detection_bethe(z, k)
75+
@test sort(union(c)) == [1:k;]
76+
a = collect(n:n:k*n)
77+
@test length(c[a]) == length(unique(c[a]))
78+
for i=1:k
79+
for j=(i-1)*n+1:i*n
80+
@test c[j] == c[i*n]
81+
end
82+
end
83+
84+
c = community_detection_bethe(z)
85+
@test sort(union(c)) == [1:k;]
86+
a = collect(n:n:k*n)
87+
@test length(c[a]) == length(unique(c[a]))
88+
for i=1:k
89+
for j=(i-1)*n+1:i*n
90+
@test c[j] == c[i*n]
91+
end
92+
end
93+
end

0 commit comments

Comments
 (0)