|
| 1 | +__precompile__(true) |
1 | 2 | module CommunityDetection |
| 3 | +using LightGraphs |
| 4 | +import Clustering: kmeans |
2 | 5 |
|
3 | | -# package code goes here |
| 6 | +export community_detection_nback, community_detection_bethe |
4 | 7 |
|
5 | | -end # module |
"""
    community_detection_nback(g::AbstractGraph, k::Int)

Return a vector, indexed by vertex, of community assignments for graph `g`
when looking for `k` communities.
The assignments are derived from the spectral embedding of the
non-backtracking matrix of `g` (see `nonbacktrack_embedding`).

### Implementation Notes
For `k == 2` the first row of the (real part of the) embedding is split by
sign via `community_detection_threshold`; for any other `k` the embedding is
clustered with `kmeans`.

### References
- [Krzakala et al.](http://www.pnas.org/content/110/52/20935.short)
"""
function community_detection_nback(g::AbstractGraph, k::Int)
    # TODO insert check on connected_components
    embedding = real(nonbacktrack_embedding(g, k))
    # Two communities: a sign split on the single embedding coordinate suffices.
    k == 2 && return community_detection_threshold(g, embedding[1, :])
    return kmeans(embedding, k).assignments
end
| 29 | + |
"""
    community_detection_threshold(g::AbstractGraph, coords::AbstractArray)

Return a vector of length `nv(g)` assigning vertex `i` to community `1` when
`coords[i] > 0` and to community `2` otherwise.
"""
function community_detection_threshold(g::AbstractGraph, coords::AbstractArray)
    # TODO use a more intelligent method to set the threshold
    # 0 based thresholds are highly sensitive to errors.
    return Int[coords[v] > 0 ? 1 : 2 for v in 1:nv(g)]
end
| 40 | + |
| 41 | + |
"""
    nonbacktrack_embedding(g::AbstractGraph, k::Int)

Perform spectral embedding of the non-backtracking matrix of `g`. Return
a matrix ϕ with `k-1` rows where ϕ[:,i] are the coordinates for vertex i.

### Implementation Notes
Does not explicitly construct the `non_backtracking_matrix`.
See `Nonbacktracking` for details.
`k+1` eigenpairs are requested from `eigs`; the first eigenvector is skipped
and eigenvectors `2:k` are each passed through `contract` to obtain one
coordinate per vertex.

### References
- [Krzakala et al.](http://www.pnas.org/content/110/52/20935.short).
"""
function nonbacktrack_embedding(g::AbstractGraph, k::Int)
    B = Nonbacktracking(g)
    # Only the eigenvectors (second return value of `eigs`) are needed.
    eigvecs = eigs(B, nev=k+1, v0=ones(Float64, B.m))[2]
    coords = zeros(Complex64, nv(g), k-1)
    # TODO decide what to do with the stationary distribution (first eigenvector).
    # This code just throws it away in favor of eigvecs[:,2:k].
    # We might also use the degree distribution to scale these vectors as is
    # common with the laplacian/adjacency methods.
    for col in 1:(k - 1)
        coords[:, col] = contract(B, eigvecs[:, col + 1])
    end
    return coords'
end
| 69 | + |
| 70 | + |
| 71 | + |
"""
    community_detection_bethe(g::AbstractGraph, k=-1; kmax=15)

Perform detection for `k` communities using the spectral properties of the
Bethe Hessian matrix associated to `g`.
If `k` is omitted or less than `1`, the optimal number of communities
will be automatically selected. In this case the maximum number of
detectable communities is given by `kmax`.
Return a vector containing the vertex assignments.

### Implementation Notes
The number of detectable communities `q` is taken as the index of the last
negative eigenvalue among the smallest (`which=:SR`) eigenvalues of the
Bethe Hessian. A warning is emitted when the requested `k` exceeds `q`.
When the resulting number of communities is `1` or less, every vertex is
assigned to community `1` without calling `kmeans`.

### References
- [Saade et al.](http://papers.nips.cc/paper/5520-spectral-clustering-of-graphs-with-the-bethe-hessian)
"""
function community_detection_bethe(g::AbstractGraph, k::Int=-1; kmax::Int=15)
    n = nv(g)
    deg = degree(g)
    A = adjacency_matrix(g)
    D = diagm(deg)
    # r is the square root of the average degree.
    r = (sum(deg) / n)^0.5

    Hr = (r^2-1)*eye(n) - r*A + D
    # An explicit k fixes how many eigenpairs are requested.
    k >= 1 && (kmax = k)
    λ, eigv = eigs(Hr, which=:SR, nev=min(kmax, n))
    q = findlast(x -> x<0, λ)
    k > q && warn("Using eigenvectors with positive eigenvalues,
                    some communities could be meaningless. Try to reduce `k`.")
    k < 1 && (k = q)
    # With a single community `eigv[:,2:k]` is empty and `kmeans` cannot run,
    # so the trivial assignment is returned for k == 1 as well as k < 1.
    k <= 1 && return fill(1, n)
    labels = kmeans(eigv[:,2:k]', k).assignments
    return labels
end
| 102 | + |
| 103 | +end #module |
0 commit comments