-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmember_community.R
413 lines (390 loc) · 16.9 KB
/
member_community.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
# Non-hierarchical community partitioning ####
#' Non-hierarchical community partitioning algorithms
#'
#' @description
#' These functions offer algorithms for partitioning
#' networks into sets of communities:
#'
#' - `node_in_optimal()` is a problem-solving algorithm that seeks to maximise
#' modularity over all possible partitions.
#' - `node_in_partition()` is a greedy, iterative, deterministic
#' partitioning algorithm that results in two equally-sized communities.
#' - `node_in_infomap()` is an algorithm based on the information in random walks.
#' - `node_in_spinglass()` is a greedy, iterative, probabilistic algorithm,
#' based on analogy to model from statistical physics.
#' - `node_in_fluid()` is a propogation-based partitioning algorithm,
#' based on analogy to model from fluid dynamics.
#' - `node_in_louvain()` is an agglomerative multilevel algorithm that seeks to maximise
#' modularity over all possible partitions.
#' - `node_in_leiden()` is an agglomerative multilevel algorithm that seeks to maximise
#' the Constant Potts Model over all possible partitions.
#'
#' The different algorithms offer various advantages in terms of computation time,
#' availability on different types of networks, ability to maximise modularity,
#' and their logic or domain of inspiration.
#'
#' @inheritParams mark_is
#' @name member_community_non
#' @family memberships
NULL
#' @rdname member_community_non
#' @section Optimal:
#' The general idea is to calculate the modularity of all possible partitions,
#' and choose the community structure that maximises this modularity measure.
#' Note that this is an NP-complete problem with exponential time complexity.
#' The guidance in the igraph package is networks of <50-200 nodes is probably fine.
#' @references
#' ## On optimal community detection
#' Brandes, Ulrik, Daniel Delling, Marco Gaertler, Robert Gorke, Martin Hoefer, Zoran Nikoloski, Dorothea Wagner. 2008.
#' "On Modularity Clustering",
#' _IEEE Transactions on Knowledge and Data Engineering_ 20(2):172-188.
#' @examples
#' node_in_optimal(ison_adolescents)
#' @export
node_in_optimal <- function(.data){
if(missing(.data)) {expect_nodes(); .data <- .G()}
out <- igraph::cluster_optimal(manynet::as_igraph(.data)
)$membership
make_node_member(out, .data)
}
#' @rdname member_community_non
#' @references
#' ## On partitioning community detection
#' Kernighan, Brian W., and Shen Lin. 1970.
#' "An efficient heuristic procedure for partitioning graphs."
#' _The Bell System Technical Journal_ 49(2): 291-307.
#' \doi{10.1002/j.1538-7305.1970.tb01770.x}
#' @examples
#' node_in_partition(ison_adolescents)
#' node_in_partition(ison_southern_women)
#' @export
node_in_partition <- function(.data){
if(missing(.data)) {expect_nodes(); .data <- .G()}
# assign groups arbitrarily
n <- net_nodes(.data)
group_size <- ifelse(n %% 2 == 0, n/2, (n+1)/2)
# count internal and external costs of each vertex
g <- as_matrix(to_multilevel(.data))
g1 <- g[1:group_size, 1:group_size]
g2 <- g[(group_size+1):n, (group_size+1):n]
intergroup <- g[1:group_size, (group_size+1):n]
g2.intcosts <- rowSums(g2)
g2.extcosts <- colSums(intergroup)
g1.intcosts <- rowSums(g1)
g1.extcosts <- rowSums(intergroup)
# count edge costs of each vertex
g1.net <- g1.extcosts - g1.intcosts
g2.net <- g2.extcosts - g2.intcosts
g1.net <- sort(g1.net, decreasing = TRUE)
g2.net <- sort(g2.net, decreasing = TRUE)
# swap pairs of vertices (one from each group) that give a positive sum of net edge costs
if(length(g1.net)!=length(g2.net)) {
g2.net <- c(g2.net,0)
} else {g2.net}
sums <- as.integer(unname(g1.net + g2.net))
# positions in sequence of names at which sum >= 0
index <- which(sums >= 0 %in% sums)
g1.newnames <- g1.names <- names(g1.net)
g2.newnames <- g2.names <- names(g2.net)
# make swaps based on positions in sequence
for (i in index) {
g1.newnames[i] <- g2.names[i]
g2.newnames[i] <- g1.names[i]
}
# extract names of vertices in each group after swaps
out <- ifelse(manynet::node_names(.data) %in% g1.newnames, 1, 2)
make_node_member(out, .data)
}
#' @rdname member_community_non
#' @section Infomap:
#' Motivated by information theoretic principles, this algorithm tries to build
#' a grouping that provides the shortest description length for a random walk,
#' where the description length is measured by the expected number of bits per node required to encode the path.
#' @param times Integer indicating number of simulations/walks used.
#' By default, `times=50`.
#' @references
#' ## On infomap community detection
#' Rosvall, M, and C. T. Bergstrom. 2008.
#' "Maps of information flow reveal community structure in complex networks",
#' _PNAS_ 105:1118.
#' \doi{10.1073/pnas.0706851105}
#'
#' Rosvall, M., D. Axelsson, and C. T. Bergstrom. 2009.
#' "The map equation",
#' _Eur. Phys. J. Special Topics_ 178: 13.
#' \doi{10.1140/epjst/e2010-01179-1}
#' @examples
#' node_in_infomap(ison_adolescents)
#' @export
node_in_infomap <- function(.data, times = 50){
if(missing(.data)) {expect_nodes(); .data <- .G()}
out <- igraph::cluster_infomap(manynet::as_igraph(.data),
nb.trials = times
)$membership
make_node_member(out, .data)
}
#' @rdname member_community_non
#' @param max_k Integer constant, the number of spins to use as an upper limit
#' of communities to be found. Some sets can be empty at the end.
#' @param resolution The Reichardt-Bornholdt “gamma” resolution parameter for modularity.
#' By default 1, making existing and non-existing ties equally important.
#' Smaller values make existing ties more important,
#' and larger values make missing ties more important.
#' @section Spin-glass:
#' This is motivated by analogy to the Potts model in statistical physics.
#' Each node can be in one of _k_ "spin states",
#' and ties (particle interactions) provide information about which pairs of nodes
#' want similar or different spin states.
#' The final community definitions are represented by the nodes' spin states
#' after a number of updates.
#' A different implementation than the default is used in the case of signed networks,
#' such that nodes connected by negative ties will be more likely found in separate communities.
#' @references
#' ## On spinglass community detection
#' Reichardt, Jorg, and Stefan Bornholdt. 2006.
#' "Statistical Mechanics of Community Detection"
#' _Physical Review E_, 74(1): 016110–14.
#' \doi{10.1073/pnas.0605965104}
#'
#' Traag, Vincent A., and Jeroen Bruggeman. 2009.
#' "Community detection in networks with positive and negative links".
#' _Physical Review E_, 80(3): 036115.
#' \doi{10.1103/PhysRevE.80.036115}
#' @examples
#' node_in_spinglass(ison_adolescents)
#' @export
node_in_spinglass <- function(.data, max_k = 200, resolution = 1){
if(missing(.data)) {expect_nodes(); .data <- .G()}
out <- igraph::cluster_spinglass(manynet::as_igraph(.data),
spins = max_k, gamma = resolution,
implementation = ifelse(manynet::is_signed(.data), "neg", "orig")
)$membership
make_node_member(out, .data)
}
#' @rdname member_community_non
#' @section Fluid:
#' The general idea is to observe how a discrete number of fluids interact, expand and contract,
#' in a non-homogenous environment, i.e. the network structure.
#' Unlike the `{igraph}` implementation that this function wraps,
#' this function iterates over all possible numbers of communities and returns the membership
#' associated with the highest modularity.
#' @references
#' ## On fluid community detection
#' Parés Ferran, Dario Garcia Gasulla, Armand Vilalta, Jonatan Moreno, Eduard Ayguade, Jesus Labarta, Ulises Cortes, and Toyotaro Suzumura. 2018.
#' "Fluid Communities: A Competitive, Scalable and Diverse Community Detection Algorithm".
#' In: _Complex Networks & Their Applications VI_
#' Springer, 689: 229.
#' \doi{10.1007/978-3-319-72150-7_19}
#' @examples
#' node_in_fluid(ison_adolescents)
#' @export
node_in_fluid <- function(.data) {
if(missing(.data)) {expect_nodes(); .data <- .G()}
.data <- as_igraph(.data)
mods <- vapply(seq.int(net_nodes(.data)), function(x)
igraph::modularity(.data, membership = igraph::membership(
igraph::cluster_fluid_communities(.data, x))),
FUN.VALUE = numeric(1))
out <- igraph::membership(igraph::cluster_fluid_communities(
.data, no.of.communities = which.max(mods)))
make_node_member(out, .data)
}
#' @rdname member_community_non
#' @section Louvain:
#' The general idea is to take a hierarchical approach to optimising the modularity criterion.
#' Nodes begin in their own communities and are re-assigned in a local, greedy way:
#' each node is moved to the community where it achieves the highest contribution to modularity.
#' When no further modularity-increasing reassignments are possible,
#' the resulting communities are considered nodes (like a reduced graph),
#' and the process continues.
#' @references
#' ## On Louvain community detection
#' Blondel, Vincent, Jean-Loup Guillaume, Renaud Lambiotte, Etienne Lefebvre. 2008.
#' "Fast unfolding of communities in large networks",
#' _J. Stat. Mech._ P10008.
#' @examples
#' node_in_louvain(ison_adolescents)
#' @export
node_in_louvain <- function(.data, resolution = 1){
if(missing(.data)) {expect_nodes(); .data <- .G()}
out <- igraph::cluster_louvain(manynet::as_igraph(.data),
resolution = resolution
)$membership
make_node_member(out, .data)
}
#' @rdname member_community_non
#' @section Leiden:
#' The general idea is to optimise the Constant Potts Model,
#' which does not suffer from the resolution limit, instead of modularity.
#' As outlined in the `{igraph}` package,
#' the Constant Potts Model object function is:
#'
#' \deqn{\frac{1}{2m} \sum_{ij}(A_{ij}-\gamma n_i n_j)\delta(\sigma_i, \sigma_j)}
#'
#' where _m_ is the total tie weight,
#' \eqn{A_{ij}} is the tie weight between _i_ and _j_,
#' \eqn{\gamma} is the so-called resolution parameter,
#' \eqn{n_i} is the node weight of node _i_,
#' and \eqn{\delta(\sigma_i, \sigma_j) = 1} if and only if
#' _i_ and _j_ are in the same communities and 0 otherwise.
#' @references
#' ## On Leiden community detection
#' Traag, Vincent A., Ludo Waltman, and Nees Jan van Eck. 2019.
#' "From Louvain to Leiden: guaranteeing well-connected communities",
#' _Scientific Reports_, 9(1):5233.
#' \doi{10.1038/s41598-019-41695-z}
#' @examples
#' node_in_leiden(ison_adolescents)
#' @export
node_in_leiden <- function(.data, resolution = 1){
if(missing(.data)) {expect_nodes(); .data <- .G()}
if(is_weighted(.data)){ # Traag resolution default
n <- net_nodes(.data)
resolution <- sum(tie_weights(.data))/(n*(n - 1)/2)
}
out <- igraph::cluster_leiden(as_igraph(.data),
resolution_parameter = resolution
)$membership
make_node_member(out, .data)
}
# Hierarchical community partitioning ####
#' Hierarchical community partitioning algorithms
#'
#' @description
#' These functions offer algorithms for hierarchically clustering
#' networks into communities. Since all of the following are hierarchical,
#' their dendrograms can be plotted:
#'
#' - `node_in_betweenness()` is a hierarchical, decomposition algorithm
#' where edges are removed in decreasing order of the number of
#' shortest paths passing through the edge.
#' - `node_in_greedy()` is a hierarchical, agglomerative algorithm,
#' that tries to optimize modularity in a greedy manner.
#' - `node_in_eigen()` is a top-down, hierarchical algorithm.
#' - `node_in_walktrap()` is a hierarchical, agglomerative algorithm based on random walks.
#'
#' The different algorithms offer various advantages in terms of computation time,
#' availability on different types of networks, ability to maximise modularity,
#' and their logic or domain of inspiration.
#'
#' @inheritParams member_community_non
#' @name member_community_hier
#' @family memberships
NULL
#' @rdname member_community_hier
#' @section Edge-betweenness:
#' This is motivated by the idea that edges connecting different groups
#' are more likely to lie on multiple shortest paths when they are the
#' only option to go from one group to another.
#' This method yields good results but is very slow because of
#' the computational complexity of edge-betweenness calculations and
#' the betweenness scores have to be re-calculated after every edge removal.
#' Networks of ~700 nodes and ~3500 ties are around the upper size limit
#' that are feasible with this approach.
#' @references
#' ## On edge-betweenness community detection
#' Newman, Mark, and Michelle Girvan. 2004.
#' "Finding and evaluating community structure in networks."
#' _Physical Review E_ 69: 026113.
#' \doi{10.1103/PhysRevE.69.026113}
#' @examples
#' node_in_betweenness(ison_adolescents)
#' if(require("ggdendro", quietly = TRUE)){
#' plot(node_in_betweenness(ison_adolescents))
#' }
#' @export
node_in_betweenness <- function(.data){
if(missing(.data)) {expect_nodes(); .data <- .G()}
clust <- suppressWarnings(igraph::cluster_edge_betweenness(
manynet::as_igraph(.data)))
out <- clust$membership
out <- make_node_member(out, .data)
attr(out, "hc") <- stats::as.hclust(clust,
use.modularity = igraph::is_connected(.data))
attr(out, "k") <- max(clust$membership)
out
}
#' @rdname member_community_hier
#' @section Fast-greedy:
#' Initially, each node is assigned a separate community.
#' Communities are then merged iteratively such that each merge
#' yields the largest increase in the current value of modularity,
#' until no further increases to the modularity are possible.
#' The method is fast and recommended as a first approximation
#' because it has no parameters to tune.
#' However, it is known to suffer from a resolution limit.
#' @references
#' ## On fast-greedy community detection
#' Clauset, Aaron, Mark E.J. Newman, and Cristopher Moore. 2004.
#' "Finding community structure in very large networks."
#' _Physical Review E_, 70: 066111.
#' \doi{10.1103/PhysRevE.70.066111}
#' @examples
#' node_in_greedy(ison_adolescents)
#' @export
node_in_greedy <- function(.data){
if(missing(.data)) {expect_nodes(); .data <- .G()}
clust <- igraph::cluster_fast_greedy(to_undirected(as_igraph(.data)))
out <- clust$membership
make_node_member(out, .data)
out <- make_node_member(out, .data)
attr(out, "hc") <- stats::as.hclust(clust,
use.modularity = igraph::is_connected(.data))
attr(out, "k") <- max(clust$membership)
out
}
#' @rdname member_community_hier
#' @section Leading eigenvector:
#' In each step, the network is bifurcated such that modularity increases most.
#' The splits are determined according to the leading eigenvector of the modularity matrix.
#' A stopping condition prevents tightly connected groups from being split further.
#' Note that due to the eigenvector calculations involved,
#' this algorithm will perform poorly on degenerate networks,
#' but will likely obtain a higher modularity than fast-greedy (at some cost of speed).
#' @references
#' ## On leading eigenvector community detection
#' Newman, Mark E.J. 2006.
#' "Finding community structure using the eigenvectors of matrices"
#' _Physical Review E_ 74:036104.
#' \doi{10.1103/PhysRevE.74.036104}
#' @examples
#' node_in_eigen(ison_adolescents)
#' @export
node_in_eigen <- function(.data){
if(missing(.data)) {expect_nodes(); .data <- .G()}
clust <- igraph::cluster_leading_eigen(as_igraph(.data))
out <- clust$membership
make_node_member(out, .data)
out <- make_node_member(out, .data)
attr(out, "hc") <- stats::as.hclust(clust)
attr(out, "k") <- max(clust$membership)
out
}
#' @rdname member_community_hier
#' @section Walktrap:
#' The general idea is that random walks on a network are more likely to stay
#' within the same community because few edges lead outside a community.
#' By repeating random walks of 4 steps many times,
#' information about the hierarchical merging of communities is collected.
#' @param times Integer indicating number of simulations/walks used.
#' By default, `times=50`.
#' @references
#' ## On walktrap community detection
#' Pons, Pascal, and Matthieu Latapy. 2005.
#' "Computing communities in large networks using random walks".
#' 1-20.
#' \doi{10.48550/arXiv.physics/0512106}
#' @examples
#' node_in_walktrap(ison_adolescents)
#' @export
node_in_walktrap <- function(.data, times = 50){
if(missing(.data)) {expect_nodes(); .data <- .G()}
clust <- igraph::cluster_walktrap(manynet::as_igraph(.data))
out <- clust$membership
make_node_member(out, .data)
out <- make_node_member(out, .data)
attr(out, "hc") <- stats::as.hclust(clust,
use.modularity = igraph::is_connected(.data))
attr(out, "k") <- max(clust$membership)
out
}