We developed a family of parallel and high-throughput algorithms for computing graph clusters based on the Infomap equation, a flow-based clustering objective function that encodes the information content of a random walk through the graph: short encodings involve big jumps between well-defined clusters, just like geographic maps use landmarks effectively. Infomap has won several third-party benchmarks, but remains constrained by its serial implementation.
Seung-Hee Bae developed a multi-core generalization of Infomap called RelaxMap, a new technique called prioritization that can improve nearly any graph clustering algorithm, and a highly scalable approximation algorithm called GossipMap. Relaxmap “relaxes” concurrency assumptions to avoid lock overhead, achieving high parallel efficiency in shared-memory multicore experiments while exhibiting similar convergence properties and finding similar community structures as the serial algorithm. Prioritization focuses the algorithm on the neighborhoods surrounding the vertices moved in the previous iteration, providing a nearly fre 30% speedup in any clustering algorithm based on vertex movement. Gossipmap uses a local-only approximation to the infomap objective function to avoid expensive global computations. Empricially and quite surprisingly, this aggressive approximation achieves very competitive results with the serial Infomap algorithm and allows us to cluster billion-edge directed graphs using the methods with the highest-known quality.
See the project page on github
@article{bae2016scalable, author = {Bae, Seung-Hee and Halperin, Daniel and West, Jevin and Rosvall, Martin and Howe, Bill}, title = {Scalable and Efficient Flow-Based Community Detection for Large-Scale Graph Analysis}, journal = {Transactions on Knowledge Discovery from Data (under review)}, year = {2016} }
@inproceedings{bae2015gossip, author = {Bae, Seung\-Hee and Howe, Bill}, title = {GossipMap: a distributed community detection algorithm for billion-edge directed graphs}, booktitle = {Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, {Supercomputing} 2015, Austin, TX, USA, November 15-20, 2015}, pages = {27:1--27:12}, year = {2015}, crossref = {DBLP:conf/sc/2015}, url = {http://doi.acm.org/10.1145/2807591.2807668}, doi = {10.1145/2807591.2807668}, timestamp = {Tue, 03 Nov 2015 13:08:57 +0100}, biburl = {http://dblp.uni-trier.de/rec/bib/conf/sc/BaeH15}, bibsource = {dblp computer science bibliography, http://dblp.org} }
@inproceedings{bae2013flowbased, author = {Bae, Seung-Hee and Halperin, Daniel and West, Jevin and Rosvall, Martin and Howe, Bill}, title = {Scalable Flow-Based Community Detection for Large-Scale Network Analysis}, booktitle = {Proceedings of IEEE International Conference on Data Mining Workshops (ICDMW 2013)}, year = {2013} }