| 1 | // Copyright 2004 The Trustees of Indiana University. | 
|---|
| 2 |  | 
|---|
| 3 | // Use, modification and distribution is subject to the Boost Software | 
|---|
| 4 | // License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at | 
|---|
| 5 | // http://www.boost.org/LICENSE_1_0.txt) | 
|---|
| 6 |  | 
|---|
| 7 | //  Authors: Douglas Gregor | 
|---|
| 8 | //           Andrew Lumsdaine | 
|---|
| 9 |  | 
|---|
| 10 | // This program performs betweenness centrality (BC) clustering on the | 
|---|
| 11 | // actor collaboration graph available at | 
|---|
| 12 | // http://www.nd.edu/~networks/database/index.html and outputs the | 
|---|
| 13 | // result of clustering in Pajek format. | 
|---|
| 14 | // | 
|---|
| 15 | // This program mimics the BC clustering algorithm program implemented | 
|---|
| 16 | // by Shashikant Penumarthy for JUNG, so that we may compare results | 
|---|
| 17 | // and timings. | 
|---|
| 18 | #include <boost/graph/bc_clustering.hpp> | 
|---|
| 19 | #include <boost/graph/adjacency_list.hpp> | 
|---|
| 20 | #include <boost/graph/graph_traits.hpp> | 
|---|
| 21 | #include <fstream> | 
|---|
| 22 | #include <iostream> | 
|---|
| 23 | #include <string> | 
|---|
| 24 | #include <boost/tokenizer.hpp> | 
|---|
| 25 | #include <boost/lexical_cast.hpp> | 
|---|
| 26 | #include <map> | 
|---|
| 27 |  | 
|---|
| 28 | using namespace boost; | 
|---|
| 29 |  | 
|---|
| 30 | struct Actor | 
|---|
| 31 | { | 
|---|
| 32 | Actor(int id = -1) : id(id) {} | 
|---|
| 33 |  | 
|---|
| 34 | int id; | 
|---|
| 35 | }; | 
|---|
| 36 |  | 
|---|
| 37 | typedef adjacency_list<vecS, vecS, undirectedS, Actor, | 
|---|
| 38 | property<edge_centrality_t, double> > ActorGraph; | 
|---|
| 39 | typedef graph_traits<ActorGraph>::vertex_descriptor Vertex; | 
|---|
| 40 | typedef graph_traits<ActorGraph>::edge_descriptor Edge; | 
|---|
| 41 |  | 
|---|
| 42 | void load_actor_graph(std::istream& in, ActorGraph& g) | 
|---|
| 43 | { | 
|---|
| 44 | std::map<int, Vertex> actors; | 
|---|
| 45 |  | 
|---|
| 46 | std::string line; | 
|---|
| 47 | while (getline(in, line)) { | 
|---|
| 48 | std::vector<Vertex> actors_in_movie; | 
|---|
| 49 |  | 
|---|
| 50 | // Map from the actor numbers on this line to the actor vertices | 
|---|
| 51 | typedef tokenizer<char_separator<char> > Tok; | 
|---|
| 52 | Tok tok(line, char_separator<char>(" ")); | 
|---|
| 53 | for (Tok::iterator id = tok.begin(); id != tok.end(); ++id) { | 
|---|
| 54 | int actor_id = lexical_cast<int>(*id); | 
|---|
| 55 | std::map<int, Vertex>::iterator v = actors.find(actor_id); | 
|---|
| 56 | if (v == actors.end()) { | 
|---|
| 57 | Vertex new_vertex = add_vertex(Actor(actor_id), g); | 
|---|
| 58 | actors[actor_id] = new_vertex; | 
|---|
| 59 | actors_in_movie.push_back(new_vertex); | 
|---|
| 60 | } else { | 
|---|
| 61 | actors_in_movie.push_back(v->second); | 
|---|
| 62 | } | 
|---|
| 63 | } | 
|---|
| 64 |  | 
|---|
| 65 | for (std::vector<Vertex>::iterator i = actors_in_movie.begin(); | 
|---|
| 66 | i != actors_in_movie.end(); ++i) { | 
|---|
| 67 | for (std::vector<Vertex>::iterator j = i + 1; | 
|---|
| 68 | j != actors_in_movie.end(); ++j) { | 
|---|
| 69 | if (!edge(*i, *j, g).second) add_edge(*i, *j, g); | 
|---|
| 70 | } | 
|---|
| 71 | } | 
|---|
| 72 | } | 
|---|
| 73 | } | 
|---|
| 74 |  | 
|---|
| 75 | template<typename Graph, typename VertexIndexMap, typename VertexNameMap> | 
|---|
| 76 | std::ostream& | 
|---|
| 77 | write_pajek_graph(std::ostream& out, const Graph& g, | 
|---|
| 78 | VertexIndexMap vertex_index, VertexNameMap vertex_name) | 
|---|
| 79 | { | 
|---|
| 80 | out << "*Vertices " << num_vertices(g) << '\n'; | 
|---|
| 81 | typedef typename graph_traits<Graph>::vertex_iterator vertex_iterator; | 
|---|
| 82 | for (vertex_iterator v = vertices(g).first; v != vertices(g).second; ++v) { | 
|---|
| 83 | out << get(vertex_index, *v)+1 << " \"" << get(vertex_name, *v) << "\"\n"; | 
|---|
| 84 | } | 
|---|
| 85 |  | 
|---|
| 86 | out << "*Edges\n"; | 
|---|
| 87 | typedef typename graph_traits<Graph>::edge_iterator edge_iterator; | 
|---|
| 88 | for (edge_iterator e = edges(g).first; e != edges(g).second; ++e) { | 
|---|
| 89 | out << get(vertex_index, source(*e, g))+1 << ' ' | 
|---|
| 90 | << get(vertex_index, target(*e, g))+1 << " 1.0\n"; // HACK! | 
|---|
| 91 | } | 
|---|
| 92 | return out; | 
|---|
| 93 | } | 
|---|
| 94 |  | 
|---|
| 95 | class actor_clustering_threshold : public bc_clustering_threshold<double> | 
|---|
| 96 | { | 
|---|
| 97 | typedef bc_clustering_threshold<double> inherited; | 
|---|
| 98 |  | 
|---|
| 99 | public: | 
|---|
| 100 | actor_clustering_threshold(double threshold, const ActorGraph& g, | 
|---|
| 101 | bool normalize) | 
|---|
| 102 | : inherited(threshold, g, normalize), iter(1) { } | 
|---|
| 103 |  | 
|---|
| 104 | bool operator()(double max_centrality, Edge e, const ActorGraph& g) | 
|---|
| 105 | { | 
|---|
| 106 | std::cout << "Iter: " << iter << " Max Centrality: " | 
|---|
| 107 | << (max_centrality / dividend) << std::endl; | 
|---|
| 108 | ++iter; | 
|---|
| 109 | return inherited::operator()(max_centrality, e, g); | 
|---|
| 110 | } | 
|---|
| 111 |  | 
|---|
| 112 | private: | 
|---|
| 113 | unsigned int iter; | 
|---|
| 114 | }; | 
|---|
| 115 |  | 
|---|
| 116 | int main(int argc, char* argv[]) | 
|---|
| 117 | { | 
|---|
| 118 | std::string in_file; | 
|---|
| 119 | std::string out_file; | 
|---|
| 120 | double threshold = -1.0; | 
|---|
| 121 | bool normalize = false; | 
|---|
| 122 |  | 
|---|
| 123 | // Parse command-line options | 
|---|
| 124 | { | 
|---|
| 125 | int on_arg = 1; | 
|---|
| 126 | while (on_arg < argc) { | 
|---|
| 127 | std::string arg(argv[on_arg]); | 
|---|
| 128 | if (arg == "-in") { | 
|---|
| 129 | ++on_arg; assert(on_arg < argc); | 
|---|
| 130 | in_file = argv[on_arg]; | 
|---|
| 131 | } else if (arg == "-out") { | 
|---|
| 132 | ++on_arg; assert(on_arg < argc); | 
|---|
| 133 | out_file = argv[on_arg]; | 
|---|
| 134 | } else if (arg == "-threshold") { | 
|---|
| 135 | ++on_arg; assert(on_arg < argc); | 
|---|
| 136 | threshold = lexical_cast<double>(argv[on_arg]); | 
|---|
| 137 | } else if (arg == "-normalize") { | 
|---|
| 138 | normalize = true; | 
|---|
| 139 | } else { | 
|---|
| 140 | std::cerr << "Unrecognized parameter \"" << arg << "\".\n"; | 
|---|
| 141 | return -1; | 
|---|
| 142 | } | 
|---|
| 143 | ++on_arg; | 
|---|
| 144 | } | 
|---|
| 145 |  | 
|---|
| 146 | if (in_file.empty() || out_file.empty() || threshold < 0) { | 
|---|
| 147 | std::cerr << "error: syntax is actor_clustering [options]\n\n" | 
|---|
| 148 | << "options are:\n" | 
|---|
| 149 | << "\t-in <infile>\tInput file\n" | 
|---|
| 150 | << "\t-out <outfile>\tOutput file\n" | 
|---|
| 151 | << "\t-threshold <value>\tA threshold value\n" | 
|---|
| 152 | << "\t-normalize\tNormalize edge centrality scores\n"; | 
|---|
| 153 | return -1; | 
|---|
| 154 | } | 
|---|
| 155 | } | 
|---|
| 156 |  | 
|---|
| 157 | ActorGraph g; | 
|---|
| 158 |  | 
|---|
| 159 | // Load the actor graph | 
|---|
| 160 | { | 
|---|
| 161 | std::cout << "Building graph." << std::endl; | 
|---|
| 162 | std::ifstream in(in_file.c_str()); | 
|---|
| 163 | if (!in) { | 
|---|
| 164 | std::cerr << "Unable to open file \"" << in_file << "\" for input.\n"; | 
|---|
| 165 | return -2; | 
|---|
| 166 | } | 
|---|
| 167 | load_actor_graph(in, g); | 
|---|
| 168 | } | 
|---|
| 169 |  | 
|---|
| 170 | // Run the algorithm | 
|---|
| 171 | std::cout << "Clusting..." << std::endl; | 
|---|
| 172 | betweenness_centrality_clustering(g, | 
|---|
| 173 | actor_clustering_threshold(threshold, g, normalize), | 
|---|
| 174 | get(edge_centrality, g)); | 
|---|
| 175 |  | 
|---|
| 176 | // Output the graph | 
|---|
| 177 | { | 
|---|
| 178 | std::cout << "Writing graph to file: " << out_file << std::endl; | 
|---|
| 179 | std::ofstream out(out_file.c_str()); | 
|---|
| 180 | if (!out) { | 
|---|
| 181 | std::cerr << "Unable to open file \"" << out_file << "\" for output.\n"; | 
|---|
| 182 | return -3; | 
|---|
| 183 | } | 
|---|
| 184 | write_pajek_graph(out, g, get(vertex_index, g), get(&Actor::id, g)); | 
|---|
| 185 | } | 
|---|
| 186 | return 0; | 
|---|
| 187 | } | 
|---|