| 1 | // Copyright 2004 The Trustees of Indiana University. |
|---|
| 2 | |
|---|
| 3 | // Use, modification and distribution is subject to the Boost Software |
|---|
| 4 | // License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at |
|---|
| 5 | // http://www.boost.org/LICENSE_1_0.txt) |
|---|
| 6 | |
|---|
| 7 | // Authors: Douglas Gregor |
|---|
| 8 | // Andrew Lumsdaine |
|---|
| 9 | |
|---|
| 10 | // This program performs betweenness centrality (BC) clustering on the |
|---|
| 11 | // actor collaboration graph available at |
|---|
| 12 | // http://www.nd.edu/~networks/database/index.html and outputs the |
|---|
| 13 | // result of clustering in Pajek format. |
|---|
| 14 | // |
|---|
| 15 | // This program mimics the BC clustering algorithm program implemented |
|---|
| 16 | // by Shashikant Penumarthy for JUNG, so that we may compare results |
|---|
| 17 | // and timings. |
|---|
| 18 | #include <boost/graph/bc_clustering.hpp> |
|---|
| 19 | #include <boost/graph/adjacency_list.hpp> |
|---|
| 20 | #include <boost/graph/graph_traits.hpp> |
|---|
| 21 | #include <fstream> |
|---|
| 22 | #include <iostream> |
|---|
| 23 | #include <string> |
|---|
| 24 | #include <boost/tokenizer.hpp> |
|---|
| 25 | #include <boost/lexical_cast.hpp> |
|---|
| 26 | #include <map> |
|---|
| 27 | |
|---|
| 28 | using namespace boost; |
|---|
| 29 | |
|---|
| 30 | struct Actor |
|---|
| 31 | { |
|---|
| 32 | Actor(int id = -1) : id(id) {} |
|---|
| 33 | |
|---|
| 34 | int id; |
|---|
| 35 | }; |
|---|
| 36 | |
|---|
| 37 | typedef adjacency_list<vecS, vecS, undirectedS, Actor, |
|---|
| 38 | property<edge_centrality_t, double> > ActorGraph; |
|---|
| 39 | typedef graph_traits<ActorGraph>::vertex_descriptor Vertex; |
|---|
| 40 | typedef graph_traits<ActorGraph>::edge_descriptor Edge; |
|---|
| 41 | |
|---|
| 42 | void load_actor_graph(std::istream& in, ActorGraph& g) |
|---|
| 43 | { |
|---|
| 44 | std::map<int, Vertex> actors; |
|---|
| 45 | |
|---|
| 46 | std::string line; |
|---|
| 47 | while (getline(in, line)) { |
|---|
| 48 | std::vector<Vertex> actors_in_movie; |
|---|
| 49 | |
|---|
| 50 | // Map from the actor numbers on this line to the actor vertices |
|---|
| 51 | typedef tokenizer<char_separator<char> > Tok; |
|---|
| 52 | Tok tok(line, char_separator<char>(" ")); |
|---|
| 53 | for (Tok::iterator id = tok.begin(); id != tok.end(); ++id) { |
|---|
| 54 | int actor_id = lexical_cast<int>(*id); |
|---|
| 55 | std::map<int, Vertex>::iterator v = actors.find(actor_id); |
|---|
| 56 | if (v == actors.end()) { |
|---|
| 57 | Vertex new_vertex = add_vertex(Actor(actor_id), g); |
|---|
| 58 | actors[actor_id] = new_vertex; |
|---|
| 59 | actors_in_movie.push_back(new_vertex); |
|---|
| 60 | } else { |
|---|
| 61 | actors_in_movie.push_back(v->second); |
|---|
| 62 | } |
|---|
| 63 | } |
|---|
| 64 | |
|---|
| 65 | for (std::vector<Vertex>::iterator i = actors_in_movie.begin(); |
|---|
| 66 | i != actors_in_movie.end(); ++i) { |
|---|
| 67 | for (std::vector<Vertex>::iterator j = i + 1; |
|---|
| 68 | j != actors_in_movie.end(); ++j) { |
|---|
| 69 | if (!edge(*i, *j, g).second) add_edge(*i, *j, g); |
|---|
| 70 | } |
|---|
| 71 | } |
|---|
| 72 | } |
|---|
| 73 | } |
|---|
| 74 | |
|---|
| 75 | template<typename Graph, typename VertexIndexMap, typename VertexNameMap> |
|---|
| 76 | std::ostream& |
|---|
| 77 | write_pajek_graph(std::ostream& out, const Graph& g, |
|---|
| 78 | VertexIndexMap vertex_index, VertexNameMap vertex_name) |
|---|
| 79 | { |
|---|
| 80 | out << "*Vertices " << num_vertices(g) << '\n'; |
|---|
| 81 | typedef typename graph_traits<Graph>::vertex_iterator vertex_iterator; |
|---|
| 82 | for (vertex_iterator v = vertices(g).first; v != vertices(g).second; ++v) { |
|---|
| 83 | out << get(vertex_index, *v)+1 << " \"" << get(vertex_name, *v) << "\"\n"; |
|---|
| 84 | } |
|---|
| 85 | |
|---|
| 86 | out << "*Edges\n"; |
|---|
| 87 | typedef typename graph_traits<Graph>::edge_iterator edge_iterator; |
|---|
| 88 | for (edge_iterator e = edges(g).first; e != edges(g).second; ++e) { |
|---|
| 89 | out << get(vertex_index, source(*e, g))+1 << ' ' |
|---|
| 90 | << get(vertex_index, target(*e, g))+1 << " 1.0\n"; // HACK! |
|---|
| 91 | } |
|---|
| 92 | return out; |
|---|
| 93 | } |
|---|
| 94 | |
|---|
| 95 | class actor_clustering_threshold : public bc_clustering_threshold<double> |
|---|
| 96 | { |
|---|
| 97 | typedef bc_clustering_threshold<double> inherited; |
|---|
| 98 | |
|---|
| 99 | public: |
|---|
| 100 | actor_clustering_threshold(double threshold, const ActorGraph& g, |
|---|
| 101 | bool normalize) |
|---|
| 102 | : inherited(threshold, g, normalize), iter(1) { } |
|---|
| 103 | |
|---|
| 104 | bool operator()(double max_centrality, Edge e, const ActorGraph& g) |
|---|
| 105 | { |
|---|
| 106 | std::cout << "Iter: " << iter << " Max Centrality: " |
|---|
| 107 | << (max_centrality / dividend) << std::endl; |
|---|
| 108 | ++iter; |
|---|
| 109 | return inherited::operator()(max_centrality, e, g); |
|---|
| 110 | } |
|---|
| 111 | |
|---|
| 112 | private: |
|---|
| 113 | unsigned int iter; |
|---|
| 114 | }; |
|---|
| 115 | |
|---|
| 116 | int main(int argc, char* argv[]) |
|---|
| 117 | { |
|---|
| 118 | std::string in_file; |
|---|
| 119 | std::string out_file; |
|---|
| 120 | double threshold = -1.0; |
|---|
| 121 | bool normalize = false; |
|---|
| 122 | |
|---|
| 123 | // Parse command-line options |
|---|
| 124 | { |
|---|
| 125 | int on_arg = 1; |
|---|
| 126 | while (on_arg < argc) { |
|---|
| 127 | std::string arg(argv[on_arg]); |
|---|
| 128 | if (arg == "-in") { |
|---|
| 129 | ++on_arg; assert(on_arg < argc); |
|---|
| 130 | in_file = argv[on_arg]; |
|---|
| 131 | } else if (arg == "-out") { |
|---|
| 132 | ++on_arg; assert(on_arg < argc); |
|---|
| 133 | out_file = argv[on_arg]; |
|---|
| 134 | } else if (arg == "-threshold") { |
|---|
| 135 | ++on_arg; assert(on_arg < argc); |
|---|
| 136 | threshold = lexical_cast<double>(argv[on_arg]); |
|---|
| 137 | } else if (arg == "-normalize") { |
|---|
| 138 | normalize = true; |
|---|
| 139 | } else { |
|---|
| 140 | std::cerr << "Unrecognized parameter \"" << arg << "\".\n"; |
|---|
| 141 | return -1; |
|---|
| 142 | } |
|---|
| 143 | ++on_arg; |
|---|
| 144 | } |
|---|
| 145 | |
|---|
| 146 | if (in_file.empty() || out_file.empty() || threshold < 0) { |
|---|
| 147 | std::cerr << "error: syntax is actor_clustering [options]\n\n" |
|---|
| 148 | << "options are:\n" |
|---|
| 149 | << "\t-in <infile>\tInput file\n" |
|---|
| 150 | << "\t-out <outfile>\tOutput file\n" |
|---|
| 151 | << "\t-threshold <value>\tA threshold value\n" |
|---|
| 152 | << "\t-normalize\tNormalize edge centrality scores\n"; |
|---|
| 153 | return -1; |
|---|
| 154 | } |
|---|
| 155 | } |
|---|
| 156 | |
|---|
| 157 | ActorGraph g; |
|---|
| 158 | |
|---|
| 159 | // Load the actor graph |
|---|
| 160 | { |
|---|
| 161 | std::cout << "Building graph." << std::endl; |
|---|
| 162 | std::ifstream in(in_file.c_str()); |
|---|
| 163 | if (!in) { |
|---|
| 164 | std::cerr << "Unable to open file \"" << in_file << "\" for input.\n"; |
|---|
| 165 | return -2; |
|---|
| 166 | } |
|---|
| 167 | load_actor_graph(in, g); |
|---|
| 168 | } |
|---|
| 169 | |
|---|
| 170 | // Run the algorithm |
|---|
| 171 | std::cout << "Clusting..." << std::endl; |
|---|
| 172 | betweenness_centrality_clustering(g, |
|---|
| 173 | actor_clustering_threshold(threshold, g, normalize), |
|---|
| 174 | get(edge_centrality, g)); |
|---|
| 175 | |
|---|
| 176 | // Output the graph |
|---|
| 177 | { |
|---|
| 178 | std::cout << "Writing graph to file: " << out_file << std::endl; |
|---|
| 179 | std::ofstream out(out_file.c_str()); |
|---|
| 180 | if (!out) { |
|---|
| 181 | std::cerr << "Unable to open file \"" << out_file << "\" for output.\n"; |
|---|
| 182 | return -3; |
|---|
| 183 | } |
|---|
| 184 | write_pajek_graph(out, g, get(vertex_index, g), get(&Actor::id, g)); |
|---|
| 185 | } |
|---|
| 186 | return 0; |
|---|
| 187 | } |
|---|