1目标:统计出一个图中每个顶点的度的情况,并且统计相同度的节点数目
2算法:利用GraphLab的GAS结构
首先利用全局容器记录相同度的节点数目:map<int,int> in_degree统计入度,map<int,int> out_degree统计出度
gather_edges阶段:返回节点的所有边,ALL_EDGES
gather阶段:收集到一条边时,对于当前顶点来说就是度统计的时候,通过重写gather_type类实现度的统计,主要是重载加的操作(gather默认会执行gather_type的加操作),现在给出具体gather_type类degree的实现:
classdegree:graphlab::IS_POD_TYPE
{
public:
inttype;
floatin_degree;
floatout_degree;
degree(inti=-1):type(i),in_degree(0),out_degree(0){}
degree&operator+=(const degree& temp)
{
if(temp.type==0)//in_degree fordirected graph
in_degree++;
elseif(temp.type==1)
out_degree++;//out_degree&& non_directed
return*this;
}
};
当gather返回degree(1)时候就会统计出度(对于无向图来说就是节点的度),同理入度则返回degree(0);
apply阶段:经过gather阶段,节点的度已经累计在degree中,因此只需取出数据并写入文件即可,同时在map容器中累加该度对应的节点数目;已经遍历过的顶点会被打上标志,以跳过scatter阶段,防止重复迭代已经处理过的顶点
scatter_edges阶段:返回所有的邻居,如果是已经迭代过的顶点则返回NO_EDGES以跳过scatter阶段
scatter阶段:激活邻居
3问题
统计度分布采用全局容器,当针对稀疏图并且度分布极其零散,那么容器可能是耗内存的一个因素,
现在实验只针对了一个小规模的图,大规模的图尚待验证.
4附上代码:
#include <vector> #include <string> #include <fstream> #include<ios> #include<map> #include<utility> #include <boost/spirit/include/qi.hpp> #include <boost/spirit/include/phoenix_core.hpp> #include <boost/spirit/include/phoenix_operator.hpp> #include <boost/spirit/include/phoenix_stl.hpp> #include <boost/unordered_set.hpp> #include <graphlab.hpp> #include <graphlab/util/stl_util.hpp> #include <graphlab/macros_def.hpp> typedef float distance_type; bool DIRECTED_SSSP=false; class vertex_data:graphlab::IS_POD_TYPE { public: bool flag;//顶点迭代标志 vertex_data():flag(false){} }; struct edge_data : graphlab::IS_POD_TYPE { distance_type dist; edge_data(distance_type dist = 1) : dist(dist) { } }; // end of edge data typedef graphlab::distributed_graph<vertex_data, edge_data> graph_type; bool graph_loader(graph_type& graph, const std::string& fname, const std::string& line) { ASSERT_FALSE(line.empty()); namespace qi = boost::spirit::qi; namespace ascii = boost::spirit::ascii; namespace phoenix = boost::phoenix; graphlab::vertex_id_type source_id(-1), target_id(-1); float weight = 1; const bool success = qi::phrase_parse (line.begin(), line.end(), // Begin grammar ( qi::ulong_[phoenix::ref(source_id) = qi::_1] >> -qi::char_(',') >> qi::ulong_[phoenix::ref(target_id) = qi::_1] >> -(-qi::char_(',') >> qi::float_[phoenix::ref(weight) = qi::_1]) ) , // End grammar ascii::space); if(!success) return false; if(source_id == target_id) { logstream(LOG_ERROR) << "Self edge to vertex " << source_id << " is not permitted." << std::endl; } // Create an edge and add it to the graph graph.add_edge(source_id, target_id, weight); return true; // successful load }; // end of graph loader inline graph_type::vertex_type get_other_vertex(const graph_type::edge_type& edge, const graph_type::vertex_type& vertex) { return vertex.id() == edge.source().id()? 
edge.target() : edge.source(); } class degree:graphlab::IS_POD_TYPE { public: int type; float in_degree; float out_degree; degree(int i=-1,int a=0,int b=1):type(i),in_degree(a),out_degree(b){}//针对有向图错误 degree& operator+=(const degree& temp) { if(temp.type==0)//in_degree in_degree++; else if(temp.type==1) out_degree++;//out_degree && non_directed return *this; } }; std::map<float,float> in_map;//in_degree are same std::map<float,float> out_map;//out_degree are same && undirected std::string v_d_file; class sssp : public graphlab::ivertex_program<graph_type, class degree>, public graphlab::IS_POD_TYPE { public: bool changed; edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::ALL_EDGES; }; // end of gather_edges degree gather(icontext_type& context,const vertex_type& vertex,edge_type& edge)const { if(DIRECTED_SSSP) { return degree(-1);//针对有向图采用系统接口 } else return degree(1); } void apply(icontext_type& context, vertex_type& vertex,const gather_type& total) { if(!DIRECTED_SSSP) { changed=false; if(vertex.data().flag==false) { vertex.data().flag=true; /** * write vertex' id and degree to file **/ //std::ofstream os("/home/lxj/data/results/undirected_degree_count_results/vertex_degree0.txt",std::ios_base::out|std::ios_base::app); // os<<vertex.id()<<"\t"<<total.out_degree<<"\n"; out_map[total.out_degree]++; //os.close(); changed=true; } } else { changed=false; if(vertex.data().flag==false) { vertex.data().flag=true; //std::ofstream os("/home/lxj/data/results/directed_degree_count_results/v_d0.txt",std::ios_base::out|std::ios_base::app); //os<<vertex.id()<<"\t"<<vertex.num_in_edges()<<"\t"<<vertex.num_out_edges()<<"\n"; in_map[vertex.num_in_edges()]++; out_map[vertex.num_out_edges()]++; //os.close(); changed=true; } } } edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { if(changed==true) return graphlab::ALL_EDGES; else return graphlab::NO_EDGES; } // end of scatter_edges void 
scatter(icontext_type& context, const vertex_type& vertex,edge_type& edge) const { const vertex_type other = get_other_vertex(edge, vertex); context.signal(other); } // end of scatter }; // end of shortest path vertex program int main(int argc, char** argv) { // Initialize control plain using mpi graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; global_logger().set_log_level(LOG_INFO); // Parse command line options ----------------------------------------------- graphlab::command_line_options clopts("Single Source Shortest Path Algorithm."); std::string graph_dir; std::string format = "tsv"; std::string exec_type = "synchronous"; size_t powerlaw = 0; std::vector<graphlab::vertex_id_type> sources; clopts.attach_option("graph", graph_dir, "The graph file. If none is provided " "then a toy graph will be created"); clopts.add_positional("graph"); clopts.attach_option("source", sources, "The source vertices"); clopts.add_positional("source"); clopts.attach_option("directed", DIRECTED_SSSP, "Treat edges as directed."); clopts.attach_option("engine", exec_type, "The engine type synchronous or asynchronous"); clopts.attach_option("powerlaw", powerlaw, "Generate a synthetic powerlaw out-degree graph. "); std::string saveprefix; clopts.attach_option("saveprefix", saveprefix, "If set, will save the resultant pagerank to a " "sequence of files with prefix saveprefix"); if(!clopts.parse(argc, argv)) { dc.cout() << "Error in parsing command line arguments." << std::endl; return EXIT_FAILURE; } // Build the graph ---------------------------------------------------------- graph_type graph(dc, clopts); if(powerlaw > 0) { // make a synthetic graph dc.cout() << "Loading synthetic Powerlaw graph." 
<< std::endl; graph.load_synthetic_powerlaw(powerlaw, false, 2, 100000000); } else if (graph_dir.length() > 0) { // Load the graph from a file dc.cout() << "Loading graph in format: "<< format << std::endl; graph.load(graph_dir, graph_loader); } else { dc.cout() << "graph or powerlaw option must be specified" << std::endl; clopts.print_description(); return EXIT_FAILURE; } // must call finalize before querying the graph graph.finalize(); dc.cout() << "#vertices: " << graph.num_vertices() << std::endl << "#edges: " << graph.num_edges() << std::endl; // Running The Engine ------------------------------------------------------- graphlab::omni_engine<sssp> engine(dc, graph, exec_type, clopts); // Signal all the vertices in the source set //for(size_t i = 0; i < sources.size(); ++i) { // engine.signal(sources[i]); //} engine.signal_all(); engine.start(); const float runtime = engine.elapsed_seconds(); dc.cout() << "Finished Running engine in " << runtime << " seconds." << std::endl; std::string s(saveprefix); std::ofstream save(s.c_str(),std::ios_base::out|std::ios_base::app); if(DIRECTED_SSSP) { for(std::map<float,float>::iterator it=in_map.begin();it!=in_map.end();it++)//将入度统计写入文件 save<<it->first<<"\t"<<it->second<<"\n"; save.close(); std::string ss(s+"_1"); std::ofstream save0(ss.c_str(),std::ios_base::out|std::ios_base::app); for(std::map<float,float>::iterator iter=out_map.begin();iter!=out_map.end();iter++)//将出度统计写入文件,文件名比入度统计多了_1 save0<<iter->first<<"\t"<<iter->second<<"\n"; save0.close(); } else { for(std::map<float,float>::iterator iter=out_map.begin();iter!=out_map.end();iter++) save<<iter->first<<"\t"<<iter->second<<"\n"; } save.close(); // Tear-down communication layer and quit ----------------------------------- graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } // End of main // We render this entire program in the documentation