统计图的顶点的度和相同度的顶点数目(GraphLab)

1目标:统计出一个图中每个顶点的度的情况,并且统计相同度的节点数目

2算法:利用GraphLab的GAS(Gather-Apply-Scatter)结构

首先利用全局容器记录相同度的节点数目:std::map<float,float> in_map统计入度,std::map<float,float> out_map统计出度(对于无向图来说就是节点的度)


gather_edges阶段:返回节点的所有边,ALL_EDGES


gather阶段:每收集到一条边,对当前顶点来说就是一次度的计数。通过自定义gather_type(即degree类)实现度的统计,主要是重载加法操作(GraphLab会用gather_type的operator+=合并各条边的gather结果),下面给出gather_type degree的具体实现:


classdegree:graphlab::IS_POD_TYPE

{

public:

inttype;

floatin_degree;

floatout_degree;

degree(inti=-1):type(i),in_degree(0),out_degree(0){}

degree&operator+=(const degree& temp)

{

if(temp.type==0)//in_degree fordirected graph

in_degree++;

elseif(temp.type==1)

out_degree++;//out_degree&& non_directed

return*this;

}

};

gather返回degree(1)时候就会统计出度(对于无向图来说就是节点的度),同理入度则返回degree(0);


apply阶段:经过gather阶段,节点的度已经累加在degree中,因此只需取出该数据(可选地写入文件),同时在map容器中把该度对应的节点数目加一;已经处理过的顶点会被打上标志,从而跳过scatter阶段,防止对同一顶点重复统计


scatter_edges阶段:返回所有的邻居,如果是已经迭代过的顶点则返回NO_EDGES以跳过scatter阶段


scatter阶段:激活邻居


3问题

统计度分布采用全局容器,当针对稀疏图并且度分布极其零散,那么容器可能是耗内存的一个因素,

现在实验只针对了一个小规模的图,大规模的图尚待验证.


4附上代码:


#include <vector>
#include <string>
#include <fstream>
#include<ios>
#include<map>
#include<utility> 		
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
#include <boost/unordered_set.hpp>
#include <graphlab.hpp>
#include <graphlab/util/stl_util.hpp>
#include <graphlab/macros_def.hpp>
// Scalar type stored on every edge (see edge_data::dist).
typedef float distance_type;
// Bound to the "directed" command line option in main(); selects the directed
// code path in the vertex program and in the result writer.
bool DIRECTED_SSSP=false;
// Per-vertex state kept in the graph.
class vertex_data : graphlab::IS_POD_TYPE
{
public:
	// Marks a vertex that has already been processed; starts out false.
	bool flag;

	vertex_data() : flag(false) {}
};

struct edge_data : graphlab::IS_POD_TYPE {
  distance_type dist;
  edge_data(distance_type dist = 1) : dist(dist) { }
}; // end of edge data

// Graph type used throughout the program: vertices carry vertex_data,
// edges carry edge_data.
typedef graphlab::distributed_graph<vertex_data, edge_data> graph_type;

/**
 * Parses one line of an edge list and adds the edge to the graph.
 *
 * Accepted form: "<source> <target> [<weight>]", where the fields may be
 * separated by whitespace and/or a single comma. The weight is optional and
 * defaults to 1.
 *
 * @param graph  graph that receives the edge
 * @param fname  name of the file being loaded (not used here)
 * @param line   one non-empty line of input
 * @return false when the line cannot be parsed; true otherwise, including
 *         the case where a self edge is reported and skipped
 */
bool graph_loader(graph_type& graph, const std::string& fname,
                  const std::string& line) {
  ASSERT_FALSE(line.empty());
  namespace qi = boost::spirit::qi;
  namespace ascii = boost::spirit::ascii;
  namespace phoenix = boost::phoenix;
  graphlab::vertex_id_type source_id(-1), target_id(-1);
  float weight = 1;
  const bool success = qi::phrase_parse
    (line.begin(), line.end(),
     //  Begin grammar
     (
      qi::ulong_[phoenix::ref(source_id) = qi::_1] >> -qi::char_(',') >>
      qi::ulong_[phoenix::ref(target_id) = qi::_1] >>
      -(-qi::char_(',') >> qi::float_[phoenix::ref(weight) = qi::_1])
      )
     ,
     //  End grammar
     ascii::space);
  if(!success) return false;
  if(source_id == target_id) {
    logstream(LOG_ERROR)
      << "Self edge to vertex " << source_id << " is not permitted." << std::endl;
    // The edge was just reported as not permitted, so do not hand it to the
    // graph. Return true so that this line is not reported as a parse failure.
    return true;
  }
  // Create an edge and add it to the graph
  graph.add_edge(source_id, target_id, weight);
  return true; // successful load
} // end of graph loader



// Returns the endpoint of `edge` that is not `vertex`: the target when
// `vertex` is the edge's source, otherwise the source.
inline graph_type::vertex_type
get_other_vertex(const graph_type::edge_type& edge,
                 const graph_type::vertex_type& vertex) {
  if (vertex.id() == edge.source().id()) {
    return edge.target();
  }
  return edge.source();
}

// Gather accumulator used to count the edges of a vertex.
// A value built from degree(1) stands for one out-going (or undirected) edge,
// a value built from degree(0) stands for one in-coming edge, and degree(-1)
// (the default) stands for no edge at all.
class degree:graphlab::IS_POD_TYPE
{
	public:
		int type;          // 0: in-degree sample, 1: out-degree / undirected sample, -1: none
		float in_degree;   // number of in-coming edges represented by this value
		float out_degree;  // number of out-going (or undirected) edges represented by this value

		// Tag-only construction: the value carries the count of the single
		// edge it describes (previously degree(0) started as in=0/out=1,
		// which miscounted in-degrees).
		degree(int i=-1):type(i),in_degree(i==0?1:0),out_degree(i==1?1:0){}

		// Explicit counts, kept so existing degree(i,a) / degree(i,a,b) calls
		// still compile with the same default for b.
		degree(int i,int a,int b=1):type(i),in_degree(a),out_degree(b){}

		// Field-wise sum. The previous implementation added 1 per call no
		// matter how many edges `temp` already represented, so combining two
		// partial sums lost counts. Summing the fields makes the operation
		// associative and commutative.
		degree& operator+=(const degree& temp)
		{
			in_degree+=temp.in_degree;
			out_degree+=temp.out_degree;
			return *this;
		}

};
// Histogram: in-degree -> number of vertices with that in-degree.
// Only filled on the directed code path.
std::map<float,float> in_map;
// Histogram: out-degree -> number of vertices with that out-degree.
// On the undirected code path the key is the accumulated degree of the vertex.
std::map<float,float> out_map;
// Not referenced anywhere else in the visible code.
std::string v_d_file;
class sssp :
  	public graphlab::ivertex_program<graph_type, class degree>,
  	public graphlab::IS_POD_TYPE
{
  public:

	  bool changed;
  edge_dir_type gather_edges(icontext_type& context, 
                             const vertex_type& vertex) const { 
    return graphlab::ALL_EDGES;
  }; // end of gather_edges 

degree gather(icontext_type& context,const vertex_type& vertex,edge_type& edge)const
{
	if(DIRECTED_SSSP)
	{
		return degree(-1);//针对有向图采用系统接口
	}
	else
		return degree(1);
}

void apply(icontext_type& context, vertex_type& vertex,const gather_type& total)
{
	if(!DIRECTED_SSSP)
	{
		changed=false;
		if(vertex.data().flag==false)
		{	
			vertex.data().flag=true;
			/** 
			 *	write vertex' id and degree to file
			**/	
			//std::ofstream os("/home/lxj/data/results/undirected_degree_count_results/vertex_degree0.txt",std::ios_base::out|std::ios_base::app);
			//	os<<vertex.id()<<"\t"<<total.out_degree<<"\n";
			out_map[total.out_degree]++;
		 	//os.close();
			changed=true;
		}
	}
	else
	{
		changed=false;
		if(vertex.data().flag==false)
		{
			vertex.data().flag=true;
			//std::ofstream os("/home/lxj/data/results/directed_degree_count_results/v_d0.txt",std::ios_base::out|std::ios_base::app);
			//os<<vertex.id()<<"\t"<<vertex.num_in_edges()<<"\t"<<vertex.num_out_edges()<<"\n";
			in_map[vertex.num_in_edges()]++;
			out_map[vertex.num_out_edges()]++;
			//os.close();
			changed=true;
		}
	}
	
}

	edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const
	{
		if(changed==true)
		     return graphlab::ALL_EDGES;
		else
			return graphlab::NO_EDGES;
	} // end of scatter_edges

	void scatter(icontext_type& context, const vertex_type& vertex,edge_type& edge) const 
	{
	    const vertex_type other = get_other_vertex(edge, vertex);
    	context.signal(other);
	} // end of scatter

}; // end of shortest path vertex program



int main(int argc, char** argv) {
  // Initialize control plain using mpi
  graphlab::mpi_tools::init(argc, argv);
  graphlab::distributed_control dc;
  global_logger().set_log_level(LOG_INFO);

  // Parse command line options -----------------------------------------------
  graphlab::command_line_options 
    clopts("Single Source Shortest Path Algorithm.");
  std::string graph_dir;
  std::string format = "tsv";
  std::string exec_type = "synchronous";
  size_t powerlaw = 0;
  std::vector<graphlab::vertex_id_type> sources;
  clopts.attach_option("graph", graph_dir,
                       "The graph file.  If none is provided "
                       "then a toy graph will be created");
  clopts.add_positional("graph");

  clopts.attach_option("source", sources,
                       "The source vertices");

  clopts.add_positional("source");

  clopts.attach_option("directed", DIRECTED_SSSP,
                       "Treat edges as directed.");

  clopts.attach_option("engine", exec_type, 
                       "The engine type synchronous or asynchronous");
 
  
  clopts.attach_option("powerlaw", powerlaw,
                       "Generate a synthetic powerlaw out-degree graph. ");
  std::string saveprefix;
  clopts.attach_option("saveprefix", saveprefix,
                       "If set, will save the resultant pagerank to a "
                       "sequence of files with prefix saveprefix");
  if(!clopts.parse(argc, argv)) {
    dc.cout() << "Error in parsing command line arguments." << std::endl;
    return EXIT_FAILURE;
  }


  // Build the graph ----------------------------------------------------------
  graph_type graph(dc, clopts);
  if(powerlaw > 0) { // make a synthetic graph
    dc.cout() << "Loading synthetic Powerlaw graph." << std::endl;
    graph.load_synthetic_powerlaw(powerlaw, false, 2, 100000000);
  } else if (graph_dir.length() > 0) { // Load the graph from a file
    dc.cout() << "Loading graph in format: "<< format << std::endl;
    graph.load(graph_dir, graph_loader);
  } else {
    dc.cout() << "graph or powerlaw option must be specified" << std::endl;
    clopts.print_description();
    return EXIT_FAILURE;
  }
  // must call finalize before querying the graph
  graph.finalize();
  dc.cout() << "#vertices:  " << graph.num_vertices() << std::endl
            << "#edges:     " << graph.num_edges() << std::endl;




  // Running The Engine -------------------------------------------------------
  graphlab::omni_engine<sssp> engine(dc, graph, exec_type, clopts);


  
  // Signal all the vertices in the source set
  //for(size_t i = 0; i < sources.size(); ++i) {
  //  engine.signal(sources[i]);
  //}
  engine.signal_all();

  engine.start();
  const float runtime = engine.elapsed_seconds();
  dc.cout() << "Finished Running engine in " << runtime
            << " seconds." << std::endl;

  std::string s(saveprefix);
  std::ofstream save(s.c_str(),std::ios_base::out|std::ios_base::app);
  if(DIRECTED_SSSP)
  {
 	 	for(std::map<float,float>::iterator it=in_map.begin();it!=in_map.end();it++)//将入度统计写入文件
			save<<it->first<<"\t"<<it->second<<"\n";
		save.close();
		std::string ss(s+"_1");
		std::ofstream save0(ss.c_str(),std::ios_base::out|std::ios_base::app);
		for(std::map<float,float>::iterator iter=out_map.begin();iter!=out_map.end();iter++)//将出度统计写入文件,文件名比入度统计多了_1
			save0<<iter->first<<"\t"<<iter->second<<"\n";
		save0.close();
  }
  else 
  {
 		for(std::map<float,float>::iterator iter=out_map.begin();iter!=out_map.end();iter++)
		save<<iter->first<<"\t"<<iter->second<<"\n";
  }
  save.close();
  // Tear-down communication layer and quit -----------------------------------
  graphlab::mpi_tools::finalize();
  return EXIT_SUCCESS;
} // End of main


// We render this entire program in the documentation



你可能感兴趣的:(统计图的顶点的度和相同度的顶点数目(GraphLab))