用 Java 编写 MapReduce 程序(2):flowSum

2. 用 MapReduce 程序统计每个手机号的上行流量、下行流量及总流量
①FlowBean类:

package cn.mr.flowSum;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;

import com.sun.corba.se.impl.oa.toa.TOA;

/**
 * Traffic record for one phone number: upstream bytes, downstream bytes and
 * their total.
 *
 * <p>Implements Hadoop's {@code WritableComparable} so instances can be
 * serialized between nodes and, when used as a key, sorted by the framework.
 * The natural ordering is by total traffic, largest first; ties are broken by
 * phone number so that {@code compareTo} is a valid total order
 * ({@code a.compareTo(a) == 0}).
 *
 * <p>Note: the individual setters do not recompute {@code total_flow}; only
 * the three-argument constructor derives it from the up/down values.
 */
public class FlowBean implements WritableComparable<FlowBean> {

	private String phoneNUM;
	private long up_flow;
	private long down_flow;
	private long total_flow;

	/** No-arg constructor; Hadoop instantiates Writables reflectively and needs it. */
	public FlowBean() {
	}

	/**
	 * Creates a record and derives the total from the two directions.
	 *
	 * @param phoneNUM  subscriber phone number
	 * @param up_flow   upstream traffic
	 * @param down_flow downstream traffic
	 */
	public FlowBean(String phoneNUM, long up_flow, long down_flow) {
		this.phoneNUM = phoneNUM;
		this.up_flow = up_flow;
		this.down_flow = down_flow;
		this.total_flow = up_flow + down_flow;
	}

	public long getTotal_flow() {
		return total_flow;
	}

	public void setTotal_flow(long total_flow) {
		this.total_flow = total_flow;
	}

	public String getPhoneNUM() {
		return phoneNUM;
	}

	public void setPhoneNUM(String phoneNUM) {
		this.phoneNUM = phoneNUM;
	}

	public long getUp_flow() {
		return up_flow;
	}

	public void setUp_flow(long up_flow) {
		this.up_flow = up_flow;
	}

	public long getDown_flow() {
		return down_flow;
	}

	public void setDown_flow(long down_flow) {
		this.down_flow = down_flow;
	}

	/**
	 * Restores this bean from Hadoop's binary form. Fields are read in exactly
	 * the order {@link #write(DataOutput)} emits them.
	 *
	 * @param datainput stream positioned at a serialized {@code FlowBean}
	 * @throws IOException if the underlying stream fails
	 */
	@Override
	public void readFields(DataInput datainput) throws IOException {
		phoneNUM = datainput.readUTF();
		up_flow = datainput.readLong();
		down_flow = datainput.readLong();
		total_flow = datainput.readLong();
	}

	/**
	 * Serializes this bean in Hadoop's binary form: phone number, up, down,
	 * total.
	 *
	 * @param dataoutput destination stream
	 * @throws IOException if the underlying stream fails
	 */
	@Override
	public void write(DataOutput dataoutput) throws IOException {
		// writeUTF(null) throws NullPointerException; a bean built with the
		// no-arg constructor has no phone number yet, so emit "" instead.
		dataoutput.writeUTF(phoneNUM == null ? "" : phoneNUM);
		dataoutput.writeLong(up_flow);
		dataoutput.writeLong(down_flow);
		dataoutput.writeLong(total_flow);
	}

	/** Tab-separated text form: phone, up, down, total. */
	@Override
	public String toString() {
		return "" + phoneNUM + '\t' + up_flow + '\t' + down_flow + '\t' + total_flow;
	}

	/**
	 * Orders beans by total traffic, descending; equal totals are ordered by
	 * phone number (a missing phone number is treated as the empty string).
	 *
	 * @param o the bean to compare against
	 * @return negative if this bean sorts first, positive if it sorts last,
	 *         zero only when total and phone number are both equal
	 */
	@Override
	public int compareTo(FlowBean o) {
		if (total_flow != o.total_flow) {
			// Larger totals come first.
			return total_flow > o.total_flow ? -1 : 1;
		}
		String mine = phoneNUM == null ? "" : phoneNUM;
		String theirs = o.phoneNUM == null ? "" : o.phoneNUM;
		return mine.compareTo(theirs);
	}

}

②mapper程序:

package cn.mr.flowSum;

import java.io.IOException;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * FlowBean 是我们自定义的一种数据类型,要在hadoop的各个节点之间传输,应该遵循hadoop的序列化机制
 * 就必须实现hadoop相应的序列化接口
 *
 */
public class flowSumMapper extends Mapper{

	protected void map(LongWritable key, Text value, Mapper.Context context) throws IOException ,InterruptedException {
		
		String line = value.toString();
		//tab进行分割
		String []fields = StringUtils.split(line, "\t");
		
		//取第一个为phonnum
		String phoneNUM = fields[0];
		long up_flow = Long.parseLong(fields[1]);
		long down_flow = Long.parseLong(fields[2]);
		//定义flowbean对象,并进行初始化
		FlowBean flowBean = new FlowBean(phoneNUM,up_flow,down_flow);
		
		context.write(new Text(phoneNUM), flowBean);
	};
}

③reduce程序:

package cn.mr.flowSum;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class flowSumReducer extends Reducer{

	protected void reduce(Text key, Iterable flowBeans, Context context) throws IOException ,InterruptedException {
		long up_flow_sum = 0;
		long down_flow_sum = 0;
		for(FlowBean flowBean:flowBeans){
			String phoneNUM = flowBean.getPhoneNUM();
			up_flow_sum += flowBean.getUp_flow();
			down_flow_sum += flowBean.getDown_flow();
		}
		//这里value写入文本的格式是 "" + phoneNUM + '\t' + up_flow + '\t' + down_flow + '\t' + total_flow
		//FlowBean方法的toString()
		context.write(key, new FlowBean(key.toString(), up_flow_sum, down_flow_sum));
	};
}

④程序入口:

package cn.mr.flowSum;

import java.io.FileOutputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Entry point that configures and submits the flow-sum MapReduce job.
 *
 * <p>Usage: {@code flowSumRunner <input path> <output path>}
 */
public class flowSumRunner extends Configured implements Tool {

	/**
	 * Builds the job from the configuration supplied by {@code ToolRunner} and
	 * waits for it to finish.
	 *
	 * @param as command-line arguments left after generic-option parsing:
	 *           {@code as[0]} is the input path, {@code as[1]} the output path
	 * @return 0 if the job succeeded, 1 if it failed, 2 if the arguments are
	 *         missing
	 * @throws Exception if job setup or submission fails
	 */
	@Override
	public int run(String[] as) throws Exception {
		if (as == null || as.length < 2) {
			System.err.println("Usage: flowSumRunner <input path> <output path>");
			return 2;
		}

		// Use the configuration injected by ToolRunner so generic options
		// (-D, -conf, -fs, ...) take effect; fall back to a fresh one only
		// when run() is called without a configuration having been set.
		Configuration conf = getConf();
		if (conf == null) {
			conf = new Configuration();
		}
		Job job = Job.getInstance(conf);

		job.setJarByClass(flowSumRunner.class);

		job.setMapperClass(flowSumMapper.class);
		job.setReducerClass(flowSumReducer.class);

		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(FlowBean.class);

		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(FlowBean.class);

		FileInputFormat.setInputPaths(job, new Path(as[0]));
		FileOutputFormat.setOutputPath(job, new Path(as[1]));

		return job.waitForCompletion(true) ? 0 : 1;
	}

	/**
	 * Runs the job through {@code ToolRunner} and exits with its status code.
	 *
	 * @param args command-line arguments, including any Hadoop generic options
	 * @throws Exception if the job cannot be run
	 */
	public static void main(String[] args) throws Exception {
		int res = ToolRunner.run(new Configuration(), new flowSumRunner(), args);
		System.exit(res);
	}

}

你可能感兴趣的:(hadoop)