Implementing Traffic Statistics with MapReduce

Requirement: for a log file in which each record contains a phone number together with upstream and downstream traffic fields, compute the total upstream traffic, total downstream traffic, and total combined traffic for each phone number.
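Each input record is assumed to have a layout like the sketch below (hypothetical, inferred only from how the Mapper indexes the fields): the phone number is the second field, and the upstream and downstream traffic counters are the third-from-last and second-from-last fields.

<record id>    <phone number>    ...    <upFlow>    <downFlow>    <last field>
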
Define the custom JavaBean:

package com.flowdemo;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * Custom FlowBean class that implements the Writable interface
 */
public class FlowBean implements Writable {
    public long upFlow;
    public long downFlow;
    public long sumFlow;

    // Deserialization requires a no-argument constructor
    public FlowBean() {
        super();
    }

    public FlowBean(long upFlow, long downFlow) {
        super();
        this.upFlow = upFlow;
        this.downFlow = downFlow;
        this.sumFlow = upFlow + downFlow;
    }

    /**
     * Override the serialization method
     *
     * @param out
     * @throws IOException
     */
    @Override
    public void write(DataOutput out) throws IOException {
        // DataOutput converts the Java values into bytes and writes them into the binary output stream
        out.writeLong(this.upFlow);
        out.writeLong(this.downFlow);
        out.writeLong(this.sumFlow);
    }

    /**
     * Override the deserialization method; fields must be read in exactly the same order they were written
     *
     * @param in
     * @throws IOException
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        // Read directly from the stream; there are no field markers, so the read order must match the write order
        this.upFlow = in.readLong();
        this.downFlow = in.readLong();
        this.sumFlow = in.readLong();
    }

    /**
     * Override toString to define the format of each output record
     * @return
     */
    @Override
    public String toString() {
        return "FlowBean{" +
                "upFlow=" + upFlow +
                ", downFlow=" + downFlow +
                ", sumFlow=" + sumFlow +
                '}';
    }

    public long getUpFlow() {
        return upFlow;
    }

    public void setUpFlow(long upFlow) {
        this.upFlow = upFlow;
    }

    public long getDownFlow() {
        return downFlow;
    }

    public void setDownFlow(long downFlow) {
        this.downFlow = downFlow;
    }

    public long getSumFlow() {
        return sumFlow;
    }

    public void setSumFlow(long sumFlow) {
        this.sumFlow = sumFlow;
    }
}

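Because readFields must mirror write exactly, it can be worth sanity-checking the bean outside of Hadoop. The following is a minimal sketch (not part of the original post; the values are arbitrary) that round-trips a FlowBean through its own Writable methods:

package com.flowdemo;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class FlowBeanRoundTrip {
    public static void main(String[] args) throws IOException {
        FlowBean original = new FlowBean(1116, 954);   // arbitrary sample values

        // Serialize: write() turns the three long fields into raw bytes
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        original.write(new DataOutputStream(buffer));

        // Deserialize: readFields() reads the longs back in the same order
        FlowBean copy = new FlowBean();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));

        // Should print: FlowBean{upFlow=1116, downFlow=954, sumFlow=2070}
        System.out.println(copy);
    }
}
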
Write the Mapper program:

package com.flowdemo;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Custom FlowCountMapper class that extends Mapper
 */
public class FlowCountMapper extends Mapper<LongWritable, Text, Text, FlowBean> {
    //    FlowBean v = new FlowBean();
    Text k = new Text();

    /**
     * Override the map method
     *
     * @param key
     * @param value
     * @param context
     * @throws IOException
     * @throws InterruptedException
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        //1. Read one line of input
        String line = value.toString();
        //2. Split the line into fields (the sample data is assumed to be tab-separated)
        String[] fields = line.split("\t");
        //3. Build the output key and value
        //3.1 Extract the phone number
        String phoneNumber = fields[1];

        //3.2 Extract the upstream and downstream traffic
        long upFlow = Long.parseLong(fields[fields.length - 3]);
        long downFlow = Long.parseLong(fields[fields.length - 2]);

        k.set(phoneNumber);
        FlowBean v = new FlowBean(upFlow, downFlow);

        context.write(k, v);
    }
}

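For a quick check of the field indexing in map(), the parsing logic can be exercised on a single hypothetical tab-separated record (the line below is invented for illustration and is not from the original data set):

package com.flowdemo;

public class MapperParseCheck {
    public static void main(String[] args) {
        // Hypothetical record: id, phone number, some intermediate fields, upFlow, downFlow, status
        String line = "1\t13736230513\t192.196.100.1\twww.example.com\t2481\t24681\t200";
        String[] fields = line.split("\t");

        String phoneNumber = fields[1];                            // 13736230513
        long upFlow = Long.parseLong(fields[fields.length - 3]);   // 2481
        long downFlow = Long.parseLong(fields[fields.length - 2]); // 24681

        // Prints: 13736230513 -> FlowBean{upFlow=2481, downFlow=24681, sumFlow=27162}
        System.out.println(phoneNumber + " -> " + new FlowBean(upFlow, downFlow));
    }
}
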
Write the Reducer program:

package com.flowdemo;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class FlowCountReduce extends Reducer<Text, FlowBean, Text, FlowBean> {
    /**
     * Override the reduce method
     *
     * @param key   the phone number
     * @param values
     * @param context
     * @throws IOException
     * @throws InterruptedException
     */
    @Override
    protected void reduce(Text key, Iterable<FlowBean> values, Context context) throws IOException, InterruptedException {
        long sumUpFlow = 0;
        long sumDownFlow = 0;

        // Accumulate the upstream and downstream traffic of every record for this phone number
        for (FlowBean value : values) {
            sumUpFlow += value.getUpFlow();
            sumDownFlow += value.getDownFlow();
        }

        // Emit the phone number together with its traffic totals
        FlowBean resultFlowBean = new FlowBean(sumUpFlow, sumDownFlow);
        context.write(key, resultFlowBean);
    }
}

Write the Driver program:

package com.flowdemo;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class FlowCountDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Hard-coded local input/output paths for testing; these override any command-line arguments
        args = new String[]{"e:/input/inputFlow", "e:/output"};

        //1. Get the configuration information and create the job
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        //6. Specify the jar containing this program by its driver class
        job.setJarByClass(FlowCountDriver.class);

        //2. Specify the Mapper and Reducer classes the job should use
        job.setMapperClass(FlowCountMapper.class);
        job.setReducerClass(FlowCountReduce.class);

        //3. Specify the Mapper's output key/value types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FlowBean.class);

        //4. Specify the final output key/value types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);


        //5. Specify the input and output directories for the job
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        //7. Submit the job (and its jar) to the cluster and wait for completion
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }
}

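One optional improvement that is not in the original post: because FlowCountReduce has identical input and output key/value types (Text, FlowBean), it can also be registered as a combiner to pre-aggregate map output and cut down shuffle traffic. A one-line sketch, to be placed in the Driver after the setReducerClass call:

// Optional: pre-aggregate per-phone traffic on the map side
job.setCombinerClass(FlowCountReduce.class);
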
Finally, package the program into a jar and submit it to the cluster to run it:

hadoop jar demo.jar <fully-qualified Driver class> <input path> <output path>
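
Concretely, assuming the jar is named flowdemo.jar and the input data has already been uploaded to HDFS (the paths below are examples), the submission and a check of the result might look like this. Note that the hard-coded args line in the Driver above must be removed or commented out first, otherwise the command-line paths are ignored:

hadoop jar flowdemo.jar com.flowdemo.FlowCountDriver /input/inputFlow /output
hdfs dfs -cat /output/part-r-00000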
