The MapReduce Serialization Mechanism
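
Hadoop does not ship intermediate key/value pairs with Java's built-in serialization; it uses its own compact Writable mechanism instead. A custom type that travels between map and reduce implements Writable, or WritableComparable if it must also sort as a key, and supplies two methods: write() serializes the fields to a DataOutput, and readFields() reads them back from a DataInput in exactly the same order. The example below defines a FlowBean carrying a phone number plus upstream, downstream, and total traffic, then sums the traffic per phone number. The mapper assumes a tab-separated log line in which field 1 is the phone number and fields 7 and 8 are the upstream and downstream byte counts; those column positions are taken from the code, since the source does not show the log format itself.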

//-------------------FlowBean.java----------------
package pack2;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

public class FlowBean implements WritableComparable<FlowBean> {

        private String phoneNB;
        private long u_flow;
        private long d_flow;
        private long s_flow;

        //Deserialization instantiates the bean via reflection, so a no-arg constructor is required
        public FlowBean() {}
        public FlowBean(String phoneNB, long u_flow, long d_flow) {
                this.phoneNB = phoneNB;
                this.u_flow = u_flow;
                this.d_flow = d_flow;
                this.s_flow = u_flow + d_flow;
        }

        public String getPhoneNB() {
                return phoneNB;
        }

        public void setPhoneNB(String phoneNB) {
                this.phoneNB = phoneNB;
        }

        public long getU_flow() {
                return u_flow;
        }

        public void setU_flow(long u_flow) {
                this.u_flow = u_flow;
        }

        public long getD_flow() {
                return d_flow;
        }

        public void setD_flow(long d_flow) {
                this.d_flow = d_flow;
        }

        public long getS_flow() {
                return s_flow;
        }

        public void setS_flow(long s_flow) {
                this.s_flow = s_flow;
        }

        //Deserialize the object's fields from the stream; reads must match the write order exactly
        @Override
        public void readFields(DataInput in) throws IOException {
               phoneNB = in.readUTF();
               u_flow = in.readLong();
               d_flow = in.readLong();
               s_flow = in.readLong();
        }

        //Serialize the object's fields to the output stream
        @Override
        public void write(DataOutput out) throws IOException {
                out.writeUTF(phoneNB);
                out.writeLong(u_flow);
                out.writeLong(d_flow);
                out.writeLong(s_flow);
        }

        @Override
        public String toString() {
                return " "+ u_flow +" "+ d_flow + " " + s_flow;
        }
        @Override
        public int compareTo(FlowBean o) {
                //Descending order by total flow; Long.compare returns 0 for
                //equal totals, keeping the compareTo contract intact
                return Long.compare(o.s_flow, s_flow);
        }
}
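
Since readFields() must consume bytes in exactly the order write() produced them, a field added to one method but not the other corrupts every record that follows. A cheap way to catch such mismatches before submitting a job is a local round trip through in-memory streams. The class below is a sketch for illustration; its name and the sample values are made up, not part of the original code.

//--------------FlowBeanRoundTrip.java (local sanity check, a sketch)----------------
package pack2;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class FlowBeanRoundTrip {

        public static void main(String[] args) throws IOException {
                FlowBean original = new FlowBean("13726230503", 2481, 24681);

                //Serialize the bean into an in-memory byte stream
                ByteArrayOutputStream bos = new ByteArrayOutputStream();
                original.write(new DataOutputStream(bos));

                //Deserialize into a fresh instance, the way the framework
                //does after calling the no-arg constructor via reflection
                FlowBean copy = new FlowBean();
                copy.readFields(new DataInputStream(
                                new ByteArrayInputStream(bos.toByteArray())));

                //Both lines must show the same phone number and flow values
                System.out.println(original.getPhoneNB() + original);
                System.out.println(copy.getPhoneNB() + copy);
        }
}
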
//--------------FlowSumMapper.java----------------
package pack2;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class FlowSumMapper extends Mapper<LongWritable, Text, Text, FlowBean> {

        @Override
        protected void map(LongWritable key, Text value, Context context)
                        throws IOException, InterruptedException {
                String line = value.toString();
                //Tab-separated record: field 1 holds the phone number,
                //fields 7 and 8 the upstream and downstream byte counts
                String[] fields = line.split("\t");
                String phoneNB = fields[1];
                long u_flow = Long.parseLong(fields[7]);
                long d_flow = Long.parseLong(fields[8]);

                context.write(new Text(phoneNB), new FlowBean(phoneNB, u_flow, d_flow));
        }

}
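
Real logs often contain short or malformed lines, and a single NumberFormatException in map() will fail the task and eventually the job. A defensive variant of the same mapper, sketched below (the class name is hypothetical), skips bad records instead:

//--------------SafeFlowSumMapper.java (defensive variant, a sketch)----------------
package pack2;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class SafeFlowSumMapper extends Mapper<LongWritable, Text, Text, FlowBean> {

        @Override
        protected void map(LongWritable key, Text value, Context context)
                        throws IOException, InterruptedException {
                String[] fields = value.toString().split("\t");
                if (fields.length < 9) {
                        return; //too few columns to reach fields[8]
                }
                try {
                        long u_flow = Long.parseLong(fields[7]);
                        long d_flow = Long.parseLong(fields[8]);
                        context.write(new Text(fields[1]),
                                        new FlowBean(fields[1], u_flow, d_flow));
                } catch (NumberFormatException e) {
                        //a counter could record the bad line here; the sketch just skips it
                }
        }
}
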
//--------------FlowSumReducer.java---------------
package pack2;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class FlowSumReducer extends Reducer<Text, FlowBean, Text, FlowBean> {

        @Override
        protected void reduce(Text key, Iterable<FlowBean> values, Context context)
                        throws IOException, InterruptedException {
                long u_flow_count = 0;
                long d_flow_count = 0;

                for(FlowBean bean : values) {
                        u_flow_count += bean.getU_flow();
                        d_flow_count += bean.getD_flow();
                }

                context.write(key, new FlowBean(key.toString(), u_flow_count, d_flow_count));

        }

}
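
One framework detail matters here: Hadoop reuses a single FlowBean instance while iterating over values, overwriting its fields through readFields() on every step. Copying the primitives out with getU_flow() and getD_flow(), as the loop above does, is safe; stashing the bean references themselves in a collection would leave you holding many pointers to the last record.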

//--------------FlowSumRunner.java----------------
package pack2;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;



public class FlowSumRunner extends Configured implements Tool {

        @Override
        public int run(String[] args) throws Exception {

                //Use the Configuration prepared by ToolRunner (getConf()) so that
                //generic options are honored, instead of creating a fresh one
                Configuration conf = getConf();
                Job job = Job.getInstance(conf);

                job.setJarByClass(FlowSumRunner.class);

                job.setMapperClass(FlowSumMapper.class);
                job.setReducerClass(FlowSumReducer.class);

                job.setMapOutputKeyClass(Text.class);
                job.setMapOutputValueClass(FlowBean.class);

                job.setOutputKeyClass(Text.class);
                job.setOutputValueClass(FlowBean.class);

                FileInputFormat.setInputPaths(job, new Path(args[0]));
                FileOutputFormat.setOutputPath(job, new Path(args[1]));

                return job.waitForCompletion(true) ? 0 : 1;

        }

        public static void main(String[] args) throws Exception {
                int res = ToolRunner.run(new Configuration(), new FlowSumRunner(), args);
                System.exit(res);
        }
}
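
To run the job, package these classes into a jar and pass the HDFS input and output paths as args[0] and args[1]. A typical invocation looks like this (the jar name and paths are placeholders):

hadoop jar flowsum.jar pack2.FlowSumRunner /flow/input /flow/output

Since the runner goes through ToolRunner and run() reads getConf(), generic options such as -D mapreduce.job.reduces=2 can be placed before the paths and are applied to the job automatically.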
