Passing custom objects between the map and reduce functions on a Hadoop cluster

In MapReduce, the map and reduce functions can only exchange data as KEY, VALUE pairs, so everything you emit has to fit into a key type and a value type. The familiar word-count example below gets by with the built-in Text and IntWritable types:

@Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Convert the Text content to a String
        String line = value.toString();
        // Split the line into words
        String[] words = line.split(" ");

        // Emit a (word, 1) pair for every word
        for (String word : words) {
            context.write(new Text(word), new IntWritable(1));
        }
    }

@Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int result = 0;
        for (IntWritable value : values) {
            result += value.get();
        }

        // Emit the final (word, count) pair
        context.write(key, new IntWritable(result));
    }
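For context, these two methods live inside Mapper and Reducer subclasses and are wired together by a driver class. The following is a minimal sketch, assuming those subclasses are named WordCountMapper and WordCountReducer (names chosen here for illustration, not from the original code):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountDriver {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "word count");
        job.setJarByClass(WordCountDriver.class);
        // WordCountMapper / WordCountReducer are the assumed classes holding the map() and reduce() shown above
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        // Output key/value types match what map() and reduce() emit
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}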

When the data you need to pass around is more complex, you transfer a custom object instead. Because Hadoop has to serialize data as it moves between map and reduce tasks, the object must provide a suitable serialization mechanism; in other words, the custom class has to implement Hadoop's Writable interface:

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * Created by hadoop on 17-2-18.
 * A custom data type must implement the Writable interface before it can be transferred in Hadoop.
 */
public class MyData implements Writable {
    private String a;
    private int b;

    // A no-argument constructor is required so that Hadoop can deserialize the object
    public MyData() {
    }

    public MyData(String a, int b) {
        this.a = a;
        this.b = b;
    }

    public String getA() {
        return a;
    }

    public void setA(String a) {
        this.a = a;
    }

    public int getB() {
        return b;
    }

    public void setB(int b) {
        this.b = b;
    }

    /*
     * Serialization method
     */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        // writeUTF records the string length, so readUTF can restore the field exactly
        dataOutput.writeUTF(a);
        dataOutput.writeInt(b);
    }
    /*
     * Deserialization method
     */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        // Fields must be read back in exactly the same order they were written
        this.a = dataInput.readUTF();
        this.b = dataInput.readInt();
    }
}
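
Once MyData implements Writable, it can be emitted directly as the value type of a mapper. The sketch below is a hypothetical example (the class name MyDataMapper, and the choice to store the word in field a and a count of 1 in field b, are assumptions made for illustration); Hadoop serializes each MyData instance through write() and restores it through readFields():

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

// Hypothetical mapper that emits the custom MyData type as its output value
public class MyDataMapper extends Mapper<LongWritable, Text, Text, MyData> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        for (String word : line.split(" ")) {
            // Each output value is a custom object; Hadoop serializes it via write()/readFields()
            context.write(new Text(word), new MyData(word, 1));
        }
    }
}

Note that if MyData were also used as a key, it would additionally have to implement WritableComparable so that the framework can sort and group keys during the shuffle; as a plain value, Writable alone is enough. The driver would also register the type with job.setMapOutputValueClass(MyData.class).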
