Custom InputFormat && OutputFormat

 


Code Implementation

Custom_RecordReader:

package demozdy;

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

import java.io.IOException;

/**
 * Created by 一个蔡狗 on 2019/11/18.
 */
public class Custom_RecordReader extends RecordReader<NullWritable, BytesWritable> {

    private FileSplit fileSplit;
    private Configuration conf;
    private BytesWritable bytesWritable = new BytesWritable();
    private boolean processed = false;

    /**
     * @param split   the input split wrapping the file to be read
     * @param context the task attempt context
     */
    @Override
    public void initialize(InputSplit split, TaskAttemptContext context) {

        this.fileSplit = (FileSplit) split;
        this.conf = context.getConfiguration();

    }

    // read the next record: the whole file is consumed as one key/value pair
    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
        // only produce a single record per file
        if (!processed) {
            // path of the file backing this split
            Path path = fileSplit.getPath();
            // get a FileSystem handle (the shared instance is intentionally not closed here)
            FileSystem fileSystem = FileSystem.get(conf);
            FSDataInputStream inputStream = null;

            try {
                // open the file
                inputStream = fileSystem.open(path);
                // allocate a byte array as large as the whole file
                byte[] bytes = new byte[(int) fileSplit.getLength()];
                // read the stream fully into the byte array
                IOUtils.readFully(inputStream, bytes, 0, bytes.length);
                // wrap the byte array in the BytesWritable value
                bytesWritable.set(bytes, 0, bytes.length);

            } finally {
                if (null != inputStream) {
                    inputStream.close();
                }
            }
            processed = true;
            return true;

        } else {
            return false;

        }

    }

    // return the current key: no key is needed here, so NullWritable is used
    @Override
    public NullWritable getCurrentKey() throws IOException, InterruptedException {

        return NullWritable.get();
    }

    // return the current value: the whole file content as a BytesWritable
    @Override
    public BytesWritable getCurrentValue() throws IOException, InterruptedException {

        return bytesWritable;
    }

    // progress: 1 once the single record has been read, 0 before
    @Override
    public float getProgress() throws IOException, InterruptedException {
        return processed ? 1.0f : 0.0f;
    }

    // nothing to close here; the input stream is closed in nextKeyValue()
    @Override
    public void close() throws IOException {

    }


}

Custom_FileInputFormat:

package demozdy;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import java.io.IOException;

/**
 * Created by 一个蔡狗 on 2019/11/18.
 */
public class Custom_FileInputFormat extends FileInputFormat<NullWritable, BytesWritable> {

    // small files must not be split: every file becomes exactly one split
    @Override
    protected boolean isSplitable(JobContext context, Path filename) {
        return false;
    }


    @Override
    public RecordReader<NullWritable, BytesWritable> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException {

        Custom_RecordReader custom_recordReader = new Custom_RecordReader();
        // the framework calls initialize() itself, but calling it here is harmless
        custom_recordReader.initialize(split, context);

        return custom_recordReader;
    }



}

 

Custom_Mapper:

package demozdy;

import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

import java.io.IOException;

/**
 * Created by 一个蔡狗 on 2019/11/18.
 */
public class Custom_Mapper extends Mapper<NullWritable, BytesWritable, Text, BytesWritable> {
    @Override
    protected void map(NullWritable key, BytesWritable value, Context context) throws IOException, InterruptedException {

        // get the split this record came from
        FileSplit fileSplit = (FileSplit) context.getInputSplit();
        // use the source file name as the output key
        String name = fileSplit.getPath().getName();
        context.write(new Text(name), value);

    }
}

 

Custom_Driver:

package demozdy;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

/**
 * Created by 一个蔡狗 on 2019/11/18.
 */
public class Custom_Driver  {


    public static void main(String[] args) throws Exception {

        Job job = Job.getInstance(new Configuration(), "Custom_Driver");
        job.setJarByClass(Custom_Driver.class);

        // use the custom InputFormat so each small file is read as one whole record
        job.setInputFormatClass(Custom_FileInputFormat.class);
        Custom_FileInputFormat.addInputPath(job, new Path("E:\\2019-传智资料5\\MapReduce\\自定义inputFormat&&outputFormat\\input"));


        job.setMapperClass(Custom_Mapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(BytesWritable.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(BytesWritable.class);

        // the merged records are written out as a single SequenceFile
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setOutputPath(job, new Path("E:\\wordcount\\CustomFileInputFormat_outputdududu"));

        boolean b = job.waitForCompletion(true);
        System.exit(b?0:1);
    }

}
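
The title also mentions a custom OutputFormat, but the driver above sticks with the built-in SequenceFileOutputFormat, which is usually the simplest way to persist the merged files. For reference only, a hand-written OutputFormat could look roughly like the sketch below; the class name Custom_FileOutputFormat and the newline-separated record layout are illustrative assumptions, not part of the original code.

package demozdy;

import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

// Hypothetical sketch: a custom OutputFormat that writes "file name + raw bytes" per record
public class Custom_FileOutputFormat extends FileOutputFormat<Text, BytesWritable> {

    @Override
    public RecordWriter<Text, BytesWritable> getRecordWriter(TaskAttemptContext context)
            throws IOException, InterruptedException {
        // one part file per task inside the job's output directory
        Path file = getDefaultWorkFile(context, "");
        FileSystem fs = file.getFileSystem(context.getConfiguration());
        final FSDataOutputStream out = fs.create(file, false);

        return new RecordWriter<Text, BytesWritable>() {
            @Override
            public void write(Text key, BytesWritable value) throws IOException {
                out.write(key.copyBytes());      // the original file name
                out.write('\n');
                out.write(value.copyBytes());    // the original file content
                out.write('\n');
            }

            @Override
            public void close(TaskAttemptContext context) throws IOException {
                out.close();
            }
        };
    }
}

To try it, you would replace SequenceFileOutputFormat.class with Custom_FileOutputFormat.class in the driver, at the cost of losing the standard SequenceFile container format.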

Merged Result

[Image: screenshot of the merged output]
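
The original screenshot is not reproduced here. To check the merge result yourself, the SequenceFile written by the driver can be inspected either with hadoop fs -text on the part file or with a small reader such as the sketch below (a minimal, assumed helper; the part file name part-r-00000 corresponds to the default single reducer and may differ in your run).

package demozdy;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

// Hypothetical helper: prints each merged file name and its size from the job output
public class SequenceFile_Check {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path part = new Path("E:\\wordcount\\CustomFileInputFormat_outputdududu\\part-r-00000");

        SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(part));
        try {
            Text key = new Text();
            BytesWritable value = new BytesWritable();
            // each record is one original small file: name as key, raw bytes as value
            while (reader.next(key, value)) {
                System.out.println(key + " -> " + value.getLength() + " bytes");
            }
        } finally {
            reader.close();
        }
    }
}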
