Hadoop MapReduce: combining small files into a SequenceFile

1、WholeFileRecordReader.java

package com.jiepu.mr;

import java.io.IOException;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class WholeFileRecordReader extends
		RecordReader<NullWritable, BytesWritable> {
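	// Treats each input file as a single record: the key is always NullWritable,
	// the value is the complete file content packed into a BytesWritable, and
	// both are produced by a single call to nextKeyValue().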

	private FileSplit fileSplit;
	private JobContext jobContext;
	private NullWritable currentKey=NullWritable.get();
	private BytesWritable currentValue;
	private boolean finishConverting=false;

	@Override
	public void close() throws IOException {
		// nothing to close here; the input stream is closed in nextKeyValue()
	}

	@Override
	public NullWritable getCurrentKey() throws IOException,
			InterruptedException {
		return currentKey;
	}

	@Override
	public BytesWritable getCurrentValue() throws IOException,
			InterruptedException {
		return currentValue;
	}

	@Override
	public float getProgress() throws IOException, InterruptedException {
		return finishConverting ? 1.0f : 0.0f;
	}

	@Override
	public void initialize(InputSplit split, TaskAttemptContext context)
			throws IOException, InterruptedException {
		this.fileSplit = (FileSplit) split;
		this.jobContext = context;
		// expose the current file name so the mapper can use it as the output key
		context.getConfiguration().set("map.input.file", fileSplit.getPath().getName());
	}

	@Override
	public boolean nextKeyValue() throws IOException, InterruptedException {
		if(!finishConverting)
		{
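			// first (and only) call: read the entire file backing this split into the value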
			currentValue=new BytesWritable();
			int len=(int) fileSplit.getLength();
			byte[] content=new byte[len];
			Path file=fileSplit.getPath();
			FileSystem fs=file.getFileSystem(jobContext.getConfiguration());
			FSDataInputStream in = null;
			try {

				in = fs.open(file);
				org.apache.hadoop.io.IOUtils.readFully(in, content, 0, len);
				currentValue.set(content, 0, len);
			} finally{
				if(in!=null)
				{
					org.apache.hadoop.io.IOUtils.closeStream(in);
				}
			}
			finishConverting=true;
			return true;
		}

		return false;
	}

}
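
The WholeFileRecordReader above turns every input file into a single (NullWritable, BytesWritable) record, so each small file reaches the mapper in one piece. The driver below wires it into a job that packs the files into SequenceFile output.

2、WholeCombinedSmallfiles.java
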
package com.jiepu.mr;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

/**
 * Packs many small input files into SequenceFile output: the mapper emits
 * (file name, file bytes) pairs and the identity reducer writes them out
 * unchanged through SequenceFileOutputFormat.
 */
public class WholeCombinedSmallfiles {

	static class WholeSmallfilesMapper extends
			Mapper<NullWritable, BytesWritable, Text, BytesWritable> {

		private Text file = new Text();

		@Override
		protected void map(NullWritable key, BytesWritable value,
				Context context) throws IOException, InterruptedException {
			// "map.input.file" is set by WholeFileRecordReader.initialize()
			String filename = context.getConfiguration().get("map.input.file");
			file.set(filename);
			context.write(file, value);
		}
	}

	static class IdentityReducer extends
			Reducer<Text, BytesWritable, Text, BytesWritable> {

		@Override
		protected void reduce(Text key, Iterable<BytesWritable> values,
				Context context) throws IOException, InterruptedException {
			for (BytesWritable value : values) {
				context.write(key, value);
			}
		}
	}

	static class WholeFileInputFormat extends
			FileInputFormat<NullWritable, BytesWritable> {

		@Override
		protected boolean isSplitable(org.apache.hadoop.mapreduce.JobContext context, Path filename) {
			// never split: WholeFileRecordReader reads each file from the beginning in one piece
			return false;
		}

		@Override
		public org.apache.hadoop.mapreduce.RecordReader<NullWritable, BytesWritable> createRecordReader(
				org.apache.hadoop.mapreduce.InputSplit split,
				TaskAttemptContext context) throws IOException,
				InterruptedException {
			RecordReader<NullWritable, BytesWritable> recordReader = new WholeFileRecordReader();
			recordReader.initialize(split, context);
			return recordReader;
		}
	}

	public static void main(String[] args) throws IOException,
			ClassNotFoundException, InterruptedException {
		Configuration conf = new Configuration();
		String[] otherArgs = new GenericOptionsParser(conf, args)
				.getRemainingArgs();
		if (otherArgs.length != 2) {
			System.err.println("Usage: WholeCombinedSmallfiles <in> <out>");
			System.exit(2);
		}

		Job job = Job.getInstance(conf, "WholeCombinedSmallfiles");
		job.setJarByClass(WholeCombinedSmallfiles.class);
		job.setMapperClass(WholeSmallfilesMapper.class);
		job.setReducerClass(IdentityReducer.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(BytesWritable.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(BytesWritable.class);
		job.setInputFormatClass(WholeFileInputFormat.class);
		job.setOutputFormatClass(SequenceFileOutputFormat.class);
		job.setNumReduceTasks(5);

		FileInputFormat.addInputPath(job, new Path(otherArgs[0]));

		// remove a stale output directory so the job can be rerun
		Path outpath = new Path(otherArgs[1]);
		FileSystem fs = outpath.getFileSystem(conf);
		if (fs.exists(outpath)) {
			fs.delete(outpath, true);
		}
		FileOutputFormat.setOutputPath(job, outpath);

		int exitflag = job.waitForCompletion(true) ? 0 : 1;
		System.exit(exitflag);
	}
}
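
The output is SequenceFile data (five part files, since the job requests five reducers) whose keys are the original file names (Text) and whose values are the raw file bytes (BytesWritable). The driver is run in the usual way, for example hadoop jar your-job.jar com.jiepu.mr.WholeCombinedSmallfiles <in> <out>, where your-job.jar is only a placeholder for whatever jar you package these classes into. As a quick sanity check, the combined output can be read back with SequenceFile.Reader. The sketch below is a minimal example assuming the Hadoop 2.x reader API; DumpCombinedSmallfiles and the part-file path argument are made up for illustration.

package com.jiepu.mr;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class DumpCombinedSmallfiles {

	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		// args[0]: one of the job's output part files, e.g. <out>/part-r-00000
		Path part = new Path(args[0]);

		SequenceFile.Reader reader = new SequenceFile.Reader(conf,
				SequenceFile.Reader.file(part));
		try {
			Text key = new Text();
			BytesWritable value = new BytesWritable();
			// each record is (original small-file name, file content)
			while (reader.next(key, value)) {
				System.out.println(key + "\t" + value.getLength() + " bytes");
			}
		} finally {
			reader.close();
		}
	}
}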


