Hadoop 2.2.0 MapReduce Examples

1. WordCount

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class WordCountExample {
	private static class WordCountMapper extends Mapper<Object, Text, Text, IntWritable>{

		@Override
		protected void map(Object key, Text value, Context context)
				throws IOException, InterruptedException {
			// Split the line on single spaces and emit (word, 1) for every token.
			String str=value.toString();
			String[] strArray=str.split(" ");
			for(String s:strArray){
				context.write(new Text(s), new IntWritable(1));
			}
		}
		
	}
	
	private static class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable>{

		@Override
		protected void reduce(Text key, Iterable<IntWritable> values,
				Context context)
				throws IOException, InterruptedException {
			// Add up all the 1s emitted for this word.
			int sum=0;
			for(IntWritable count:values){
				sum+=count.get();
			}
			context.write(key, new IntWritable(sum));
		}
		
	}

	/**
	 * @param args
	 */
	public static void main(String[] args) throws Exception{
		Configuration conf=new Configuration();
		String []argArray=new GenericOptionsParser(conf,args).getRemainingArgs();
		if(argArray.length!=2){
			System.out.println("需要两个参数");
			System.exit(1);
		}
		Job job=Job.getInstance(conf,"wordcount");
		job.setJarByClass(WordCountExample.class);
		job.setMapperClass(WordCountMapper.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(IntWritable.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(IntWritable.class);
		job.setReducerClass(WordCountReducer.class);
		FileInputFormat.addInputPath(job, new Path(argArray[0]));
		FileOutputFormat.setOutputPath(job, new Path(argArray[1]));
		System.exit(job.waitForCompletion(true)?0:1);
	}

}
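
A small optimization sketch, not part of the original listing: the mapper above allocates a new Text and a new IntWritable for every word. The variant below reuses the output objects and tokenizes on any whitespace (it assumes java.util.StringTokenizer is added to the imports, and the class name ReusingWordCountMapper is made up here). Setting a combiner in the driver, job.setCombinerClass(WordCountReducer.class), would also cut the amount of data shuffled to the reducers.

	private static class ReusingWordCountMapper extends Mapper<Object, Text, Text, IntWritable>{
		private final Text word=new Text();
		private final IntWritable one=new IntWritable(1);

		@Override
		protected void map(Object key, Text value, Context context)
				throws IOException, InterruptedException {
			// Reuse the same Writable instances for every output record; the framework
			// serializes them on each context.write, so this is safe.
			StringTokenizer tokenizer=new StringTokenizer(value.toString());
			while(tokenizer.hasMoreTokens()){
				word.set(tokenizer.nextToken());
				context.write(word, one);
			}
		}

	}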

2. Deduplication

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class DeleteRepeatExample {
	private static class DeleteRepeatMapper extends Mapper<Object, Text, Text, IntWritable>{

		@Override
		protected void map(Object key, Text value, Context context)
				throws IOException, InterruptedException {
			// Each whole input line becomes the key; the value carries no information.
			context.write(value, new IntWritable(0));
		}
		
	}
	
	private static class DeleteRepeatReducer extends Reducer<Text, IntWritable, Text, NullWritable>{

		@Override
		protected void reduce(Text key, Iterable<IntWritable> values,
				Context context)
				throws IOException, InterruptedException {
			// Duplicate lines are grouped under one key, so writing each key once removes the repeats.
			context.write(key, NullWritable.get());
		}
		
	}
	
	/**
	 * @param args
	 */
	public static void main(String[] args) throws Exception{
		Configuration conf=new Configuration();
		String[]argArray=new GenericOptionsParser(conf, args).getRemainingArgs();
		if(argArray.length!=2){
			System.out.println("请提供两个参数");
			System.exit(1);
		}
		Job job=new Job(conf,"delete repeat");
		job.setJarByClass(DeleteRepeatExample.class);
		job.setMapperClass(DeleteRepeatMapper.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(IntWritable.class);
		job.setReducerClass(DeleteRepeatReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(NullWritable.class);
		FileInputFormat.addInputPath(job, new Path(argArray[0]));
		FileOutputFormat.setOutputPath(job,new Path(argArray[1]));
		System.exit(job.waitForCompletion(true)?0:1);

	}

}
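
The map output value carries no information either, so a variant (a sketch, not from the original article; the name NullValueMapper is made up here) could emit NullWritable instead of IntWritable(0) to shrink the shuffled data. The driver would then call job.setMapOutputValueClass(NullWritable.class) and the reducer would accept Iterable<NullWritable>.

	private static class NullValueMapper extends Mapper<Object, Text, Text, NullWritable>{

		@Override
		protected void map(Object key, Text value, Context context)
				throws IOException, InterruptedException {
			// The whole line is the key; NullWritable adds no bytes to the shuffle.
			context.write(value, NullWritable.get());
		}

	}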

3. Sort

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class SortExample {
	private static class SortMapper extends Mapper<Object, Text, IntWritable, IntWritable>{

		@Override
		protected void map(Object key, Text value, Context context)
				throws IOException, InterruptedException {
			// Each input line holds one integer; make it the key so the shuffle sorts it.
			context.write(new IntWritable(Integer.parseInt(value.toString().trim())), new IntWritable(0));
		}
		
	}
	
	private static class SortReducer extends Reducer<IntWritable, IntWritable, Text,Text>{
		// Running counter used as the output rank; valid only when the job uses a single reduce task.
		private int index=0;
		@Override
		protected void reduce(IntWritable key, Iterable<IntWritable> values,
				Context context)
				throws IOException, InterruptedException {
			for(IntWritable i:values){
				index++;
				context.write(new Text(index+""),new Text(key.get()+""));
			}
		}
		
	}

	/**
	 * @param args
	 */
	public static void main(String[] args) throws Exception{
		Configuration conf=new Configuration();
		String[]argArray=new GenericOptionsParser(conf, args).getRemainingArgs();
		if(argArray.length!=2){
			System.out.println("请输入两个参数");
			System.exit(1);
		}
		Job job=new Job(conf,"sort");
		job.setJarByClass(SortExample.class);
		job.setMapperClass(SortMapper.class);
		job.setMapOutputKeyClass(IntWritable.class);
		job.setMapOutputValueClass(IntWritable.class);
		job.setReducerClass(SortReducer.class);
		// The global rank kept in SortReducer requires exactly one reduce task.
		job.setNumReduceTasks(1);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);
		FileInputFormat.addInputPath(job, new Path(argArray[0]));
		FileOutputFormat.setOutputPath(job, new Path(argArray[1]));
		System.exit(job.waitForCompletion(true)?0:1);

	}

}
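
The framework sorts the IntWritable map keys in ascending order. For a descending sort, a raw comparator can be registered in the driver with job.setSortComparatorClass(DescendingIntComparator.class); the class below is a sketch, not part of the original article, and could be nested inside SortExample.

	public static class DescendingIntComparator extends IntWritable.Comparator {
		@Override
		public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
			// Invert the byte-level comparison so larger integers sort first.
			return -super.compare(b1, s1, l1, b2, s2, l2);
		}
	}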

4. Table Self-Join

package demo;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.vod.Ejob;

public class SelfJoin {
	private static class SelfJoinMapper extends Mapper<Object, Text, Text, Text>{

		@Override
		protected void map(Object key, Text value, Context context)
				throws IOException, InterruptedException {
			// Each line is "child parent". Key the record by the parent with tag "1" (so the
			// reducer sees, under that person, the children below them) and by the child with
			// tag "2" (so it also sees the parents above them).
			String str=value.toString();
			String[] nameArray=str.split(" ");
			context.write(new Text(nameArray[1]), new Text("1-"+nameArray[0]+"-"+nameArray[1]));
			context.write(new Text(nameArray[0]), new Text("2-"+nameArray[0]+"-"+nameArray[1]));
			
		}
		
	}
	private static class SelfJoinReducer extends Reducer<Text, Text, Text, Text>{

		@Override
		protected void reduce(Text key, Iterable<Text> values,
				Context context)
				throws IOException, InterruptedException {
			// Grandchildren of this person (from tag-"1" records) and grandparents (from tag-"2" records).
			List<String> outKey=new ArrayList<String>();
			List<String> outValue=new ArrayList<String>();
			for(Text value:values){
				String[] relationArray=value.toString().split("-");
				if(relationArray[0].equals("1")){
					outKey.add(relationArray[1]);
				}else if(relationArray[0].equals("2")){
					outValue.add(relationArray[2]);
				}
			}
			// Cross product: pair every grandchild with every grandparent of this person.
			for(String k:outKey){
				for(String v:outValue){
					context.write(new Text(k), new Text(v));
				}
			}
		}
		
	}
	public static void main(String[] args) throws Exception{
		File jarFile = Ejob.createTempJar("bin");
		  //Ejob.addClasspath("/opt/hadoop/conf");
	      ClassLoader classLoader = Ejob.getClassLoader();
		  Thread.currentThread().setContextClassLoader(classLoader);
		  
		Configuration conf=new Configuration();
		String [] argArray=new GenericOptionsParser(conf, args).getRemainingArgs();
		if(argArray.length!=2){
			System.out.println("参数错误");
			System.exit(1);
		}
		JobConf jobConf=new JobConf(conf);
		jobConf.setJar(jarFile.toString());
		Job job=new Job(jobConf,"self join");
		job.setJarByClass(SelfJoin.class);
		job.setMapperClass(SelfJoinMapper.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);
		job.setReducerClass(SelfJoinReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);
		FileInputFormat.addInputPath(job, new Path(argArray[0]));
		FileOutputFormat.setOutputPath(job, new Path(argArray[1]));
		System.exit(job.waitForCompletion(true)?0:1);

	}

}

Data:
Tom Lucy
Tom Jack
Jone Lucy
Jone Jack
Lucy Mary
Lucy Ben
Jack Alice
Jack Jesse
Terry Alice
Terry Jesse
Philip Terry
Philip Alma
Mark Terry
Mark Alma
Result:

Tom	Alice
Tom	Jesse
Jone	Alice
Jone	Jesse
Tom	Mary
Tom	Ben
Jone	Mary
Jone	Ben
Philip	Alice
Philip	Jesse
Mark	Alice
Mark	Jesse
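
Ejob (imported above as org.apache.hadoop.vod.Ejob) is a local helper whose source is not part of this article: it packs the project's compiled classes into a temporary jar so the job can be submitted straight from an IDE. Below is a minimal sketch of what such a helper could look like; the method names match the calls used above, but the implementation details are assumptions, not the original class (which would also need the org.apache.hadoop.vod package declaration).

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.jar.JarEntry;
import java.util.jar.JarOutputStream;
import java.util.jar.Manifest;

public class Ejob {
	// Pack the compiled classes under rootDir (e.g. "bin") into a temporary jar.
	public static File createTempJar(String rootDir) throws IOException {
		File root=new File(rootDir);
		if(!root.exists()){
			return null;
		}
		File jarFile=File.createTempFile("EJob-", ".jar");
		jarFile.deleteOnExit();
		Manifest manifest=new Manifest();
		manifest.getMainAttributes().putValue("Manifest-Version", "1.0");
		JarOutputStream out=new JarOutputStream(new FileOutputStream(jarFile), manifest);
		addFiles(out, root, "");
		out.close();
		return jarFile;
	}

	// Recursively add every file below f, keeping entry paths relative to the root directory.
	private static void addFiles(JarOutputStream out, File f, String base) throws IOException {
		if(f.isDirectory()){
			String prefix=base.isEmpty()?"":base+"/";
			for(File child:f.listFiles()){
				addFiles(out, child, prefix+child.getName());
			}
		}else{
			out.putNextEntry(new JarEntry(base));
			InputStream in=new FileInputStream(f);
			byte[] buffer=new byte[4096];
			int n;
			while((n=in.read(buffer))!=-1){
				out.write(buffer, 0, n);
			}
			in.close();
		}
	}

	// Class loader used to run the job driver with the temp jar on the classpath.
	public static ClassLoader getClassLoader() {
		ClassLoader loader=Thread.currentThread().getContextClassLoader();
		return loader!=null?loader:Ejob.class.getClassLoader();
	}
}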


5. Multi-Table Join

package demo;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.vod.Ejob;

public class MultiTableJoin {
	private static class MultiTableMapper extends Mapper<Object, Text, Text, Text>{

		@Override
		protected void map(Object key, Text value, Context context)
				throws IOException, InterruptedException {
			// table2 lines start with the numeric address id ("1 Beijing"); table1 lines end with it
			// ("Beijing Red Star 1"). Key both kinds of record by the address id, tagging table2
			// records with "2-" and table1 records with "1-".
			String str=value.toString();
			if(Character.isDigit(str.charAt(0))){
				context.write(new Text(str.charAt(0)+""), new Text("2-"+str.substring(1).trim()));
			}else{
				context.write(new Text(str.substring(str.length()-1)), new Text("1-"+str.substring(0, str.length()-1).trim()));
			}
		}
		
	}
	
	private static class MultiTableReducer extends Reducer<Text, Text, Text, Text>{

		@Override
		protected void reduce(Text key, Iterable<Text> values,
				Context context)
				throws IOException, InterruptedException {
			// Factory names (from table1, tag "1") and address names (from table2, tag "2") for this address id.
			List<String> keyList=new ArrayList<String>();
			List<String> valueList=new ArrayList<String>();
			for(Text value:values){
				String str=value.toString();
				String []strArray=str.split("-");
				if(strArray[0].equals("1")){
					keyList.add(strArray[1]);
				}else if(strArray[0].equals("2")){
					valueList.add(strArray[1]);
				}
			}
			for(String skey:keyList){
				for(String svalue:valueList){
					context.write(new Text(skey), new Text(svalue));
				}
			}
		}
		
	}
	
	
	public static void main(String[] args) throws Exception{
		File jarFile=Ejob.createTempJar("bin");
		ClassLoader classLoader=Ejob.getClassLoader();
		Thread.currentThread().setContextClassLoader(classLoader);
		
		Configuration conf=new Configuration();
		String [] argArray=new GenericOptionsParser(conf, args).getRemainingArgs();
		if(argArray.length!=2){
			System.out.println("参数错误");
			System.exit(1);
		}
		JobConf jobConf=new JobConf(conf);
		jobConf.setJar(jarFile.toString());
		Job job=new Job(jobConf,"multiTalbe join");
		job.setMapperClass(MultiTableMapper.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);
		job.setReducerClass(MultiTableReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);
		FileInputFormat.addInputPath(job, new Path(argArray[0]));
		FileOutputFormat.setOutputPath(job, new Path(argArray[1]));
		System.exit(job.waitForCompletion(true)?0:1);
		
	}
}

Data: table1.txt

Beijing Red Star 1
Shenzhen Thunder 3
Guangzhou Honda 2
Beijing Rising 1
Guangzhou Development Bank 2
Tencent 3
Bank of Beijing 1

table2.txt

1 Beijing
2 Guangzhou
3 Shenzhen
4 Xian

Output:

Beijing Red Star	Beijing
Beijing Rising	Beijing
Bank of Beijing	Beijing
Guangzhou Honda	Guangzhou
Guangzhou Development Bank	Guangzhou
Shenzhen Thunder	Shenzhen
Tencent	Shenzhen
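
Telling the two tables apart by the first character of each line is fragile. A more robust sketch (not from the original article, assuming the input files keep names like table1.txt and table2.txt, and adding one import, org.apache.hadoop.mapreduce.lib.input.FileSplit) reads the current file name from the input split in setup(); the class name FileNameAwareMapper is made up here.

	private static class FileNameAwareMapper extends Mapper<Object, Text, Text, Text>{
		private boolean isAddressTable;	// true when the current split comes from table2.txt

		@Override
		protected void setup(Context context) {
			// Every record of a split comes from the same file, so the check is done once per task.
			String fileName=((FileSplit)context.getInputSplit()).getPath().getName();
			isAddressTable=fileName.startsWith("table2");
		}

		@Override
		protected void map(Object key, Text value, Context context)
				throws IOException, InterruptedException {
			String line=value.toString().trim();
			if(line.isEmpty()){
				return;
			}
			if(isAddressTable){
				// table2: "<addressId> <addressName>"
				int idx=line.indexOf(' ');
				context.write(new Text(line.substring(0, idx)), new Text("2-"+line.substring(idx+1).trim()));
			}else{
				// table1: "<factoryName> <addressId>"
				int idx=line.lastIndexOf(' ');
				context.write(new Text(line.substring(idx+1)), new Text("1-"+line.substring(0, idx).trim()));
			}
		}

	}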

