1 WordCount
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class WordCountExample {

    // Mapper: splits each line on spaces and emits <word, 1> for every token.
    private static class WordCountMapper extends Mapper<Object, Text, Text, IntWritable> {
        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            String str = value.toString();
            String[] strArray = str.split(" ");
            for (String s : strArray) {
                context.write(new Text(s), new IntWritable(1));
            }
        }
    }

    // Reducer: sums the counts for each word and emits <word, total>.
    private static class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable count : values) {
                sum += count.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] argArray = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (argArray.length != 2) {
            System.out.println("Two arguments are required: <input path> <output path>");
            System.exit(1);
        }
        Job job = new Job(conf, "wordcount");
        job.setJarByClass(WordCountExample.class);
        job.setMapperClass(WordCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setReducerClass(WordCountReducer.class);
        FileInputFormat.addInputPath(job, new Path(argArray[0]));
        FileOutputFormat.setOutputPath(job, new Path(argArray[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
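Because the reducer's input and output types are both <Text, IntWritable>, it can also be reused as a combiner to pre-aggregate counts on the map side and shrink the shuffle. This is an optional tweak, not part of the original job setup; the extra driver line would sit with the other job.set* calls in main():

    // Optional: pre-sum counts on each mapper before the shuffle (assumption, not in the original driver).
    job.setCombinerClass(WordCountReducer.class);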
2 Deduplication

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class DeleteRepeatExample {

    // Mapper: emits each whole input line as the key with a dummy 0 value,
    // so identical lines collapse onto the same key during the shuffle.
    private static class DeleteRepeatMapper extends Mapper<Object, Text, Text, IntWritable> {
        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            context.write(value, new IntWritable(0));
        }
    }

    // Reducer: writes each distinct key exactly once and ignores the values.
    private static class DeleteRepeatReducer extends Reducer<Text, IntWritable, Text, Object> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            context.write(key, null);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] argArray = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (argArray.length != 2) {
            System.out.println("Two arguments are required: <input path> <output path>");
            System.exit(1);
        }
        Job job = new Job(conf, "delete repeat");
        job.setJarByClass(DeleteRepeatExample.class);
        job.setMapperClass(DeleteRepeatMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setReducerClass(DeleteRepeatReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Object.class);
        FileInputFormat.addInputPath(job, new Path(argArray[0]));
        FileOutputFormat.setOutputPath(job, new Path(argArray[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
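Writing a null value works with TextOutputFormat, but NullWritable is the more idiomatic way to emit a key with no value. A minimal alternative sketch of the reducer (an assumption, not the original code): it needs import org.apache.hadoop.io.NullWritable, and the driver would then call job.setOutputValueClass(NullWritable.class).

    // Drop-in replacement for DeleteRepeatReducer that emits NullWritable instead of a null value.
    private static class DeleteRepeatReducer extends Reducer<Text, IntWritable, Text, NullWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            context.write(key, NullWritable.get());
        }
    }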
3 Sort

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class SortExample {

    // Mapper: parses each line as an integer and emits it as the key,
    // so the framework's shuffle sorts the numbers for us.
    private static class SortMapper extends Mapper<Object, Text, IntWritable, IntWritable> {
        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            context.write(new IntWritable(Integer.parseInt(value.toString())), new IntWritable(0));
        }
    }

    // Reducer: keys arrive in ascending order, so it emits <rank, number>,
    // repeating a number once per occurrence.
    private static class SortReducer extends Reducer<IntWritable, IntWritable, Text, Text> {
        private int index = 0;

        @Override
        protected void reduce(IntWritable key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            for (IntWritable i : values) {
                index++;
                context.write(new Text(index + ""), new Text(key.get() + ""));
            }
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] argArray = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (argArray.length != 2) {
            System.out.println("Two arguments are required: <input path> <output path>");
            System.exit(1);
        }
        Job job = new Job(conf, "sort");
        job.setJarByClass(SortExample.class);
        job.setMapperClass(SortMapper.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setReducerClass(SortReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(argArray[0]));
        FileOutputFormat.setOutputPath(job, new Path(argArray[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
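The running index in SortReducer and the global ordering only hold when every key reaches the same reducer. The job relies on the default of one reduce task; making that explicit is a small safeguard (an optional line for main(), not in the original):

    // Keep a single reduce task so the output is one globally sorted, consecutively ranked file;
    // with several reducers each output file would only be sorted within itself.
    job.setNumReduceTasks(1);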
4 Single-table self-join
package demo;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.vod.Ejob;

public class SelfJoin {

    // Input lines are "child parent". Each line is emitted twice:
    //   key = parent, value tagged "1" -> the child side of the relation
    //   key = child,  value tagged "2" -> the parent side of the relation
    private static class SelfJoinMapper extends Mapper<Object, Text, Text, Text> {
        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            String str = value.toString();
            String[] nameArray = str.split(" ");
            context.write(new Text(nameArray[1]), new Text("1-" + nameArray[0] + "-" + nameArray[1]));
            context.write(new Text(nameArray[0]), new Text("2-" + nameArray[0] + "-" + nameArray[1]));
        }
    }

    // For each person, tag "1" values hold that person's children (grandchild candidates)
    // and tag "2" values hold that person's parents (grandparent candidates);
    // the cross product of the two lists yields grandchild-grandparent pairs.
    private static class SelfJoinReducer extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            List<String> outKey = new ArrayList<String>();
            List<String> outValue = new ArrayList<String>();
            for (Text value : values) {
                String[] relationArray = value.toString().split("-");
                if (relationArray[0].equals("1")) {
                    outKey.add(relationArray[1]);
                } else if (relationArray[0].equals("2")) {
                    outValue.add(relationArray[2]);
                }
            }
            for (String k : outKey) {
                for (int i = 0; i < outValue.size(); i++) {
                    context.write(new Text(k), new Text(outValue.get(i)));
                }
            }
        }
    }

    public static void main(String[] args) throws Exception {
        // Ejob is a local helper class (not part of Hadoop) that packages the compiled
        // classes under bin/ into a temporary jar so the job can be submitted from the IDE.
        File jarFile = Ejob.createTempJar("bin");
        ClassLoader classLoader = Ejob.getClassLoader();
        Thread.currentThread().setContextClassLoader(classLoader);

        Configuration conf = new Configuration();
        String[] argArray = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (argArray.length != 2) {
            System.out.println("Two arguments are required: <input path> <output path>");
            System.exit(1);
        }
        JobConf jobConf = new JobConf(conf);
        jobConf.setJar(jarFile.toString());
        Job job = new Job(jobConf, "self join");
        job.setJarByClass(SelfJoin.class);
        job.setMapperClass(SelfJoinMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setReducerClass(SelfJoinReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(argArray[0]));
        FileOutputFormat.setOutputPath(job, new Path(argArray[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
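To see how the join works, take the key Lucy from the input below: the reducer receives the tagged values 1-Tom-Lucy, 1-Jone-Lucy, 2-Lucy-Mary and 2-Lucy-Ben, so outKey becomes [Tom, Jone] and outValue becomes [Mary, Ben]. Their cross product yields Tom Mary, Tom Ben, Jone Mary and Jone Ben, which are exactly the Tom/Jone rows of the result.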
Input (each line is "child parent"):

Tom Lucy
Tom Jack
Jone Lucy
Jone Jack
Lucy Mary
Lucy Ben
Jack Alice
Jack Jesse
Terry Alice
Terry Jesse
Philip Terry
Philip Alma
Mark Terry
Mark Alma

Result (grandchild grandparent):

Tom Alice
Tom Jesse
Jone Alice
Jone Jesse
Tom Mary
Tom Ben
Jone Mary
Jone Ben
Philip Alice
Philip Jesse
Mark Alice
Mark Jesse
5 Multi-table join
package demo;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.vod.Ejob;

public class MultiTableJoin {

    // The input directory mixes two tables:
    //   address table lines start with the city id, e.g. "1 Beijing"
    //   factory table lines end with the city id,   e.g. "Beijing Red Star 1"
    // Both are re-keyed by city id; the value is tagged "1" (factory) or "2" (city).
    private static class MultiTableMapper extends Mapper<Object, Text, Text, Text> {
        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            String str = value.toString();
            if (Character.isDigit(str.charAt(0))) {
                // Address table record: key = city id, value = tagged city name.
                context.write(new Text(str.charAt(0) + ""), new Text("2-" + str.substring(1).trim()));
            } else {
                // Factory table record: key = trailing city id, value = tagged factory name.
                context.write(new Text(str.substring(str.length() - 1)),
                        new Text("1-" + str.substring(0, str.length() - 1).trim()));
            }
        }
    }

    // For each city id, pair every factory name (tag "1") with every city name (tag "2").
    private static class MultiTableReducer extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            List<String> keyList = new ArrayList<String>();
            List<String> valueList = new ArrayList<String>();
            for (Text value : values) {
                String str = value.toString();
                String[] strArray = str.split("-");
                if (strArray[0].equals("1")) {
                    keyList.add(strArray[1]);
                } else if (strArray[0].equals("2")) {
                    valueList.add(strArray[1]);
                }
            }
            for (String skey : keyList) {
                for (String svalue : valueList) {
                    context.write(new Text(skey), new Text(svalue));
                }
            }
        }
    }

    public static void main(String[] args) throws Exception {
        // Ejob is a local helper class (not part of Hadoop) that packages the compiled
        // classes into a temporary jar so the job can be submitted from the IDE.
        File jarFile = Ejob.createTempJar("bin");
        ClassLoader classLoader = Ejob.getClassLoader();
        Thread.currentThread().setContextClassLoader(classLoader);

        Configuration conf = new Configuration();
        String[] argArray = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (argArray.length != 2) {
            System.out.println("Two arguments are required: <input path> <output path>");
            System.exit(1);
        }
        JobConf jobConf = new JobConf(conf);
        jobConf.setJar(jarFile.toString());
        Job job = new Job(jobConf, "multitable join");
        job.setMapperClass(MultiTableMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setReducerClass(MultiTableReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(argArray[0]));
        FileOutputFormat.setOutputPath(job, new Path(argArray[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
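Routing records by their first character is fragile: a factory name that happens to start with a digit would be treated as an address record. A more robust sketch keys off the source file name instead, assuming the two tables are stored in separately named files such as factory.txt and address.txt (hypothetical names, not given by the example). The fragment below would replace the body of MultiTableMapper.map() and needs import org.apache.hadoop.mapreduce.lib.input.FileSplit:

    // Decide the table by input file name rather than by the first character (assumed file names).
    String fileName = ((FileSplit) context.getInputSplit()).getPath().getName();
    String line = value.toString().trim();
    if (fileName.startsWith("address")) {
        // Address table record, e.g. "1 Beijing": key = city id, value = tagged city name.
        String[] parts = line.split(" ", 2);
        context.write(new Text(parts[0]), new Text("2-" + parts[1].trim()));
    } else {
        // Factory table record, e.g. "Beijing Red Star 1": key = trailing city id, value = tagged factory name.
        int split = line.lastIndexOf(' ');
        context.write(new Text(line.substring(split + 1)), new Text("1-" + line.substring(0, split)));
    }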
Input (factory table: factory name, city id):

Beijing Red Star 1
Shenzhen Thunder 3
Guangzhou Honda 2
Beijing Rising 1
Guangzhou Development Bank 2
Tencent 3
Bank of Beijing 1

Input (address table: city id, city name):

1 Beijing
2 Guangzhou
3 Shenzhen
4 Xian

Result (factory name, city name):

Beijing Red Star Beijing
Beijing Rising Beijing
Bank of Beijing Beijing
Guangzhou Honda Guangzhou
Guangzhou Development Bank Guangzhou
Shenzhen Thunder Shenzhen
Tencent Shenzhen