package cn.yws;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

// Inverted index: place the test files file1 and file2 under the Hadoop index_in directory.
public class MyInvertedIndex {

    public static class Map extends Mapper<Object, Text, Text, Text> {

        private final Text keyInfo = new Text();
        private final Text valueInfo = new Text();
        private FileSplit split;

        // Map phase: emit "word:filename" -> "1" for every token in the line.
        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            // Determine which input file this record came from.
            split = (FileSplit) context.getInputSplit();
            StringTokenizer tokenizer = new StringTokenizer(value.toString());
            while (tokenizer.hasMoreTokens()) {
                // Keep only the "fileN" portion of the full input path.
                int splitIndex = split.getPath().toString().indexOf("file");
                keyInfo.set(tokenizer.nextToken() + ":" + split.getPath().toString().substring(splitIndex));
                valueInfo.set("1");
                // e.g. key = "word:file3", value = "1"
                context.write(keyInfo, valueInfo);
            }
        }
    }

    public static class Combine extends Reducer<Text, Text, Text, Text> {

        private final Text infoText = new Text();

        // Combine phase: sum the counts per "word:filename" pair, then re-key on the word alone.
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (Text value : values) {
                sum += Integer.parseInt(value.toString());
            }
            int splitIndex = key.toString().indexOf(":");
            // The value becomes "filename:count", e.g. "file2:1"
            infoText.set(key.toString().substring(splitIndex + 1) + ":" + sum);
            // The key becomes the bare word.
            key.set(key.toString().substring(0, splitIndex));
            context.write(key, infoText);
        }
    }

    public static class Reduce extends Reducer<Text, Text, Text, Text> {

        private final Text result = new Text();

        // Reduce phase: concatenate the per-file counts into a document list.
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // Build the document list, e.g. "file2:1;file3:2;file1:1;"
            StringBuilder fileList = new StringBuilder();
            for (Text value : values) {
                fileList.append(value.toString()).append(";");
            }
            result.set(fileList.toString());
            context.write(key, result);
        }
    }

    public static void main(String[] args) {
        try {
            Configuration configuration = new Configuration();
            // This line is essential: it points the job at the (pre-YARN) JobTracker.
            configuration.set("mapred.job.tracker", "192.168.1.15:9001");
            String[] ioArgs = new String[] { "index_in", "index_out3" };
            if (args.length == 2) {
                ioArgs = args;
            }
            String[] otherArgs = new GenericOptionsParser(configuration, ioArgs).getRemainingArgs();
            if (otherArgs.length != 2) {
                System.err.println("Usage: inverted " + MyInvertedIndex.class.getSimpleName() + " <in> <out>");
                System.exit(2);
            }
            // Set up the job.
            Job job = new Job(configuration, MyInvertedIndex.class.getSimpleName());
            job.setJarByClass(MyInvertedIndex.class);
            // Mapper
            job.setMapperClass(Map.class);
            // Combiner
            job.setCombinerClass(Combine.class);
            // Reducer
            job.setReducerClass(Reduce.class);
            // Map output types
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            // Reduce output types
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            // Input and output directories
            FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
            FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
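
// A hypothetical run, assuming the job jar is named inverted-index.jar and the
// input files sit under index_in on HDFS (both names are illustrative, not from
// the source):
//
//   hadoop fs -put file1 file2 index_in/
//   hadoop jar inverted-index.jar cn.yws.MyInvertedIndex index_in index_out3
//
// Sketch of the data flow for a token "mapreduce" that appears once in file1
// and twice in file2 (values are illustrative):
//
//   Map     -> ("mapreduce:file1", "1"), ("mapreduce:file2", "1"), ("mapreduce:file2", "1")
//   Combine -> ("mapreduce", "file1:1"), ("mapreduce", "file2:2")
//   Reduce  -> ("mapreduce", "file1:1;file2:2;")
//
// The combiner both aggregates counts and rewrites the key from "word:file" to
// "word", so all per-file postings for a word meet at a single reducer.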