倒排索引案例(多Job串联)

需求

有多个文本文件,统计每个单词在每个文件中出现的次数,并按单词汇总输出(倒排索引)。


需求分析

分两个Job串联完成:第一次处理以“单词--文件名”为key累加出现次数;第二次处理按“--”切分第一次的输出,以单词为key,把各文件的“文件名-->次数”拼接成一行。


第一次处理

OneIndexMapper

public class OneIndexMapper extends Mapper {

    String name;

    @Override

    protected void setup(Context context) throws IOException, InterruptedException {

        // 获取文件名称

        FileSplit inputSplit = (FileSplit) context.getInputSplit();

        name = inputSplit.getPath().getName();

    }

    Text k = new Text();

    IntWritable v = new IntWritable(1);

    @Override

    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

        // 1 获取一行

        String line = value.toString();

        // 2 切割

        String[] fields = line.split(" ");

        // 3 写出

        for (String word : fields) {

            k.set(word + "--" + name);

            context.write(k, v);

        }

    }

}


OneIndexReducer

public class OneIndexReducer extends Reducer {

    @Override

    protected void reduce(Text key, Iterable values, Context context) throws IOException, InterruptedException {

        // 1 累加求和

        int sum = 0;

        for (IntWritable value : values) {

            sum += value.get();

        }

        IntWritable v = new IntWritable();

        v.set(sum);

        // 2 写出

        context.write(key, v);

    }

}


OneIndexDriver

public class OneIndexDriver {

    public static void main(String[] args) throws Exception {

        args = new String[]{"e:/input/inputoneindex", "e:/output5"};

        Configuration configuration = new Configuration();

        Job job = Job.getInstance(configuration);

        job.setJarByClass(OneIndexDriver.class);

        job.setMapperClass(OneIndexMapper.class);

        job.setReducerClass(OneIndexReducer.class);

        job.setMapOutputKeyClass(Text.class);

        job.setMapOutputValueClass(IntWritable.class);

        job.setOutputKeyClass(Text.class);

        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));

        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        boolean result = job.waitForCompletion(true);

        System.exit(result ? 0 : 1);

    }

}



第二次处理

TwoIndexMapper

public class TwoIndexMapper extends Mapper {

    Text k = new Text();

    Text v = new Text();

    @Override

    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

        // 1 获取一行

        String line = value.toString();

        // 2 切割

        String[] fields = line.split("--");

        // 3 封装

        k.set(fields[0]);

        v.set(fields[1]);

        // 4 写出

        context.write(k, v);

    }

}


TwoIndexReducer

public class TwoIndexReducer extends Reducer {

    Text v = new Text();

    @Override

    protected void reduce(Text key, Iterable values, Context context) throws IOException, InterruptedException {

        // 1 拼接字符串

        StringBuffer sb = new StringBuffer();

        for (Text value : values) {

            sb.append(value.toString().replace("\t", "-->") + "\t");

        }

        v.set(sb.toString());

        // 2 写出

        context.write(key, v);

    }

}


TwoIndexDriver

public class TwoIndexDriver {

    public static void main(String[] args) throws Exception {

        args = new String[]{"e:/input/inputtwoindex", "e:/output6"};

        Configuration configuration = new Configuration();

        Job job = Job.getInstance(configuration);

        job.setJarByClass(TwoIndexDriver.class);

        job.setMapperClass(TwoIndexMapper.class);

        job.setReducerClass(TwoIndexReducer.class);

        job.setMapOutputKeyClass(Text.class);

        job.setMapOutputValueClass(Text.class);

        job.setOutputKeyClass(Text.class);

        job.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));

        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        boolean result = job.waitForCompletion(true);

        System.exit(result ? 0 : 1);

    }

}

你可能感兴趣的:(倒排索引案例(多Job串联))