package com.Practice.RemoveDupData;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
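
/**
 * Removes duplicate records from a text dataset with a single MapReduce job.
 *
 * The mapper emits each input line as the key with a NullWritable value, so the
 * shuffle phase groups identical lines into one reduce call; the reducer then
 * writes each distinct key exactly once. For example, the input lines
 *
 *   apple
 *   banana
 *   apple
 *
 * produce the two output lines "apple" and "banana" (in sorted key order).
 *
 * Illustrative invocation (the jar name is an assumption, not part of the
 * original code):
 *
 *   hadoop jar RemoveDupData.jar com.Practice.RemoveDupData
 */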
public class RemoveDupData {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Job job = Job.getInstance(conf, "RemoveDupData");
        // Locate the job jar from the driver class rather than hard-coding a
        // path; the original pointed at a leftover word-count jar.
        job.setJarByClass(RemoveDupData.class);
        job.setMapperClass(RemoveDupDataMapper.class);
        job.setReducerClass(RemoveDupDataReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        Path inputPath = new Path("input/RemoveDupData");
        Path outputPath = new Path("output/RemoveDupData");
        // MapReduce refuses to run if the output directory already exists, so
        // remove any previous run's output first.
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true);
        }
        FileInputFormat.setInputPaths(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);

        boolean success = job.waitForCompletion(true);
        System.exit(success ? 0 : 1);
    }
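
    /**
     * Emits each input line as the output key with a NullWritable value, so
     * that identical lines land in the same reduce group.
     */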
    public static class RemoveDupDataMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // The whole line is the key; the value carries no information.
            context.write(value, NullWritable.get());
        }
    }
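
    /**
     * Receives each distinct line exactly once as a key (all values are
     * NullWritable) and writes the key a single time, completing the
     * deduplication.
     */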
    public static class RemoveDupDataReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
        @Override
        protected void reduce(Text key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
            // One write per key suffices; the duplicate values are ignored.
            context.write(key, NullWritable.get());
        }
    }
}