Experiment objectives:
1. Understand the principles of distributed computing on a cluster
2. Become familiar with writing the Mapper and Reducer functions of a MapReduce (MR) program
Experiment requirements
Experiment approach
Before writing any code, we need to lay out the overall logic of this MapReduce workflow. The task is solved with two chained jobs: the first job counts, for every input file, how many times each word occurs, using keys of the form filename->word; the second job re-keys those records by the word itself, so that each word ends up with a comma-separated list of filename->count entries, and a custom Partitioner splits the final output into three files according to the word's first letter.
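As a rough illustration of the record flow (the file names and words here are made up for the example, since the actual input files are not reproduced in this report):

First job output, one line per filename->word key (key and count separated by a tab):
    file1.txt->hadoop    2
    file2.txt->hadoop    1
    file1.txt->hello     1

Second job output, one line per word:
    hadoop    file1.txt->2,file2.txt->1
    hello     file1.txt->1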
File contents
First, upload the three files to the input/DP directory on HDFS (the distributed storage layer).
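A minimal sketch of the upload, assuming the three local files are named file1.txt, file2.txt, and file3.txt (the real file names may differ):

hdfs dfs -mkdir -p input/DP
hdfs dfs -put file1.txt file2.txt file3.txt input/DP
hdfs dfs -ls input/DP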
The contents of the three files:
FirstMapper tags every word with the name of the file it came from and emits a count of 1 for each occurrence:

public class FirstMapper extends Mapper<Object, Text, Text, IntWritable> {
    @Override
    protected void map(Object k1, Text v1, Context context)
            throws IOException, InterruptedException {
        String line = v1.toString().trim();                  // extract the line content
        String[] words = line.split(" ");                    // split the line into words
        FileSplit inputSplit = (FileSplit) context.getInputSplit();
        Path path = inputSplit.getPath();
        String filename = path.getName();                    // name of the file this word comes from
        for (String word : words) {
            context.write(new Text(filename + "->" + word), new IntWritable(1));
        }
    }
}
FirstReducer sums those 1s to obtain the total number of occurrences of each filename->word key:

public class FirstReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    protected void reduce(Text k2, Iterable<IntWritable> v2s, Context context)
            throws IOException, InterruptedException {
        int count = 0;
        for (IntWritable val : v2s) {
            count += val.get();                              // accumulate v2s to get the word's total count in this file
        }
        context.write(k2, new IntWritable(count));
    }
}
SecondMapper reads the first job's output, splits the key back into file name and word, and re-keys each record by the word alone:

public class SecondMapper extends Mapper<Object, Text, Text, Text> {
    @Override
    protected void map(Object k1, Text v1, Context context)
            throws IOException, InterruptedException {
        String line = v1.toString().trim();
        String[] data = line.split("->");                    // first-job key has the form "filename->word"
        String filename = data[0];
        String[] wordcount = data[1].split("\t");            // the word and its count are separated by a tab
        String word = wordcount[0];
        String count = wordcount[1];
        context.write(new Text(word), new Text(filename + "->" + count));
    }
}
SecondReducer concatenates all filename->count entries of a word into one comma-separated list:

public class SecondReducer extends Reducer<Text, Text, Text, Text> {
    @Override
    protected void reduce(Text k2, Iterable<Text> v2s, Context context)
            throws IOException, InterruptedException {
        StringBuilder str = new StringBuilder();
        for (Text val : v2s) {
            str.append(val.toString()).append(",");          // collect every "filename->count" entry for this word
        }
        str.setLength(str.length() - 1);                     // drop the trailing comma
        context.write(k2, new Text(str.toString()));
    }
}
SecondPartitioner routes each word to one of three reducers according to its first letter, so the final result is split across three output files:

public class SecondPartitioner extends Partitioner<Text, Text> {
    @Override
    public int getPartition(Text k2, Text v2, int numPartitions) {
        String word = k2.toString().trim();
        if (word.length() == 0) return 0;
        char firstchar = Character.toUpperCase(word.charAt(0));
        // Words starting with A-M go to reducer 0, N-Z to reducer 1,
        // and everything else to reducer 2 (assumes three reduce tasks).
        if (firstchar >= 'A' && firstchar <= 'M') return 0;
        if (firstchar >= 'N' && firstchar <= 'Z') return 1;
        return 2;
    }
}
First MR job
import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class FirstMapper extends Mapper<Object, Text, Text, IntWritable> {
    @Override
    protected void map(Object k1, Text v1, Context context)
            throws IOException, InterruptedException {
        String line = v1.toString().trim();
        String[] words = line.split(" ");
        FileSplit inputSplit = (FileSplit) context.getInputSplit();
        Path path = inputSplit.getPath();
        String filename = path.getName();
        for (String word : words) {
            context.write(new Text(filename + "->" + word), new IntWritable(1));
        }
    }
}
public class FirstReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    protected void reduce(Text k2, Iterable<IntWritable> v2s, Context context)
            throws IOException, InterruptedException {
        int count = 0;
        for (IntWritable val : v2s) {
            count += val.get();
        }
        context.write(k2, new IntWritable(count));
    }
}
public class FirstMain {
    public static void main(String[] args) throws Exception {
        if (args.length != 2) {
            System.err.println("Usage: FirstMain <input path> <output path>");
            System.exit(2);
        }
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "FirstDP");
        job.setJarByClass(FirstMain.class);
        job.setMapperClass(FirstMapper.class);
        job.setReducerClass(FirstReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
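A sketch of how the first job might be packaged and run; the jar name DP.jar and the output path output/DP1 are assumptions, so adjust them to the actual environment:

hadoop jar DP.jar FirstMain input/DP output/DP1
hdfs dfs -cat output/DP1/part-r-00000

Each output line has the form filename->word, a tab, and the count of that word in that file. Since the summation in FirstReducer is associative, it could also be registered as a combiner with job.setCombinerClass(FirstReducer.class) to cut down shuffle traffic; the final result would be unchanged.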
Second MR job
import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.Partitioner;
public class SecondMapper extends Mapper<Object, Text, Text, Text> {
    @Override
    protected void map(Object k1, Text v1, Context context)
            throws IOException, InterruptedException {
        String line = v1.toString().trim();
        String[] data = line.split("->");
        String filename = data[0];
        String[] wordcount = data[1].split("\t");
        String word = wordcount[0];
        String count = wordcount[1];
        context.write(new Text(word), new Text(filename + "->" + count));
    }
}
public class SecondReducer extends Reducer<Text, Text, Text, Text> {
    @Override
    protected void reduce(Text k2, Iterable<Text> v2s, Context context)
            throws IOException, InterruptedException {
        StringBuilder str = new StringBuilder();
        for (Text val : v2s) {
            str.append(val.toString()).append(",");
        }
        str.setLength(str.length() - 1);      // drop the trailing comma
        context.write(k2, new Text(str.toString()));
    }
}
public class SecondPartitioner extends Partitioner<Text, Text> {
    @Override
    public int getPartition(Text k2, Text v2, int numPartitions) {
        String word = k2.toString().trim();
        if (word.length() == 0) return 0;
        char firstchar = Character.toUpperCase(word.charAt(0));
        if (firstchar >= 'A' && firstchar <= 'M') return 0;   // partition 0: A-M
        if (firstchar >= 'N' && firstchar <= 'Z') return 1;   // partition 1: N-Z
        return 2;                                             // partition 2: everything else
    }
}
public class SecondMain {
    public static void main(String[] args) throws Exception {
        if (args.length != 2) {
            System.err.println("Usage: SecondMain <input path> <output path>");
            System.exit(2);
        }
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "SecondDP");
        job.setJarByClass(SecondMain.class);
        job.setMapperClass(SecondMapper.class);
        job.setReducerClass(SecondReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setPartitionerClass(SecondPartitioner.class);
        job.setNumReduceTasks(3);   // one reduce task per partition: A-M, N-Z, other
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
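A sketch of running the second job on the first job's output and inspecting the three partitioned result files; again, the jar name and the paths are assumptions:

hadoop jar DP.jar SecondMain output/DP1 output/DP2
hdfs dfs -cat output/DP2/part-r-00000   # words starting with A-M
hdfs dfs -cat output/DP2/part-r-00001   # words starting with N-Z
hdfs dfs -cat output/DP2/part-r-00002   # words starting with other characters

Each line of the final output is a word followed by a comma-separated list of filename->count entries, i.e. every file the word appears in together with its count in that file.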