集群环境:namenode s100
secondarynamenode(2NN) s104
datanode s101、s102、s105
在完全分布式集群上,为了更深入地剖析MapReduce计算模型的工作机制,通过一个wordcount案例来进行分布式调试:
观察每个主机运行的进程ID,线程,运行时的类,Hashcode及其运行的方法:
1、WordCountMapper
package hadoop.mr;
import com.it18zhang.debugtool.RTUtil;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
* WordCountMapper
*/
public class WordCountMapper extends Mapper {
public WordCountMapper(){
RTUtil.sendInfo("s104",8888,this,"new()" , "");
}
protected void setup(Context context) throws IOException, InterruptedException {
RTUtil.sendInfo("s104", 8888, this, "setup()", "");
}
protected void cleanup(Context context) throws IOException, InterruptedException {
RTUtil.sendInfo("s104", 8888, this, "cleanup()", "");
}
/**
* 每一行
* @param key
* @param value
* @param context
* @throws IOException
* @throws InterruptedException
*/
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
RTUtil.sendInfo("s104", 8888, this, "map()", "value = " + value.toString());
String line = value.toString();
String[] arr = line.split(" ");
Text keyOut = new Text();
IntWritable valueOut = new IntWritable(1);
for(String word : arr){
keyOut.set(word);
context.write(keyOut,valueOut);
}
}
}
2、WordCountReducer
package hadoop.mr;
import com.it18zhang.debugtool.RTUtil;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
* WordCountReducer
*/
public class WordCountReducer extends Reducer{
public WordCountReducer(){
RTUtil.sendInfo("s104", 8888, this, "new()", "");
}
protected void setup(Context context) throws IOException, InterruptedException {
RTUtil.sendInfo("s104", 8888, this, "setup()", "");
}
protected void cleanup(Context context) throws IOException, InterruptedException {
RTUtil.sendInfo("s104", 8888, this, "cleanup()", "");
}
protected void reduce(Text key, Iterable values, Context context) throws IOException, InterruptedException {
RTUtil.sendInfo("s104", 8888, this, "reduce()", "key = " + key.toString());
int count = 0 ;
for(IntWritable iw : values){
count = count + iw.get() ;
}
context.write(key,new IntWritable(count));
}
}
3、App
package hadoop.mr;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * Driver for the word-count job: wires {@link WordCountMapper} and
 * {@link WordCountReducer} into a job, runs it and exits with a status
 * that reflects whether the job succeeded.
 */
public class App {

    /**
     * Configures and submits the word-count job.
     *
     * <p>An already existing output path is deleted recursively before the
     * job is submitted.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the
     *             output path
     * @throws Exception if file-system access or job submission fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
            System.err.println("Usage: App <input path> <output path>");
            System.exit(2);
        }
        Path inputPath = new Path(args[0]);
        Path outputPath = new Path(args[1]);

        Configuration conf = new Configuration();

        // Remove output left behind by a previous run.
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true);
        }

        Job job = Job.getInstance(conf);

        // Optional knobs for experimenting with block and split sizes
        // (minimum and maximum split size):
        // job.getConfiguration().setLong("fs.local.block.size", 1 * 1024 * 1024);
        // job.getConfiguration().set("mapreduce.input.fileinputformat.split.minsize","14");
        // job.getConfiguration().set("mapreduce.input.fileinputformat.split.maxsize","14");
        // FileInputFormat.setMinInputSplitSize(job,7);
        // FileInputFormat.setMaxInputSplitSize(job,7);

        job.setJobName("WordCount");
        job.setJarByClass(App.class);
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);

        // Input and output locations.
        FileInputFormat.addInputPath(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);

        // Key/value types of the map output and of the final output.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // The reducer doubles as the combiner; it only sums counts, so
        // applying it to partial groups does not change the final totals.
        job.setCombinerClass(WordCountReducer.class);
        job.setNumReduceTasks(2);

        // Optional: request a specific number of map tasks.
        // job.getConfiguration().set(MRJobConfig.NUM_MAPS,"3");

        // Propagate the job result so a failed job yields a non-zero exit status.
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}
4、HDFS写入3个文件
1.txt hello world tom1
2.txt hello world tom2
hello world tom3
3.txt how world tom4
how world tom5
how world tom6
5、在s104主机进行监听
输入命令 nc -lk 8888
6、调试结果
观察每个主机运行的进程ID,线程,运行时的类,Hashcode及其运行的方法:
=======3.txt 1.map===========
192.168.231.102:3631:main:WordCountMapper@1475192960:new()()
192.168.231.102:3631:main:WordCountMapper@1475192960:setup()()
192.168.231.102:3631:main:WordCountMapper@1475192960:map()(value = how world tom4)
192.168.231.102:3631:main:WordCountMapper@1475192960:map()(value = how world tom5)
192.168.231.102:3631:main:WordCountMapper@1475192960:map()(value = how world tom6)
192.168.231.102:3631:main:WordCountMapper@1475192960:cleanup()()
=======3.txt 1.combine===========
192.168.231.102:3631:main:WordCountReducer@1788380050:new()()
192.168.231.102:3631:main:WordCountReducer@1788380050:setup()()
192.168.231.102:3631:main:WordCountReducer@1788380050:reduce()(key = tom5)
192.168.231.102:3631:main:WordCountReducer@1788380050:cleanup()()
=======3.txt 2.combine===========
192.168.231.102:3631:main:WordCountReducer@307605969:new()()
192.168.231.102:3631:main:WordCountReducer@307605969:setup()()
192.168.231.102:3631:main:WordCountReducer@307605969:reduce()(key = how)
192.168.231.102:3631:main:WordCountReducer@307605969:reduce()(key = tom4)
192.168.231.102:3631:main:WordCountReducer@307605969:reduce()(key = tom6)
192.168.231.102:3631:main:WordCountReducer@307605969:reduce()(key = world)
192.168.231.102:3631:main:WordCountReducer@307605969:cleanup()()
========1.txt map===========
192.168.231.103:3498:main:WordCountMapper@1475192960:new()()
192.168.231.103:3498:main:WordCountMapper@1475192960:setup()()
192.168.231.103:3498:main:WordCountMapper@1475192960:map()(value = hello world tom1)
192.168.231.103:3498:main:WordCountMapper@1475192960:cleanup()()
========1.txt 1.combine
192.168.231.103:3498:main:WordCountReducer@1788380050:new()()
192.168.231.103:3498:main:WordCountReducer@1788380050:setup()()
192.168.231.103:3498:main:WordCountReducer@1788380050:reduce()(key = tom1)
192.168.231.103:3498:main:WordCountReducer@1788380050:cleanup()()
========1.txt 2.combine
192.168.231.103:3498:main:WordCountReducer@307605969:new()()
192.168.231.103:3498:main:WordCountReducer@307605969:setup()()
192.168.231.103:3498:main:WordCountReducer@307605969:reduce()(key = hello)
192.168.231.103:3498:main:WordCountReducer@307605969:reduce()(key = world)
192.168.231.103:3498:main:WordCountReducer@307605969:cleanup()()
========2.txt map
192.168.231.103:3499:main:WordCountMapper@1475192960:new()()
192.168.231.103:3499:main:WordCountMapper@1475192960:setup()()
192.168.231.103:3499:main:WordCountMapper@1475192960:map()(value = hello world tom2)
192.168.231.103:3499:main:WordCountMapper@1475192960:map()(value = hello world tom3)
192.168.231.103:3499:main:WordCountMapper@1475192960:cleanup()()
========2.txt 1.combiner
192.168.231.103:3499:main:WordCountReducer@1788380050:new()()
192.168.231.103:3499:main:WordCountReducer@1788380050:setup()()
192.168.231.103:3499:main:WordCountReducer@1788380050:reduce()(key = tom3)
192.168.231.103:3499:main:WordCountReducer@1788380050:cleanup()()
========2.txt 2.combiner
192.168.231.103:3499:main:WordCountReducer@307605969:new()()
192.168.231.103:3499:main:WordCountReducer@307605969:setup()()
192.168.231.103:3499:main:WordCountReducer@307605969:reduce()(key = hello)
192.168.231.103:3499:main:WordCountReducer@307605969:reduce()(key = tom2)
192.168.231.103:3499:main:WordCountReducer@307605969:reduce()(key = world)
192.168.231.103:3499:main:WordCountReducer@307605969:cleanup()()
################# reduce ##################
=========1.reduce
192.168.231.102:3692:main:WordCountReducer@95685867:new()()
192.168.231.102:3692:main:WordCountReducer@95685867:setup()()
192.168.231.102:3692:main:WordCountReducer@95685867:reduce()(key = tom1)
192.168.231.102:3692:main:WordCountReducer@95685867:reduce()(key = tom3)
192.168.231.102:3692:main:WordCountReducer@95685867:reduce()(key = tom5)
192.168.231.102:3692:main:WordCountReducer@95685867:cleanup()()
=========2.reduce
192.168.231.101:3686:main:WordCountReducer@95685867:new()()
192.168.231.101:3686:main:WordCountReducer@95685867:setup()()
192.168.231.101:3686:main:WordCountReducer@95685867:reduce()(key = hello)
192.168.231.101:3686:main:WordCountReducer@95685867:reduce()(key = how)
192.168.231.101:3686:main:WordCountReducer@95685867:reduce()(key = tom2)
192.168.231.101:3686:main:WordCountReducer@95685867:reduce()(key = tom4)
192.168.231.101:3686:main:WordCountReducer@95685867:reduce()(key = tom6)
192.168.231.101:3686:main:WordCountReducer@95685867:reduce()(key = world)
192.168.231.101:3686:main:WordCountReducer@95685867:cleanup()()