Simulating Debugging on a Distributed Cluster

    Cluster environment:  namenode            s100
                          secondary namenode  s104
                          datanode            s101, s102, s105

On the fully distributed cluster, in order to dissect the working mechanism of the MapReduce computation model more deeply, we debug a WordCount example in a distributed fashion.

For each host, we observe the process ID, thread, runtime class, hashCode, and the method being executed.
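The classes below report this information through a small helper, com.it18zhang.debugtool.RTUtil, whose source is not listed here. As a rough, hypothetical sketch (not the real implementation, which may differ), sendInfo() can be imagined as opening a TCP connection to the listener on s104 and writing one formatted line:

package com.it18zhang.debugtool;

import java.io.OutputStream;
import java.lang.management.ManagementFactory;
import java.net.InetAddress;
import java.net.Socket;

/**
 * Hypothetical sketch of the debug helper used below (not the real source):
 * it sends one line of the form "ip:pid:thread:Class@hashCode:method(args)"
 * to a remote nc listener and never lets a failure break the task.
 */
public class RTUtil {

	public static void sendInfo(String host, int port, Object caller, String method, String info) {
		try (Socket sock = new Socket(host, port)) {
			// The runtime MXBean name looks like "3631@s101"; the part before '@' is the pid.
			String pid = ManagementFactory.getRuntimeMXBean().getName().split("@")[0];
			String ip = InetAddress.getLocalHost().getHostAddress();
			String thread = Thread.currentThread().getName();
			String line = ip + ":" + pid + ":" + thread + ":"
					+ caller.getClass().getSimpleName() + "@" + caller.hashCode()
					+ ":" + method + "(" + info + ")\n";
			OutputStream out = sock.getOutputStream();
			out.write(line.getBytes("UTF-8"));
			out.flush();
		} catch (Exception e) {
			// Debug reporting must never fail the task itself.
			e.printStackTrace();
		}
	}
}

Whatever helper is actually used has to be shipped with the job (for example bundled into the job jar), since it runs inside the map and reduce task JVMs.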

1. WordCountMapper


package hadoop.mr;

import com.it18zhang.debugtool.RTUtil;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * WordCountMapper: emits (word, 1) for every word of each input line and
 * reports its lifecycle to the debug listener on s104.
 */
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
	
	public WordCountMapper(){
		RTUtil.sendInfo("s104",8888,this,"new()" , "");
	}

	@Override
	protected void setup(Context context) throws IOException, InterruptedException {
		RTUtil.sendInfo("s104", 8888, this, "setup()", "");

	}

	@Override
	protected void cleanup(Context context) throws IOException, InterruptedException {
		RTUtil.sendInfo("s104", 8888, this, "cleanup()", "");
	}

	/**
	 * Called once for each input line.
	 * @param key     byte offset of the line within the split
	 * @param value   the line text
	 * @param context used to emit (word, 1) pairs
	 * @throws IOException
	 * @throws InterruptedException
	 */
	@Override
	protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
		RTUtil.sendInfo("s104", 8888, this, "map()", "value = " + value.toString());
		String line = value.toString();
		String[] arr = line.split(" ");

		Text keyOut = new Text();
		IntWritable valueOut = new IntWritable(1);
		for(String word : arr){
			keyOut.set(word);
			context.write(keyOut,valueOut);
		}
	}
}
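
A single Text and IntWritable pair is reused for every word of the line; context.write() serializes the objects immediately, so reusing Writable instances is safe and avoids allocating one object per output record.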

2. WordCountReducer

package hadoop.mr;

import com.it18zhang.debugtool.RTUtil;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * WordCountReducer: sums the counts for each word; also registered as the combiner.
 */
public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

	public WordCountReducer(){
		RTUtil.sendInfo("s104", 8888, this, "new()", "");

	}

	@Override
	protected void setup(Context context) throws IOException, InterruptedException {
		RTUtil.sendInfo("s104", 8888, this, "setup()", "");
	}

	@Override
	protected void cleanup(Context context) throws IOException, InterruptedException {
		RTUtil.sendInfo("s104", 8888, this, "cleanup()", "");
	}

	@Override
	protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
		RTUtil.sendInfo("s104", 8888, this, "reduce()", "key = " + key.toString());
		int count = 0 ;
		for(IntWritable iw : values){
			count = count + iw.get() ;
		}
		context.write(key,new IntWritable(count));
	}
}
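
Since the driver below also registers this class as the combiner, the same new()/setup()/reduce()/cleanup() sequence shows up twice in the trace: once inside each map task for the local combine, and once again in the reduce tasks.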

3. App

package hadoop.mr;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * App: driver that configures and submits the WordCount job.
 */
public class App {
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();

		FileSystem fs = FileSystem.get(conf);
		if(fs.exists(new Path(args[1]))){
			fs.delete(new Path(args[1]),true);
		}

		Job job = Job.getInstance(conf);
//		job.getConfiguration().setLong("fs.local.block.size", 1 * 1024 * 1024);

		//set the maximum and minimum split sizes
//		job.getConfiguration().set("mapreduce.input.fileinputformat.split.minsize","14");
//		job.getConfiguration().set("mapreduce.input.fileinputformat.split.maxsize","14");

//		FileInputFormat.setMinInputSplitSize(job,7);
//		FileInputFormat.setMaxInputSplitSize(job,7);

		job.setJobName("WordCount");
		job.setJarByClass(App.class);

		job.setMapperClass(WordCountMapper.class);
		job.setReducerClass(WordCountReducer.class);

		//add the input path
		FileInputFormat.addInputPath(job,new Path(args[0]));
		//set the output path
		FileOutputFormat.setOutputPath(job,new Path(args[1]));

		//set the map output and job output key/value types
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(IntWritable.class);

		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(IntWritable.class);
		//set the combiner class
		job.setCombinerClass(WordCountReducer.class);

		job.setNumReduceTasks(2);

		//manually set the number of map tasks
//		job.getConfiguration().set(MRJobConfig.NUM_MAPS,"3");

		job.waitForCompletion(true);
	}
}
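
Because the job runs with two reduce tasks and WordCountReducer doubles as the combiner, every map task's output is split into two partitions and combined separately for each, which is why each input file in the results below is followed by two combine groups. The split is decided by Hadoop's default HashPartitioner; a small standalone sketch of its rule (the class name PartitionSketch is made up for illustration):

import org.apache.hadoop.io.Text;

/** Illustrative sketch of the rule the default HashPartitioner applies. */
public class PartitionSketch {

	// Mask off the sign bit so the value is non-negative, then bucket by modulo.
	static int partitionFor(Text key, int numReduceTasks) {
		return (key.hashCode() & Integer.MAX_VALUE) % numReduceTasks;
	}

	public static void main(String[] args) {
		for (String w : new String[]{"hello", "how", "world", "tom1", "tom2", "tom3"}) {
			System.out.println(w + " -> partition " + partitionFor(new Text(w), 2));
		}
	}
}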

4. Write three files into HDFS

1.txt        hello world tom1

2.txt        hello world tom2
             hello world tom3

3.txt        how world tom4
             how world tom5
             how world tom6
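
The files can be uploaded with hdfs dfs -put; the target directory below is only an example and must match the first argument passed to App:

  hdfs dfs -mkdir -p /user/<user>/wc/input
  hdfs dfs -put 1.txt 2.txt 3.txt /user/<user>/wc/input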

5. Start a listener on host s104

  Run the command: nc -lk 8888
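
Here -l puts nc into listen mode and -k keeps the listener open after each connection closes, so one terminal on s104 collects the trace lines from every task JVM. The job itself is submitted from a client node in the usual way, for example (jar name and paths are placeholders): hadoop jar wc.jar hadoop.mr.App /user/<user>/wc/input /user/<user>/wc/out, where args[0] is the input directory and args[1] is the output directory that App deletes first if it already exists.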

6. Debug results

For each host, observe the process ID, thread, runtime class, hashCode, and the method being executed. Each trace line has the form ip:pid:thread:Class@hashCode:method()(args):

=======3.txt  1.map===========
192.168.231.102:3631:main:WordCountMapper@1475192960:new()()
192.168.231.102:3631:main:WordCountMapper@1475192960:setup()()
192.168.231.102:3631:main:WordCountMapper@1475192960:map()(value = how world tom4)
192.168.231.102:3631:main:WordCountMapper@1475192960:map()(value = how world tom5)
192.168.231.102:3631:main:WordCountMapper@1475192960:map()(value = how world tom6)
192.168.231.102:3631:main:WordCountMapper@1475192960:cleanup()()

=======3.txt  1.combine===========
192.168.231.102:3631:main:WordCountReducer@1788380050:new()()
192.168.231.102:3631:main:WordCountReducer@1788380050:setup()()
192.168.231.102:3631:main:WordCountReducer@1788380050:reduce()(key = tom5)
192.168.231.102:3631:main:WordCountReducer@1788380050:cleanup()()

=======3.txt  2.combine===========
192.168.231.102:3631:main:WordCountReducer@307605969:new()()
192.168.231.102:3631:main:WordCountReducer@307605969:setup()()
192.168.231.102:3631:main:WordCountReducer@307605969:reduce()(key = how)
192.168.231.102:3631:main:WordCountReducer@307605969:reduce()(key = tom4)
192.168.231.102:3631:main:WordCountReducer@307605969:reduce()(key = tom6)
192.168.231.102:3631:main:WordCountReducer@307605969:reduce()(key = world)
192.168.231.102:3631:main:WordCountReducer@307605969:cleanup()()

========1.txt map===========
192.168.231.103:3498:main:WordCountMapper@1475192960:new()()
192.168.231.103:3498:main:WordCountMapper@1475192960:setup()()
192.168.231.103:3498:main:WordCountMapper@1475192960:map()(value = hello world tom1)
192.168.231.103:3498:main:WordCountMapper@1475192960:cleanup()()

========1.txt  1.combine===========
192.168.231.103:3498:main:WordCountReducer@1788380050:new()()
192.168.231.103:3498:main:WordCountReducer@1788380050:setup()()
192.168.231.103:3498:main:WordCountReducer@1788380050:reduce()(key = tom1)
192.168.231.103:3498:main:WordCountReducer@1788380050:cleanup()()

========1.txt  2.combine===========
192.168.231.103:3498:main:WordCountReducer@307605969:new()()
192.168.231.103:3498:main:WordCountReducer@307605969:setup()()
192.168.231.103:3498:main:WordCountReducer@307605969:reduce()(key = hello)
192.168.231.103:3498:main:WordCountReducer@307605969:reduce()(key = world)
192.168.231.103:3498:main:WordCountReducer@307605969:cleanup()()

========2.txt  map===========
192.168.231.103:3499:main:WordCountMapper@1475192960:new()()
192.168.231.103:3499:main:WordCountMapper@1475192960:setup()()
192.168.231.103:3499:main:WordCountMapper@1475192960:map()(value = hello world tom2)
192.168.231.103:3499:main:WordCountMapper@1475192960:map()(value = hello world tom3)
192.168.231.103:3499:main:WordCountMapper@1475192960:cleanup()()
========2.txt  1.combine===========
192.168.231.103:3499:main:WordCountReducer@1788380050:new()()
192.168.231.103:3499:main:WordCountReducer@1788380050:setup()()
192.168.231.103:3499:main:WordCountReducer@1788380050:reduce()(key = tom3)
192.168.231.103:3499:main:WordCountReducer@1788380050:cleanup()()
========2.txt  2.combine===========
192.168.231.103:3499:main:WordCountReducer@307605969:new()()
192.168.231.103:3499:main:WordCountReducer@307605969:setup()()
192.168.231.103:3499:main:WordCountReducer@307605969:reduce()(key = hello)
192.168.231.103:3499:main:WordCountReducer@307605969:reduce()(key = tom2)
192.168.231.103:3499:main:WordCountReducer@307605969:reduce()(key = world)
192.168.231.103:3499:main:WordCountReducer@307605969:cleanup()()

#################  reduce           ##################
=========1.reduce===========
192.168.231.102:3692:main:WordCountReducer@95685867:new()()
192.168.231.102:3692:main:WordCountReducer@95685867:setup()()
192.168.231.102:3692:main:WordCountReducer@95685867:reduce()(key = tom1)
192.168.231.102:3692:main:WordCountReducer@95685867:reduce()(key = tom3)
192.168.231.102:3692:main:WordCountReducer@95685867:reduce()(key = tom5)
192.168.231.102:3692:main:WordCountReducer@95685867:cleanup()()

=========2.reduce===========
192.168.231.101:3686:main:WordCountReducer@95685867:new()()
192.168.231.101:3686:main:WordCountReducer@95685867:setup()()
192.168.231.101:3686:main:WordCountReducer@95685867:reduce()(key = hello)
192.168.231.101:3686:main:WordCountReducer@95685867:reduce()(key = how)
192.168.231.101:3686:main:WordCountReducer@95685867:reduce()(key = tom2)
192.168.231.101:3686:main:WordCountReducer@95685867:reduce()(key = tom4)
192.168.231.101:3686:main:WordCountReducer@95685867:reduce()(key = tom6)
192.168.231.101:3686:main:WordCountReducer@95685867:reduce()(key = world)
192.168.231.101:3686:main:WordCountReducer@95685867:cleanup()()
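
A few things the trace confirms: each of the three input files becomes its own map task running in its own JVM (three distinct PIDs across two hosts); inside every map task the combiner is instantiated once per partition, so two WordCountReducer objects appear per mapper; and the two reduce tasks run on different hosts (192.168.231.102 and 192.168.231.101), with keys assigned to them by the hash partitioning sketched above: tom1, tom3 and tom5 land in one partition, while hello, how, world, tom2, tom4 and tom6 land in the other.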



