While writing my earlier post on Hadoop installation I realized there was no need to be so exhaustive. What I really want to record are my own insights and lessons learned, as a reference for future review, so from here on I will keep things brief.
slaves.sh uptime | sort: check the uptime and load of each Hadoop datanode.
slaves.sh jps | sort: check which Java processes are running on each datanode.
(slaves.sh runs the given command over ssh on every host listed in conf/slaves and prefixes each output line with the node's hostname, so piping through sort groups the results per node.)
When developing Hadoop programs on Windows, many people like to install an Eclipse plugin for remote debugging, but that is not really necessary: ant can achieve the same thing. Package the program with ant, then push the jar to the Hadoop cluster with scp and run it there. For this, ant needs the third-party jsch jar on its classpath (for example by dropping it into ANT_HOME/lib or passing it with ant -lib), which enables the optional scp and sshexec tasks in build.xml:
<target name="scp" depends="jar" description="将项目jar文件通过ssh拷贝到远程服务器指定目录下">
<scp file="${product}/${jar}" todir="root:liuweicai@master:~" trust="true" />
</target>
<target name="sshexec" depends="scp" description="通过ssh远程执行hadoop任务" >
<sshexec host="master" username="root" password="liuweicai" command="hadoop jar ${jar} -jt master:9001 /cache /output " trust="true"/>
</target>
With this in place, Hadoop jobs can be launched remotely over the ssh protocol.
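Because of the depends chain (sshexec depends on scp, which depends on a jar target assumed to be defined elsewhere in the build file), a single ant sshexec rebuilds the jar, copies it to master, and launches the job. Also note that the -jt generic option in the command above only works because the driver parses generic options through ToolRunner, which is exactly the pattern shown next.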
When writing the Driver class for a Hadoop job, it is recommended to extend Configured and implement the Tool interface, so that ToolRunner handles the generic Hadoop command-line options for you. Reference code:
package com.ims.hadoop.commentwordcount;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class CommentWordCountDriver extends Configured implements Tool {

    public static void main(String[] args) throws Exception {
        // ToolRunner parses the generic options (-D, -jt, -files, ...) into the
        // Configuration before handing the remaining arguments to run().
        int res = ToolRunner.run(new CommentWordCountDriver(), args);
        System.exit(res);
    }

    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = getConf();
        if (args.length != 2) {
            System.err.printf("Usage: %s [generic options] <input> <output>\n",
                    getClass().getSimpleName());
            ToolRunner.printGenericCommandUsage(System.err);
            return -1;
        }
        Path in = new Path(args[0]);
        Path out = new Path(args[1]);

        // Remove a stale output directory so a rerun does not fail.
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(out))
            fs.delete(out, true);

        // Gzip-compress the job output; set these before the Job copies the conf.
        conf.setBoolean("mapred.output.compress", true);
        conf.setClass("mapred.output.compression.codec", GzipCodec.class, CompressionCodec.class);

        Job job = new Job(conf, "StackOverFlow Comment Word Count");
        // Any class from the job jar works here; using the driver itself is safest.
        job.setJarByClass(CommentWordCountDriver.class);
        job.setMapperClass(WordCountMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, in);
        FileOutputFormat.setOutputPath(job, out);
        return job.waitForCompletion(true) ? 0 : 1;
    }
}
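The driver references WordCountMapper and IntSumReducer, which are not shown above. For completeness, here is a minimal sketch of what they might look like (each in its own file in the same package), assuming the default TextInputFormat and plain whitespace tokenization of the comment text; the implementations and field names are illustrative, not from the original project:

package com.ims.hadoop.commentwordcount;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Emits (word, 1) for every token in an input line.
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    private static final IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        StringTokenizer itr = new StringTokenizer(value.toString());
        while (itr.hasMoreTokens()) {
            word.set(itr.nextToken());
            context.write(word, ONE);
        }
    }
}

package com.ims.hadoop.commentwordcount;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// Sums the counts for each word; the same class serves as the combiner.
public class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    private final IntWritable result = new IntWritable();

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable val : values)
            sum += val.get();
        result.set(sum);
        context.write(key, result);
    }
}

With these two classes packed into the jar by ant, the whole pipeline runs end to end with the ant sshexec target shown earlier.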