Step 4. Write the jar-packaging routine
Because a MapReduce program has to run inside the Hadoop environment, it must be packaged into a jar and registered on the job configuration (JobConf.setJar). When the JobClient then submits the job, the jar is automatically uploaded to the Hadoop file system and unpacked on the cluster nodes, where the MapReduce classes are loaded and run via reflection. The helper below builds such a jar on the fly; a short usage sketch follows after the method.
// Note: this only packages the class's own package directory (one level); subdirectories are not packaged recursively.
public static File createJar(Class<?> clazz)
{
    try
    {
        // Relative path of the package, e.g. com/ejtoo
        String pathName = clazz.getPackage().getName().replace(".", "/");
        // Simple class name, used as part of the temporary jar's file name
        final String fileName = clazz.getName().substring(clazz.getName().lastIndexOf(".") + 1);
        final File file = File.createTempFile("MapreduceJar-" + fileName, ".jar", new File("."));
        // Delete the temporary jar when the JVM exits
        Runtime.getRuntime().addShutdownHook(new Thread() {
            @Override
            public void run() {
                file.delete();
            }
        });
        Manifest manifest = new Manifest();
        manifest.getMainAttributes().putValue("Manifest-Version", "1.0");
        manifest.getMainAttributes().putValue("Created-By", "ejtoo");
        JarOutputStream out = new JarOutputStream(new FileOutputStream(file), manifest);
        // Copy every file in the package directory into the jar
        File dir = new File(clazz.getResource("").getFile());
        File[] names = dir.listFiles();
        for (File name : names)
        {
            JarEntry entry = new JarEntry(pathName + "/" + name.getName());
            out.putNextEntry(entry);
            FileInputStream in = new FileInputStream(name);
            byte[] buffer = new byte[1024];
            int n = in.read(buffer);
            while (n != -1) {
                out.write(buffer, 0, n);
                n = in.read(buffer);
            }
            in.close();
        }
        out.flush();
        out.close();
        return file;
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }
}
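Once the jar exists, the only thing Hadoop needs is its path in the mapred.jar property before submission. A minimal usage sketch, assuming the createJar method above and the Maptest driver class from the next step (JobConf.setJar from the old mapred API is what sets that property):
// Minimal usage sketch (assumes createJar(...) above and the Maptest class from Step 5).
JobConf conf = new JobConf();
File jar = createJar(Maptest.class);      // temporary jar containing Maptest's package directory
conf.setJar(jar.getAbsolutePath());       // equivalent to conf.set("mapred.jar", ...)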
Step 5. Write the MapReduce program. Call the packaging method above before running the job, and provide your own map and reduce classes.
package com.ejtoo;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.StringTokenizer;
import java.util.jar.JarEntry;
import java.util.jar.JarOutputStream;
import java.util.jar.Manifest;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class Maptest extends Configured implements Tool {

    public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        @Override
        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString();
            System.out.println("line===>" + line);
            // Emit <word, 1> for every whitespace-separated token in the line
            StringTokenizer tokenizer = new StringTokenizer(line);
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                context.write(word, one);
            }
        }
    }
    public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            // Sum the counts emitted for this word
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }
    public int run(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        System.out.println(conf.get("mapred.job.tracker"));
        System.out.println(conf.get("fs.default.name"));
        // Package this class's directory into a jar (Step 4) and register it on the job,
        // so the cluster nodes can load the Mapper/Reducer classes.
        File jarFile = createJar(Maptest.class);
        ((JobConf) job.getConfiguration()).setJar(jarFile.toString());
        job.setJarByClass(Maptest.class);
        job.setJobName("wordcount");
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.setInputPaths(job, new Path("test/jjj.txt")); // input file on the Hadoop file system
        // Delete the output directory if it already exists, otherwise the job fails
        Path path = new Path("output");
        FileSystem fSystem = (DistributedFileSystem) FileSystem.get(conf);
        fSystem.delete(path, true);
        FileOutputFormat.setOutputPath(job, path);
        boolean success = job.waitForCompletion(true);
        System.out.println(job.isComplete());
        System.out.println("JobID: " + job.getJobID());
        // Without closing the RPC connection via the JobClient, the server logs
        // "readAndProcess threw exception java.io.IOException: Connection reset by peer".
        // job.getJobClient().close() would do it, but getJobClient() is not public (see the note below).
        return success ? 0 : 1;
    }
    public static void main(String[] args) throws Exception {
        // Entry point: delegate to ToolRunner so the standard Hadoop command-line options are handled
        int ret = ToolRunner.run(new Maptest(), args);
        System.exit(ret);
    }
}
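The two properties printed at the top of run() (mapred.job.tracker and fs.default.name) come from the Hadoop *-site.xml files on the local classpath; if they print null, the job silently runs in local mode. A hedged sketch of setting them in code instead (the JobTracker port 9001 is taken from the log excerpt below; the NameNode address hdfs://8.8.8.7:9000 is only an illustrative assumption):
// Sketch only: adjust the addresses to your own cluster, or simply put
// core-site.xml / mapred-site.xml on the classpath and leave the Configuration untouched.
Configuration conf = new Configuration();
conf.set("fs.default.name", "hdfs://8.8.8.7:9000");   // assumed NameNode address
conf.set("mapred.job.tracker", "8.8.8.7:9001");       // JobTracker address (port 9001 as in the log below)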
Step 6. Submit and run the MapReduce program, then check the job result in a browser at http://8.8.8.7:50030
Troubleshooting: if the JobTracker log on the server contains the exception below, it means the local program finished and closed its connection to Hadoop's IPC Server abruptly, so the server hits an error when it tries to send data back to the client (the connection has already been closed).
2013-12-26 23:43:37,821 INFO org.apache.hadoop.ipc.Server: IPC Server listener on 9001: readAndProcess threw exception java.io.IOException: Connection reset by peer. Count of bytes read: 0
java.io.IOException: Connection reset by peer
at sun.nio.ch.FileDispatcher.read0(Native Method)
at sun.nio.ch.SocketDispatcher.read(SocketDispatcher.java:21)
at sun.nio.ch.IOUtil.readIntoNativeBuffer(IOUtil.java:233)
at sun.nio.ch.IOUtil.read(IOUtil.java:206)
at sun.nio.ch.SocketChannelImpl.read(SocketChannelImpl.java:236)
at org.apache.hadoop.ipc.Server.channelRead(Server.java:1776)
at org.apache.hadoop.ipc.Server.access$2700(Server.java:97)
at org.apache.hadoop.ipc.Server$Connection.readAndProcess(Server.java:1134)
at org.apache.hadoop.ipc.Server$Listener.doRead(Server.java:577)
at org.apache.hadoop.ipc.Server$Listener$Reader.run(Server.java:384)
at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
at java.lang.Thread.run(Thread.java:619)
The exception above does not affect functionality. To avoid it you would have to call job.getJobClient().close(), but the getJobClient() method is currently not exposed (it is not public); a reflection-based workaround is sketched below.
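A hedged sketch of such a workaround, assuming the package-private getJobClient() method mentioned above exists on org.apache.hadoop.mapreduce.Job (as in the Hadoop 1.x line) and that the public JobClient.close() shuts down the client-side RPC proxy:
// Reflection-based best-effort close; call it right after job.waitForCompletion(true).
// Assumption: Job has a package-private getJobClient() returning org.apache.hadoop.mapred.JobClient.
private static void closeJobClientQuietly(Job job) {
    try {
        java.lang.reflect.Method m = Job.class.getDeclaredMethod("getJobClient");
        m.setAccessible(true);
        Object jobClient = m.invoke(job);
        jobClient.getClass().getMethod("close").invoke(jobClient); // JobClient.close() releases the RPC connection
    } catch (Exception e) {
        // Best effort only: if the method is missing or not accessible, let the JVM exit tear the connection down.
    }
}
With the connection closed cleanly on the client side, the JobTracker should no longer log the "Connection reset by peer" message when the local JVM exits.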