MapReduce Exercise (1)
The router log is as follows:
Apr 23 11:49:54 hostapd: wlan0: STA 14:7d:c5:9e:fd:84
Apr 23 11:49:52 hostapd: wlan0: STA 74:e5:0b:04:28:f2
Apr 23 11:49:50 hostapd: wlan0: STA cc:af:78:cc:d5:5d
Apr 23 11:49:44 hostapd: wlan0: STA cc:af:78:cc:d5:5d
Apr 23 11:49:52 hostapd: wlan0: STA 74:e5:0b:04:28:f2
Apr 23 11:49:54 hostapd: wlan0: STA 14:7d:c5:9e:fd:84
Requirement:
Extract the time and the MAC address from each line, and discard everything else.
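For example, the first log line above should come out as just the timestamp and the MAC address:

    Apr 23 11:49:54 14:7d:c5:9e:fd:84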
Code walkthrough:
package hadoop;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class Luyou extends Configured implements Tool {

    // A Counter records statistics about the job; here it counts malformed lines
    enum Counter {
        LINESKIP  // lines that failed to parse
    }

    public static class Map extends Mapper<LongWritable, Text, NullWritable, Text> {

        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Each call receives one line of the input file
            String line = value.toString();
            try {
                // Split the line on spaces and keep only the fields we need
                String[] lineSplit = line.split(" ");
                String month = lineSplit[0]; // e.g. "Apr"
                String day   = lineSplit[1]; // e.g. "23"
                String time  = lineSplit[2]; // e.g. "11:49:54"
                String mac   = lineSplit[6]; // e.g. "14:7d:c5:9e:fd:84"
                Text out = new Text(month + ' ' + day + ' ' + time + ' ' + mac);
                // A NullWritable key makes TextOutputFormat write only the value
                context.write(NullWritable.get(), out);
            } catch (Exception e) {
                // On any parse error, bump the LINESKIP counter and drop the line
                context.getCounter(Counter.LINESKIP).increment(1);
            }
        }
    }

    public int run(String[] args) throws Exception {
        Configuration conf = getConf();
        // Job name
        Job job = new Job(conf, "Luyou");
        // Must be the class containing this job
        job.setJarByClass(Luyou.class);
        // Input path
        FileInputFormat.addInputPath(job, new Path(args[0]));
        // Output path
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // Use the Map class above for the map tasks
        job.setMapperClass(Map.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        // Output key type
        job.setOutputKeyClass(NullWritable.class);
        // Output value type
        job.setOutputValueClass(Text.class);
        job.waitForCompletion(true);
        // Print a summary of the finished job
        System.out.println("Job name: " + job.getJobName());
        System.out.println("Succeeded: " + (job.isSuccessful() ? "yes" : "no"));
        System.out.println("Lines skipped: "
                + job.getCounters().findCounter(Counter.LINESKIP).getValue());
        return job.isSuccessful() ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        // main hands off to ToolRunner, which parses generic options and calls run()
        int res = ToolRunner.run(new Configuration(), new Luyou(), args);
        System.exit(res);
    }
}
Start Hadoop:
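A sketch of this step, assuming a pseudo-distributed installation with the Hadoop scripts on the PATH (the exact scripts depend on the version):

    start-all.sh    # Hadoop 1.x; on 2.x use start-dfs.sh and start-yarn.sh
    jps             # verify the NameNode/DataNode daemons are running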
Upload the router log to HDFS:
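A sketch of the upload; the local file name luyou.log and the HDFS directory /input are assumptions, any names work as long as the job is given the matching path:

    hadoop fs -mkdir -p /input
    hadoop fs -put luyou.log /input/
    hadoop fs -ls /input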
Imported packages:
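These are the imports from the listing above: Hadoop configuration and filesystem classes, the Writable types, the MapReduce job classes, and the Tool/ToolRunner utilities.

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.conf.Configured;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
    import org.apache.hadoop.util.Tool;
    import org.apache.hadoop.util.ToolRunner;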
The Map function:
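Excerpted from the listing above: each call parses one log line, emits the kept fields, and counts lines that fail to parse.

    public static class Map extends Mapper<LongWritable, Text, NullWritable, Text> {
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            try {
                String[] lineSplit = line.split(" ");
                String month = lineSplit[0];
                String day   = lineSplit[1];
                String time  = lineSplit[2];
                String mac   = lineSplit[6];
                context.write(NullWritable.get(),
                        new Text(month + ' ' + day + ' ' + time + ' ' + mac));
            } catch (Exception e) {
                context.getCounter(Counter.LINESKIP).increment(1);
            }
        }
    }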
The run method:
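Excerpted from the listing, this wires the job together: it names the job, registers the Map class, and declares the output types (the println summary at the end is omitted here).

    public int run(String[] args) throws Exception {
        Configuration conf = getConf();
        Job job = new Job(conf, "Luyou");
        job.setJarByClass(Luyou.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        job.setMapperClass(Map.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);
        job.waitForCompletion(true);
        return job.isSuccessful() ? 0 : 1;
    }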
The main function:
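The main function only delegates to ToolRunner, which parses the generic Hadoop options and then invokes run():

    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new Luyou(), args);
        System.exit(res);
    }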
The class declaration and the error-counting helper:
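The class extends Configured and implements Tool so that ToolRunner can drive it; the Counter enum is the small helper the Map class uses to record skipped lines.

    public class Luyou extends Configured implements Tool {
        enum Counter {
            LINESKIP  // lines that failed to parse
        }
        // ... Map class, run() and main() as shown above ...
    }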
Setting the HDFS input and output paths:
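The directories are not hard-coded; they arrive as the first and second command-line arguments:

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));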
Run result:
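A sketch of the run, assuming the class was packaged as luyou.jar (hypothetical name) and the HDFS paths used above:

    hadoop jar luyou.jar hadoop.Luyou /input /output
    hadoop fs -cat /output/part-r-00000

With the field indices from the listing, each surviving line keeps only the date, time, and MAC, so for the sample log the output file would contain (ordering may differ):

    Apr 23 11:49:54 14:7d:c5:9e:fd:84
    Apr 23 11:49:52 74:e5:0b:04:28:f2
    Apr 23 11:49:50 cc:af:78:cc:d5:5d
    Apr 23 11:49:44 cc:af:78:cc:d5:5d
    Apr 23 11:49:52 74:e5:0b:04:28:f2
    Apr 23 11:49:54 14:7d:c5:9e:fd:84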