Counting IP accesses to an application with MapReduce
1: Prepare the source data file
Source data file: access.log
Partial contents (screenshot):
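The log excerpt itself is not reproduced here, but the mapper in step 3 splits each line on "- -" and takes the leading field as the client IP, which matches the Apache common log format. A purely illustrative line in that shape (the timestamp and request below are made up for illustration):

    172.16.17.166 - - [10/Sep/2013:08:30:00 +0800] "GET /index.html HTTP/1.1" 200 1024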
2: Upload the source file to HDFS
Upload access.log to the Hadoop HDFS file system:
hadoop fs -put access.log /ip/input/
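If the target directory does not exist yet, it may need to be created first, and the upload can then be verified with a listing (both are standard hadoop fs subcommands; the exact listing format depends on the Hadoop version):

    hadoop fs -mkdir /ip/input
    hadoop fs -ls /ip/input/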
3: Write the MapReduce program:
package com.wangmm.hadoop.mapreduce;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class IPCounter {

    static final String IN_PUT = "hdfs://icity0:9000/ip/input";
    static final String OUT_PUT = "hdfs://icity0:9000/ip/output";

    public static void main(String[] args) throws IOException,
            URISyntaxException, InterruptedException, ClassNotFoundException {
        Configuration conf = new Configuration();

        // Delete the output directory if it already exists, so the job can be rerun.
        FileSystem fileSystem = FileSystem.get(new URI(IN_PUT), conf);
        Path outPath = new Path(OUT_PUT);
        if (fileSystem.exists(outPath)) {
            fileSystem.delete(outPath, true);
        }

        Job job = new Job(conf, IPCounter.class.getSimpleName());
        job.setJarByClass(IPCounter.class);

        FileInputFormat.setInputPaths(job, IN_PUT);

        job.setMapperClass(IpMapper.class);
        job.setReducerClass(IpReduce.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        FileOutputFormat.setOutputPath(job, outPath);

        job.waitForCompletion(true);
    }

    // Mapper: extract the client IP from each log line and emit (ip, 1).
    static class IpMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // In the common log format the IP is the text before the first "- -".
            String[] fields = value.toString().split("- -");
            String ip = fields[0].trim();
            context.write(new Text(ip), new LongWritable(1));
        }
    }

    // Reducer: sum the 1s emitted for each IP to get its total access count.
    static class IpReduce extends Reducer<Text, LongWritable, Text, LongWritable> {
        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context)
                throws IOException, InterruptedException {
            long count = 0L;
            for (LongWritable v : values) {
                count += v.get();
            }
            context.write(key, new LongWritable(count));
        }
    }
}
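One optional tweak, not part of the original program: because the reducer computes a pure sum, the same class can also be registered as a combiner, which pre-aggregates (ip, 1) pairs on the map side and cuts shuffle traffic. A minimal sketch, added in main() before waitForCompletion:

    // Optional: reuse the reducer as a combiner, since summation is
    // associative and commutative and the value types match.
    job.setCombinerClass(IpReduce.class);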
Project screenshot:
4: Run the MapReduce program
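The launch command is not shown here. Assuming the class above is packaged into a jar (ipcounter.jar is a hypothetical name), a typical invocation would be:

    hadoop jar ipcounter.jar com.wangmm.hadoop.mapreduce.IPCounter

The input and output paths are hard-coded in IN_PUT and OUT_PUT, so no arguments are needed; the job prints its progress to the console and exits once waitForCompletion returns.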
5: View the result in HDFS
[hadoop@icity0 ~]$ hadoop fs -cat /ip/output/part-r-00000
Warning: $HADOOP_HOME is deprecated.

172.16.17.166   31
172.16.17.198   79
172.16.18.148   186
172.16.16.219   1256
172.16.35.141   1102
172.16.35.146   2986
172.16.35.159   6
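Each line of part-r-00000 is an IP and its total access count, tab-separated. To rank the busiest clients, the output can be piped through standard shell tools (a sketch; sort flags as in GNU coreutils):

    hadoop fs -cat /ip/output/part-r-00000 | sort -k2 -nr | head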