Production Environment! Hands-On! Using Hadoop to Analyze Apache Logs in a Telecom Payment System!

1. Input Data

Each line is in the standard Apache common log format: client IP, identd and authenticated user (both "-" here), the request timestamp with timezone, the quoted request line (method, URI, protocol), the HTTP status code, and the response size in bytes:
1.2.3.4 - - [20/Feb/2016:00:05:11 +0800] "POST /zhifubao/zhifu HTTP/1.1" 200 1286
1.2.3.4 - - [20/Feb/2016:00:05:14 +0800] "POST /pay/zf HTTP/1.1" 200 96
2.2.3.4 - - [20/Feb/2016:00:05:15 +0800] "POST /zhifubao/zhifu HTTP/1.1" 200 1290
2.2.3.4 - - [20/Feb/2016:00:05:18 +0800] "POST /pay2/pay.do HTTP/1.1" 200 32
1.2.4.4 - - [20/Feb/2016:00:05:22 +0800] "POST /zhifubao/zhifu HTTP/1.1" 200 1285
1.2.3.5 - - [20/Feb/2016:00:05:23 +0800] "POST /zhifubao/zhifu HTTP/1.1" 200 1291
1.3.3.4 - - [20/Feb/2016:00:05:25 +0800] "POST /pay2/pay.do HTTP/1.1" 200 1976
2. Regular Expression: Extracting the IP, Date, and Payment Method

The pattern is built up in stages; the commented-out blocks in the test class below record each intermediate pattern together with the groups it captured against a sample line.

package com.dt.java.test;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class RhzfApacheLog {

    public static void main(String[] args) {
        /* Stage 1 -- IP, identd, user:
           String pattern = "^(\\S+) (\\S+) (\\S+)";

           m.group()  : 11.5.41.3 - -
           m.group(1) : 11.5.41.3
           m.group(2) : -
        */

        /* Stage 2 -- add the bracketed timestamp (date, time, timezone):
           String pattern = "^(\\S+) (\\S+) (\\S+) \\[([\\w/]+)([\\w:/]+)\\s([+\\-]\\d{4})\\]";

           m.group()  : 11.7.11.3 - - [18/Feb/2016:00:00:55 +0800]
           m.group(1) : 11.7.11.3
           m.group(2) : -
           m.group(3) : -
           m.group(4) : 18/Feb/2016
           m.group(5) : :00:00:55
           m.group(6) : +0800

           Number of capture groups: groupCount()=6
        */

        /* Stage 3 -- add the quoted request line (method, URI, protocol):
           String pattern = "^(\\S+) (\\S+) (\\S+) \\[([\\w/]+)([\\w:/]+)\\s([+\\-]\\d{4})\\] \"(\\S+) (\\S+) (\\S+)\"";

           m.group()  : 11.5.41.3 - - [18/Feb/2016:00:00:55 +0800] "POST /zhifubao/pay HTTP/1.1"
           m.group(1) : 11.5.41.3
           m.group(2) : -
           m.group(3) : -
           m.group(4) : 18/Feb/2016
           m.group(5) : :00:00:55
           m.group(6) : +0800
           m.group(7) : POST
           m.group(8) : /zhifubao/pay
           m.group(9) : HTTP/1.1

           Number of capture groups: groupCount()=9
        */

        /* Stage 4 (final) -- add the status code and response size:

           m.group()   : 11.5.41.3 - - [18/Feb/2016:00:00:55 +0800] "POST /zhifubao/pay HTTP/1.1" 200 1285
           m.group(1)  : 11.5.41.3
           m.group(2)  : -
           m.group(3)  : -
           m.group(4)  : 18/Feb/2016
           m.group(5)  : :00:00:55
           m.group(6)  : +0800
           m.group(7)  : POST
           m.group(8)  : /zhifubao/pay
           m.group(9)  : HTTP/1.1
           m.group(10) : 200
           m.group(11) : 1285

           Number of capture groups: groupCount()=11
        */

        // NOTE: the space before \\[ is required -- the log reads "- - [", so
        // without it (\\S+)\\[ can never match.
        String pattern = "^(\\S+) (\\S+) (\\S+) \\[([\\w/]+)([\\w:/]+)\\s([+\\-]\\d{4})\\] \"(\\S+) (\\S+) (\\S+)\" (\\d{3}) (\\d+)";

        Pattern p = Pattern.compile(pattern);
        String s = "11.5.41.3 - - [18/Feb/2016:00:00:55 +0800] \"POST /zhifubao/pay HTTP/1.1\" 200 1285";
        Matcher m = p.matcher(s);

        while (m.find()) {
            System.out.println("m.group():" + m.group());     // the entire match
            System.out.println("m.group(1):" + m.group(1));   // client IP
            System.out.println("m.group(2):" + m.group(2));   // identd
            System.out.println("m.group(3):" + m.group(3));   // user
            System.out.println("m.group(4):" + m.group(4));   // date
            System.out.println("m.group(5):" + m.group(5));   // time (with leading colon)
            System.out.println("m.group(6):" + m.group(6));   // timezone offset
            System.out.println("m.group(7):" + m.group(7));   // HTTP method
            System.out.println("m.group(8):" + m.group(8));   // request URI (the payment method)
            System.out.println("m.group(9):" + m.group(9));   // protocol
            System.out.println("m.group(10):" + m.group(10)); // status code
            System.out.println("m.group(11):" + m.group(11)); // response bytes
            System.out.println();
        }
        System.out.println("Number of capture groups: groupCount()=" + m.groupCount());
    }
}
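
As a side note, the same pattern can be written with named capture groups (Java 7+), which makes the later key construction easier to read. This is a minimal sketch, not from the original post; the class name RhzfApacheLogNamed and the group names ip/date/uri are illustrative:

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class RhzfApacheLogNamed { // hypothetical class name, for illustration only

    public static void main(String[] args) {
        // Same structure as the final pattern above, with named groups for the
        // three fields the MapReduce job will need.
        String named = "^(?<ip>\\S+) (\\S+) (\\S+) \\[(?<date>[\\w/]+)([\\w:/]+)\\s([+\\-]\\d{4})\\] \"(\\S+) (?<uri>\\S+) (\\S+)\" (\\d{3}) (\\d+)";
        Matcher nm = Pattern.compile(named)
                .matcher("11.5.41.3 - - [18/Feb/2016:00:00:55 +0800] \"POST /zhifubao/pay HTTP/1.1\" 200 1285");
        if (nm.matches()) {
            // Prints: 11.5.41.3||18/Feb/2016||/zhifubao/pay
            System.out.println(nm.group("ip") + "||" + nm.group("date") + "||" + nm.group("uri"));
        }
    }
}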

3. Output Results

Each record is the composite key IP||date||URI followed by the request count for that key; TextOutputFormat separates key and value with a tab by default. The counts below are illustrative aggregates over a much larger log, not over the seven sample lines in section 1.

1.1.1.1||18/Feb/2016||/zhifubao/zhifu    60000
1.1.1.1||19/Feb/2016||/zhifubao/zhifu    70000
1.1.1.2||18/Feb/2016||/zhifubao/zhifu    50000
1.1.1.2||19/Feb/2016||/zhifubao/zhifu    60000
1.1.1.3||18/Feb/2016||/zhifubao/zhifu    70000
1.1.1.3||19/Feb/2016||/zhifubao/zhifu    50000
1.1.1.4||18/Feb/2016||/zhifubao/zhifu    70000
1.1.1.4||19/Feb/2016||/zhifubao/zhifu    50000
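
These numbers come straight from the map and reduce logic in section 4 below. As a quick sanity check before going to the cluster, the same key/count flow can be simulated locally with a plain HashMap. This sketch is not from the original post, and LocalLogCount is a hypothetical helper name:

import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class LocalLogCount { // hypothetical helper, for local verification only

    public static void main(String[] args) {
        // Same pattern as in section 2.
        String pattern = "^(\\S+) (\\S+) (\\S+) \\[([\\w/]+)([\\w:/]+)\\s([+\\-]\\d{4})\\] \"(\\S+) (\\S+) (\\S+)\" (\\d{3}) (\\d+)";
        Pattern p = Pattern.compile(pattern);

        // Two of the sample lines from section 1.
        String[] lines = {
            "1.2.3.4 - - [20/Feb/2016:00:05:11 +0800] \"POST /zhifubao/zhifu HTTP/1.1\" 200 1286",
            "1.2.3.4 - - [20/Feb/2016:00:05:14 +0800] \"POST /pay/zf HTTP/1.1\" 200 96"
        };

        Map<String, Long> counts = new HashMap<>();
        for (String line : lines) {                // "map" phase: line -> (key, 1)
            Matcher m = p.matcher(line);
            if (m.find()) {
                String key = m.group(1) + "||" + m.group(4) + "||" + m.group(8);
                counts.merge(key, 1L, Long::sum);  // "reduce" phase: sum per key
            }
        }
        counts.forEach((k, v) -> System.out.println(k + "\t" + v));
    }
}

Here merge(key, 1L, Long::sum) plays the role of the reducer's sum; on a real cluster the shuffle performs the grouping instead.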
4. Source Code

The complete MapReduce job: the mapper turns each matching line into the key IP||date||URI with value 1, and the reducer sums the ones per key.

package com.dtspark.hadoop.hellomapreduce;

import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class RhzfApacheURLLog {
 
    public static class DataMapper
            extends Mapper<LongWritable, Text, Text, LongWritable> {

        // Same pattern as in section 2; groups 1, 4 and 8 carry the client IP,
        // the request date and the payment URI.
        private String pattern = "^(\\S+) (\\S+) (\\S+) \\[([\\w/]+)([\\w:/]+)\\s([+\\-]\\d{4})\\] \"(\\S+) (\\S+) (\\S+)\" (\\d{3}) (\\d+)";

        private Pattern p = Pattern.compile(pattern);
        private LongWritable resultValue = new LongWritable(1);
        private Text text = new Text();

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            System.out.println("Map Method Invoked!!!");
            String line = value.toString();
            String result = handleLine(line);
            if (result != null && result.length() > 0) {
                text.set(result);
                context.write(text, resultValue); // emit ("IP||date||URI", 1)
            }
        }

        // Turns one log line into the composite key "IP||date||URI";
        // returns null for lines that do not match the pattern.
        private String handleLine(String line) {
            String handResult = null;
            if (line.length() > 0) {
                Matcher m = p.matcher(line);
                while (m.find()) {
                    String mip = m.group(1);     // client IP
                    String mdate = m.group(4);   // request date
                    String malipay = m.group(8); // payment URI
                    handResult = mip.trim() + "||" + mdate.trim() + "||" + malipay.trim();

                    System.out.println("m.group(1):" + m.group(1));
                    System.out.println("m.group(4):" + m.group(4));
                    System.out.println("m.group(8):" + m.group(8));
                }
            }
            return handResult;
        }
    }

    public static class DataReducer
            extends Reducer<Text, LongWritable, Text, LongWritable> {

        private LongWritable totalresultValue = new LongWritable();

        @Override
        public void reduce(Text key, Iterable<LongWritable> values, Context context)
                throws IOException, InterruptedException {
            System.out.println("Reduce Method Invoked!!!");
            long total = 0;
            for (LongWritable item : values) {
                total += item.get();
            }
            totalresultValue.set(total); // total requests for this IP||date||URI key
            context.write(key, totalresultValue);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length < 2) {
            System.err.println("Usage: RhzfApacheURLLog <in> [<in>...] <out>");
            System.exit(2);
        }

        Job job = Job.getInstance(conf, "RhzfApacheURLLog");
        job.setJarByClass(RhzfApacheURLLog.class);
        job.setMapperClass(DataMapper.class);
        job.setReducerClass(DataReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        for (int i = 0; i < otherArgs.length - 1; ++i) {
            FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
        }
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
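
Two closing notes. First, since the reduce function is a plain associative sum, the same class can also serve as a combiner to pre-aggregate counts on the map side and cut shuffle traffic. This optional line is not part of the original job setup:

// Optional tweak (not in the original): safe because addition is
// associative and commutative, so the reducer doubles as a combiner.
job.setCombinerClass(DataReducer.class);

Second, assuming the class is packaged into a jar (the jar name here is hypothetical), the job is launched with hadoop jar payloganalysis.jar com.dtspark.hadoop.hellomapreduce.RhzfApacheURLLog <input dir> <output dir>, and the section-3 results can then be read back with hdfs dfs -cat <output dir>/part-r-00000.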