2019年安徽省大数据网络赛数据预处理(一)

数据样式

{"common":{"locationcity":0,"uid":"188495963831271424","uaid":"0","platform":"Android","app_version":"1007090002","net":"WIFI","pid":"5057","identifier":"869121033612809","cityid":"2503","iccid":"89860077221897301901","snsid":"","ts":"1557276436920","versionType":"1","pkg":"com.moji.mjweather"},"event":{"key":"NEWLIVEVIEW_QUIT_TAB","value":"0","du":""}}
{"common":{"locationcity":0,"uid":"188495963831271424","uaid":"0","platform":"Android","app_version":"1007090002","net":"WIFI","pid":"5057","identifier":"869121033612809","cityid":"2503","iccid":"89860077221897301901","snsid":"","ts":"1557276436923","versionType":"1","pkg":"com.moji.mjweather"},"event":{"key":"EVENT_ZIP_UPLOAD","value":"1","du":""},"properties":{"property1":"1"}}
{"common":{"locationcity":0,"uid":"188495963831271424","uaid":"0","platform":"Android","app_version":"1007090002","net":"WIFI","pid":"5057","identifier":"869121033612809","cityid":"2503","iccid":"89860077221897301901","snsid":"","ts":"1557276844841","versionType":"1","pkg":"com.moji.mjweather"},"event":{"key":"AMAP_LOCATION_UPDATE","value":"0","du":"446"},"properties":{"property1":"0","property3":"1"}}
{"common":{"locationcity":0,"uid":"188495963831271424","uaid":"0","platform":"Android","app_version":"1007090002","net":"WIFI","pid":"5057","identifier":"869121033612809","cityid":"2503","iccid":"89860077221897301901","snsid":"","ts":"1557276844865","versionType":"1","pkg":"com.moji.mjweather"},"event":{"key":"HTTP_START","value":"http:\/\/weather.api.moji.com\/data\/detail","du":""},"properties":{"property1":"1","property2":"0","property3":"{\"common\":{\"platform\":\"Android\",\"identifier\":\"869121033612809\",\"app_version\":\"1007090002\",\"os_version\":\"23\",\"device\":\"MYA-AL10\",\"pid\":\"5057\",\"language\":\"CN\",\"uid\":\"188495963831271424\",\"uaid\":\"0\",\"width\":720,\"height\":1192,\"package_name\":\"com.moji.mjweather\",\"amp\":\"1557276844828\",\"locationcity\":0,\"current_city\":2503,\"token\":\"ac96b2c49daaeb0e8fdc9671ede79022\"},\"params\":{\"city\":[{\"avatarId\":8,\"type\":1,\"lat\":31.28037,\"lon\":104.452387,\"coordinate\":2,\"location\":\"四川省德阳市罗江区G5京昆高速靠近侯家湾\",\"voice\":{\"lang\":\"CN\",\"tu\":\"c\",\"wu\":\"beau\"},\"cr\":1}]}}","property4":"1557276844829-f0fceefb1c2f4ef6a1fc271ed97a9bdf-188495963831271424"}}
{"common":{"locationcity":0,"uid":"188495963831271424","uaid":"0","platform":"Android","app_version":"1007090002","net":"WIFI","pid":"5057","identifier":"869121033612809","cityid":"2503","iccid":"89860077221897301901","snsid":"","ts":"1557276845076","versionType":"1","pkg":"com.moji.mjweather"},"event":{"key":"HTTP_UPDATE","value":"http:\/\/weather.api.moji.com\/data\/detail","du":"243"},"properties":{"property1":0,"property4":"1","property5":"1557276844829-f0fceefb1c2f4ef6a1fc271ed97a9bdf-188495963831271424","property6":"weather.api.moji.com\/111.13.70.18:80"}}
{"common":{"locationcity":0,"uid":"188495963831271424","uaid":"0","platform":"Android","app_version":"1007090002","net":"WIFI","pid":"5057","identifier":"869121033612809","cityid":"2503","iccid":"89860077221897301901","snsid":"","ts":"1557276845226","versionType":"1","pkg":"com.moji.mjweather"},"event":{"key":"WEATHER_UPDATE","value":"1","du":"327"},"properties":{"property1":1,"property2":-1,"property3":"http:\/\/weather.api.moji.com\/data\/detail","property4":"2","property5":"RequestParams:[city=[{\"avatarId\":8,\"type\":1,\"lat\":31.28037,\"lon\":104.452387,\"coordinate\":2,\"location\":\"四川省德阳市罗江区G5京昆高速靠近侯家湾\",\"voice\":{\"lang\":\"CN\",\"tu\":\"c\",\"wu\":\"beau\"},\"cr\":1}]], commonParams:{\"platform\":\"Android\",\"identifier\":\"869121033612809\",\"app_version\":\"1007090002\",\"os_version\":\"23\",\"device\":\"MYA-AL10\",\"pid\":\"5057\",\"language\":\"CN\",\"uid\":\"188495963831271424\",\"uaid\":\"0\",\"width\":720,\"height\":1192,\"package_name\":\"com.moji.mjweather\",\"amp\":\"1557276844828\",\"locationcity\":0,\"current_city\":2503,\"token\":\"ac96b2c49daaeb0e8fdc9671ede79022\"}","property6":"1557276844829-f0fceefb1c2f4ef6a1fc271ed97a9bdf-188495963831271424"}}
{"common":{"locationcity":0,"uid":"188495963831271424","uaid":"0","platform":"Android","app_version":"1007090002","net":"WIFI","pid":"5057","identifier":"869121033612809","cityid":"2503","iccid":"89860077221897301901","snsid":"","ts":"1557276845304","versionType":"1","pkg":"com.moji.mjweather"},"event":{"key":"SHOWER_CONDITION_CONSIS_MONITOR","value":"1","du":""},"properties":{"property1":0,"property2":0,"property3":"31.28037,104.452387"}}
{"common":{"locationcity":0,"uid":"188495963831271424","uaid":"0","platform":"Android","app_version":"1007090002","net":"WIFI","pid":"5057","identifier":"869121033612809","cityid":"2503","iccid":"89860077221897301901","snsid":"","ts":"1557276845312","versionType":"1","pkg":"com.moji.mjweather"},"event":{"key":"LOCATION_UPDATE","value":"0","du":"1096"},"properties":{"property1":"0","property3":"1"}}

需求

需求一:

  • 如果数据中uid,platform,app_version,pid四个字段不同时出现,请过滤掉
  • 将数据字段中locationcity的值为0的全部替换为1

代码

package com.mr2;
import java.io.IOException;
import java.util.regex.Pattern;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class preOne {
       public static class MyMapper extends Mapper<LongWritable,Text,Text,NullWritable>
       {
    	   /*
    	    * 数据:
    	    * {"common":{"locationcity":0,"uid":"188495963831271424","uaid":"0",
    	    * "platform":"Android","app_version":"1007090002","net":"WIFI",
    	    * "pid":"5057","identifier":"869121033612809","cityid":"2503",
    	    * "iccid":"89860077221897301901","snsid":"","ts":"1557276436920",
    	    * "versionType":"1","pkg":"com.moji.mjweather"},"event":{"key":"NEWLIVEVIEW_QUIT_TAB","value":"0","du":""}}
    	    */
    	  
    	   protected void map(LongWritable key,Text value,Context context) throws IOException,InterruptedException
    	   {
    		   StringBuffer k = new StringBuffer();
    		   String m = String.valueOf(value);
    		   /*利用if()实现规则一。*/
    		   if(m.contains("uid")&&m.contains("platform")&&m.contains("app_version")&&m.contains("pid"))
    		   {
    			  String[] n = m.split(",");
    			  /*将n[1]:{"common":{"locationcity":0在进行拆分*/
    			  String[] s = n[0].split(":");
    			  if(s[2].equals("0"))
    			  {
    				  s[2]="1";
    			  }
    			  for(int i=0;i<s.length-1;i++)
    			  {
    				  k.append(s[i]+":");
    			  }
    			  k.append(s[s.length-1]);
    			  for(int i=1;i<n.length;i++)
    			  {
    				  k.append(","+n[i]);
    			  }
    			  context.write(new Text(String.valueOf(k)), NullWritable.get());
    		   }
    		   
    	   }
       }
       public static void main(String[] args) throws IOException,InterruptedException, ClassNotFoundException
       {
    	   Configuration conf = new Configuration();
    	   Job job = Job.getInstance(conf,preOne.class.getSimpleName());
    	   job.setJarByClass(preOne.class);
    	   job.setMapperClass(MyMapper.class);
    	   /*采用默认的reducer类*/
    	   job.setReducerClass(Reducer.class);
    	   job.setMapOutputKeyClass(Text.class);
    	   job.setMapOutputValueClass(NullWritable.class);
    	   job.setOutputKeyClass(Text.class);
    	   job.setOutputValueClass(NullWritable.class);
    	   FileInputFormat.addInputPath(job,new Path(args[0]));
    	   FileOutputFormat.setOutputPath(job,new Path(args[1]));
    	   job.waitForCompletion(true);	   
       }
}

结果

2019年安徽省大数据网络赛数据预处理(一)_第1张图片

你可能感兴趣的:(MapReduce,mapreduce)