<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.ccj.pxj</groupId>
  <artifactId>IPETL</artifactId>
  <version>1.0-SNAPSHOT</version>
  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
    <hadoop.version>2.6.0-cdh5.16.2</hadoop.version>
    <hive.version>1.1.0-cdh5.16.2</hive.version>
  </properties>
  <repositories>
    <repository>
      <id>cloudera</id>
      <url>http://repository.cloudera.com/artifactory/cloudera-repos/</url>
    </repository>
  </repositories>
  <dependencies>
    <dependency>
      <groupId>org.lionsoul</groupId>
      <artifactId>ip2region</artifactId>
      <version>1.7.2</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>${hadoop.version}</version>
    </dependency>
  </dependencies>
  <build>
    <pluginManagement>
      <plugins>
        <plugin>
          <artifactId>maven-clean-plugin</artifactId>
          <version>3.1.0</version>
        </plugin>
        <plugin>
          <artifactId>maven-resources-plugin</artifactId>
          <version>3.0.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-compiler-plugin</artifactId>
          <version>3.8.0</version>
        </plugin>
        <plugin>
          <artifactId>maven-surefire-plugin</artifactId>
          <version>2.22.1</version>
        </plugin>
        <plugin>
          <artifactId>maven-jar-plugin</artifactId>
          <version>3.0.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-install-plugin</artifactId>
          <version>2.5.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-deploy-plugin</artifactId>
          <version>2.8.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-site-plugin</artifactId>
          <version>3.7.1</version>
        </plugin>
        <plugin>
          <artifactId>maven-project-info-reports-plugin</artifactId>
          <version>3.0.0</version>
        </plugin>
      </plugins>
    </pluginManagement>
  </build>
</project>
package com.ccj.pxj.pk;
import org.lionsoul.ip2region.DataBlock;
import org.lionsoul.ip2region.DbConfig;
import org.lionsoul.ip2region.DbSearcher;
import org.lionsoul.ip2region.Util;
import java.io.File;
import java.lang.reflect.Method;
/**
 * Resolves an IP address to a {@code province|city|isp} string using the
 * ip2region database that is looked up on the classpath as {@code /ip2region.db}.
 *
 * <p>Example: {@code IPUtils.parseIP("182.82.3.148")}.
 */
public class IPUtils {
    /** Classpath location of the ip2region database file. */
    private static final String DB_RESOURCE = "/ip2region.db";

    /**
     * Looks up the region of {@code ip} with the ip2region B-tree search.
     *
     * <p>A new {@link DbSearcher} is opened for every call and is always closed
     * again before the method returns.
     *
     * @param ip the IP address to resolve
     * @return {@code province|city|isp}, or {@code null} when the database file
     *         cannot be found, {@code ip} is not a valid address, or the lookup fails
     */
    public static String parseIP(String ip){
        java.net.URL dbUrl = IPUtils.class.getResource(DB_RESOURCE);
        if (dbUrl == null) {
            System.out.println("Error: Invalid ip2region.db file");
            return null;
        }
        String dbPath = dbUrl.getPath();
        if (!new File(dbPath).exists()) {
            System.out.println("Error: Invalid ip2region.db file");
            return null;
        }
        if (ip == null || !Util.isIpAddress(ip)) {
            System.out.println("Error: Invalid ip address");
            return null;
        }

        DbSearcher searcher = null;
        try {
            searcher = new DbSearcher(new DbConfig(), dbPath);
            // B-tree search; DbSearcher also offers binarySearch and memorySearch.
            DataBlock dataBlock = searcher.btreeSearch(ip);
            if (dataBlock == null) {
                return null;
            }
            return formatRegion(dataBlock.getRegion());
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        } finally {
            if (searcher != null) {
                try {
                    // Release the database file handle held by the searcher.
                    searcher.close();
                } catch (Exception closeError) {
                    closeError.printStackTrace();
                }
            }
        }
    }

    /**
     * Reduces a pipe-separated ip2region region string to its third, fourth and
     * remaining fields, i.e. drops the first two fields.
     *
     * @param region raw region string with at least five {@code |}-separated fields
     * @return the last three parts joined by {@code |}, or {@code null} when
     *         {@code region} is {@code null} or has fewer than five fields
     */
    private static String formatRegion(String region) {
        if (region == null) {
            return null;
        }
        // Limit 5 keeps everything after the fourth separator in the last part.
        String[] fields = region.split("\\|", 5);
        if (fields.length < 5) {
            return null;
        }
        return fields[2] + "|" + fields[3] + "|" + fields[4];
    }
}
package com.ccj.pxj.pk;
/**
 * One enriched access-log record.
 *
 * <p>Sample raw line:
 * {@code [01/02/2019:06:44:46 +0800] 121.77.248.104 - 651 - https://www.bilibili.com/video/av80522857 404 297 1204 MISS}
 */
public class Access {
    // Fields taken from the raw log line.
    private String ip;            // raw log field; source for country / province / city / ISP
    private String proxyIp;
    private long reponseTime;
    private String referer;
    private String method;
    private String url;           // raw log field; source for http / domain / path
    private String httpCode;
    private long requestSize;
    private long responseSize;
    private String cache;

    // Date parts.
    private String year;
    private String month;
    private String day;

    // Region parts.
    private String province;
    private String city;
    private String isp;

    // URL parts.
    private String http;
    private String domain;
    private String path;          // ==> params

    /** Creates an empty record; populate it through the setters. */
    public Access() {
    }

    /** Creates a fully populated record. */
    public Access(String ip, String proxyIp, long reponseTime, String referer, String method, String url, String httpCode, long requestSize, long responseSize, String cache, String year, String month, String day, String province, String city, String isp, String http, String domain, String path) {
        this.ip = ip;
        this.proxyIp = proxyIp;
        this.reponseTime = reponseTime;
        this.referer = referer;
        this.method = method;
        this.url = url;
        this.httpCode = httpCode;
        this.requestSize = requestSize;
        this.responseSize = responseSize;
        this.cache = cache;
        this.year = year;
        this.month = month;
        this.day = day;
        this.province = province;
        this.city = city;
        this.isp = isp;
        this.http = http;
        this.domain = domain;
        this.path = path;
    }

    public String getIp() { return this.ip; }

    public void setIp(String ip) { this.ip = ip; }

    public String getProxyIp() { return this.proxyIp; }

    public void setProxyIp(String proxyIp) { this.proxyIp = proxyIp; }

    public long getReponseTime() { return this.reponseTime; }

    public void setReponseTime(long reponseTime) { this.reponseTime = reponseTime; }

    public String getReferer() { return this.referer; }

    public void setReferer(String referer) { this.referer = referer; }

    public String getMethod() { return this.method; }

    public void setMethod(String method) { this.method = method; }

    public String getUrl() { return this.url; }

    public void setUrl(String url) { this.url = url; }

    public String getHttpCode() { return this.httpCode; }

    public void setHttpCode(String httpCode) { this.httpCode = httpCode; }

    public long getRequestSize() { return this.requestSize; }

    public void setRequestSize(long requestSize) { this.requestSize = requestSize; }

    public long getResponseSize() { return this.responseSize; }

    public void setResponseSize(long responseSize) { this.responseSize = responseSize; }

    public String getCache() { return this.cache; }

    public void setCache(String cache) { this.cache = cache; }

    public String getYear() { return this.year; }

    public void setYear(String year) { this.year = year; }

    public String getMonth() { return this.month; }

    public void setMonth(String month) { this.month = month; }

    public String getDay() { return this.day; }

    public void setDay(String day) { this.day = day; }

    public String getProvince() { return this.province; }

    public void setProvince(String province) { this.province = province; }

    public String getCity() { return this.city; }

    public void setCity(String city) { this.city = city; }

    public String getIsp() { return this.isp; }

    public void setIsp(String isp) { this.isp = isp; }

    public String getHttp() { return this.http; }

    public void setHttp(String http) { this.http = http; }

    public String getDomain() { return this.domain; }

    public void setDomain(String domain) { this.domain = domain; }

    public String getPath() { return this.path; }

    public void setPath(String path) { this.path = path; }

    /**
     * Renders the record as one tab-separated line: the ten raw fields, then
     * province, city, isp, http, domain, path, and finally year, month, day.
     * Unset string fields are rendered as the text {@code null}.
     */
    @Override
    public String toString() {
        return String.join("\t",
                ip,
                proxyIp,
                String.valueOf(reponseTime),
                referer,
                method,
                url,
                httpCode,
                String.valueOf(requestSize),
                String.valueOf(responseSize),
                cache,
                province,
                city,
                isp,
                http,
                domain,
                path,
                year,
                month,
                day);
    }
}
package com.ccj.pxj.pk;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
/** File-system helpers for the MapReduce drivers. */
public class FileUtils {
    /**
     * Recursively deletes the job output location if it already exists, so the
     * job can be re-run against the same output path.
     *
     * @param configuration Hadoop configuration used to resolve the file system
     * @param out output path to remove; may be a plain path or a fully-qualified URI
     * @throws Exception if the file system cannot be reached or the existing
     *         path could not be deleted
     */
    public static void deleteOutput(Configuration configuration, String out) throws Exception{
        Path path = new Path(out);
        // Resolve the file system from the path itself so a fully-qualified URI
        // is handled by its own file system instead of the configured default.
        FileSystem fileSystem = path.getFileSystem(configuration);
        if (!fileSystem.exists(path)) {
            return;
        }
        // delete() reports failure through its return value; only treat it as an
        // error when the path is really still there.
        if (!fileSystem.delete(path, true) && fileSystem.exists(path)) {
            throw new java.io.IOException("Failed to delete output path: " + path);
        }
    }
}
package com.ccj.pxj.pk;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
import java.net.URL;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
/**
 * ETL mapper that turns one tab-separated access-log line into an enriched
 * {@link Access} record and emits its {@code toString()} form as the output key.
 *
 * <p>Expected input columns, in order: time, ip, proxyIp, responseTime, referer,
 * method, url, httpCode, requestSize, responseSize, cache.
 *
 * <p>Counters in group {@code etl}: {@code access_total} counts every input line,
 * {@code access_format} every line that was emitted, and {@code access_error}
 * every line that was rejected.
 */
public class LogMapper extends Mapper<LongWritable, Text,Text, NullWritable> {
    /** Number of tab-separated columns read from each log line. */
    private static final int FIELD_COUNT = 11;

    // Reused across records so nothing is allocated per line for date handling.
    private final SimpleDateFormat timeFormat = new SimpleDateFormat("[dd/MM/yyyy:HH:mm:ss ZZZ]");
    // Calendar.getInstance() uses the JVM default time zone for year/month/day.
    private final Calendar calendar = Calendar.getInstance();
    private final Text outputKey = new Text();

    /**
     * Parses one log line and writes the enriched record. Lines that cannot be
     * parsed are counted as {@code access_error} and skipped; failures while
     * writing the output are propagated to the framework.
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        context.getCounter("etl","access_total").increment(1);

        Access access;
        try {
            access = parse(value.toString());
        } catch (Exception e) {
            // Dirty input line: count it and move on to the next record.
            context.getCounter("etl","access_error").increment(1);
            e.printStackTrace();
            return;
        }

        context.getCounter("etl","access_format").increment(1);
        outputKey.set(access.toString());
        // Kept outside the try block so write failures are not counted as bad input.
        context.write(outputKey, NullWritable.get());
    }

    /**
     * Builds an {@link Access} record from one raw log line.
     *
     * @param line raw tab-separated log line
     * @return the populated record
     * @throws java.text.ParseException if the timestamp does not match the expected pattern
     * @throws java.net.MalformedURLException if the url column is not a valid URL
     * @throws IllegalArgumentException if columns are missing, a numeric column is
     *         not a number, or the ip cannot be resolved to province/city/isp
     */
    private Access parse(String line) throws java.text.ParseException, java.net.MalformedURLException {
        String[] logs = line.split("\t");
        if (logs.length < FIELD_COUNT) {
            throw new IllegalArgumentException(
                    "expected at least " + FIELD_COUNT + " tab-separated fields but got " + logs.length);
        }
        String time = logs[0];
        String rawIp = logs[1];
        String proxyIp = logs[2];
        String reponseTime = logs[3];
        String referer = logs[4];
        String method = logs[5];
        String url = logs[6];
        String httpCode = logs[7];
        String requestSize = logs[8];
        String responseSize = logs[9];
        String cache = logs[10];

        Date date = timeFormat.parse(time);
        calendar.setTime(date);
        int year = calendar.get(Calendar.YEAR);
        int month = calendar.get(Calendar.MONTH) + 1; // Calendar months are zero-based
        int day = calendar.get(Calendar.DATE);

        URL parsedUrl = new URL(url);

        String region = IPUtils.parseIP(rawIp);
        if (region == null) {
            throw new IllegalArgumentException("unable to resolve region for ip: " + rawIp);
        }
        String[] regionParts = region.split("\\|");
        if (regionParts.length < 3) {
            throw new IllegalArgumentException("unexpected region format: " + region);
        }

        Access access = new Access();
        // Store the raw client address; the resolved region goes into province/city/isp.
        access.setIp(rawIp);
        access.setProxyIp(proxyIp);
        access.setReponseTime(Long.parseLong(reponseTime));
        access.setReferer(referer);
        access.setMethod(method);
        access.setUrl(url);
        access.setHttpCode(httpCode);
        access.setRequestSize(Long.parseLong(requestSize));
        access.setResponseSize(Long.parseLong(responseSize));
        access.setCache(cache);
        access.setYear(String.valueOf(year));
        access.setMonth(twoDigits(month));
        access.setDay(twoDigits(day));
        access.setProvince(regionParts[0]);
        access.setCity(regionParts[1]);
        access.setIsp(regionParts[2]);
        access.setHttp(parsedUrl.getProtocol());
        access.setDomain(parsedUrl.getAuthority());
        access.setPath(parsedUrl.getPath());
        return access;
    }

    /** Left-pads a single-digit value with one zero, e.g. {@code 7} becomes {@code "07"}. */
    private static String twoDigits(int value) {
        return value < 10 ? "0" + value : String.valueOf(value);
    }
}
package com.ccj.pxj.pk;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.CounterGroup;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Iterator;
/**
 * Driver for the access-log ETL job. Usage:
 * {@code hadoop jar <jar> com.ccj.pxj.pk.LogDriver2 <input path> <output path>}.
 */
public class LogDriver2 extends Configured implements Tool {
    private static final Logger logger = LoggerFactory.getLogger("LogDriver2");

    /** Runs the job through {@link ToolRunner} and exits with the code returned by {@link #run}. */
    public static void main(String[] args)throws Exception {
        int exitCode = ToolRunner.run(new Configuration(), new LogDriver2(), args);
        System.exit(exitCode);
    }

    /**
     * Configures and submits the job, then prints and logs every counter of the
     * {@code etl} group.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output
     *        path (an existing output path is deleted first)
     * @return {@code 0} when the job succeeded, {@code 1} when it failed,
     *         {@code 2} when the arguments are missing
     */
    @Override
    public int run(String[] args) throws Exception {
        if (args == null || args.length < 2) {
            System.err.println("Usage: LogDriver2 <input path> <output path>");
            return 2;
        }
        String inputPath = args[0];
        String outputPath = args[1];

        // 1) Obtain the Job object
        Configuration configuration = super.getConf();
        Job job = Job.getInstance(configuration);
        // Remove output left over from a previous run before submitting.
        FileUtils.deleteOutput(configuration, outputPath);

        // 2) Main class used to locate the job jar
        job.setJarByClass(LogDriver2.class);

        // 3) Mapper (no reducer class is configured here)
        job.setMapperClass(LogMapper.class);

        // 4) Mapper output types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);

        // 5) Input and output paths
        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        // 6) Submit the job and wait for it to finish
        boolean succeeded = job.waitForCompletion(true);

        CounterGroup etl = job.getCounters().getGroup("etl");
        for (Counter counter : etl) {
            System.out.println(counter.getName() + "-->" + counter.getValue());
            logger.info(counter.getName() + "-->" + counter.getValue());
        }

        // Propagate job failure to the caller instead of always reporting success.
        return succeeded ? 0 : 1;
    }
}
[15/01/2020:22:40:42 +0800] 182.82.3.148 - 1425 - GET https://www.bilibili.com/video/av73376233 202 198 - HIT
[22/12/2019:02:10:37 +0800] 139.215.218.49 - 44 - GET https://www.bilibili.com/video/av76542615 200 276 1917 MISS
[27/01/2020:22:14:03 +0800] 210.33.139.98 - 2889 - POST https://www.bilibili.com/video/av76542615 200 167 438 MISS
[03/02/2020:02:41:04 +0800] 36.63.106.187 - 1482 - GET https://www.bilibili.com/video/av76542615 200 150 2618 MISS
[20/01/2018:21:41:06 +0800] 222.88.27.61 - 2334 - GET https://www.bilibili.com/video/av30031910 200 260 - MISS
[15/02/2020:19:40:23 +0800] 121.77.147.133 - 618 - POST https://www.bilibili.com/video/av52167219 404 226 4682 HIT
[26/01/2018:18:55:35 +0800] 171.14.50.145 - 987 - GET https://www.bilibili.com/video/av80522857 200 42 - MISS
[16/01/2020:02:41:04 +0800] 139.214.37.55 - 572 - POST https://ruoze.ke.qq.com 404 139 - MISS
[16/02/2019:02:26:03 +0800] 61.232.28.171 - 183 - POST https://www.bilibili.com/video/av80522857 500 3 - MISS
[16/02/2020:01:52:27 +0800] 36.62.235.207 - 465 - GET https://www.bilibili.com/video/av52167219 506 67 - MISS
[18/01/2018:02:41:04 +0800] 171.13.167.211 - 120 - POST https://www.bilibili.com/video/av34829124 202 188 3131 MISS
[16/02/2018:02:40:29 +0800] 171.11.12.169 - 2067 - GET https://www.bilibili.com/video/av34829124 200 294 - MISS
[09/02/2020:02:40:19 +0800] 36.59.81.58 - 366 - POST https://www.bilibili.com/video/av73376233 500 259 - HIT
[06/02/2019:20:04:06 +0800] 121.76.66.41 - 429 - GET https://www.bilibili.com/video/av73376233 506 213 - HIT
[04/01/2018:22:41:05 +0800] 36.61.136.75 - 2573 - GET https://www.bilibili.com/video/av80522857 500 114 1258 MISS
[15/02/2020:21:53:48 +0800] 171.15.138.128 - 1000 - POST https://ruoze.ke.qq.com 506 243 2470 MISS
[02/02/2019:02:40:49 +0800] 222.79.99.1 - 2239 - GET https://ruoze.ke.qq.com 200 230 2240 MISS
[29/01/2020:02:38:06 +0800] 121.77.237.110 - 517 - GET https://www.bilibili.com/video/av52167219 506 138 1136 MISS
[06/02/2018:02:17:03 +0800] 123.234.170.91 - 603 - GET https://ruoze.ke.qq.com 202 107 - HIT
[28/01/2020:02:41:03 +0800] 182.83.239.168 - 2160 - POST https://www.bilibili.com/video/av34829124 404 35 4113 MISS
[04/02/2020:00:00:43 +0800] 121.77.104.20 - 2450 - POST https://ruoze.ke.qq.com 202 194 4828 MISS
[16/02/2020:02:41:03 +0800] 171.14.120.192 - 1318 - POST https://www.bilibili.com/video/av34829124 500 224 1549 MISS
[19/12/2017:22:51:44 +0800] 36.61.143.111 - 725 - GET https://ruoze.ke.qq.com 200 174 1361 HIT
[18/01/2020:23:41:06 +0800] 139.214.152.182 - 53 - GET https://www.bilibili.com/video/av76542615 404 128 - HIT
[03/01/2020:00:41:07 +0800] 210.27.223.68 - 1065 - POST https://www.bilibili.com/video/av76542615 506 157 - HIT
[16/02/2019:02:41:04 +0800] 171.8.149.129 - 2081 - POST https://www.bilibili.com/video/av30031910 506 8 2961 HIT
[13/02/2020:02:41:07 +0800] 121.76.103.138 - 887 - POST https://www.bilibili.com/video/av30031910 500 4 - HIT
[05/01/2019:02:41:05 +0800] 182.90.180.94 - 838 - GET https://www.bilibili.com/video/av76542615 202 34 1170 MISS
[16/02/2020:02:34:25 +0800] 123.235.173.110 - 1668 - GET https://www.bilibili.com/video/av73376233 200 237 - HIT
[12/02/2020:02:03:00 +0800] 222.77.179.225 - 1295 - POST https://www.bilibili.com/video/av52167219 202 1 1345 HIT
[07/02/2018:19:21:22 +0800] 61.233.188.93 - 2652 - POST https://www.bilibili.com/video/av30031910 506 98 3506 HIT
[11/02/2019:01:40:38 +0800] 182.91.225.89 - 2619 - GET https://www.bilibili.com/video/av73376233 506 108 - MISS
[16/02/2020:02:41:05 +0800] 222.94.117.246 - 2082 - GET https://www.bilibili.com/video/av73376233 202 213 3334 HIT
[16/02/2018:02:40:57 +0800] 106.89.69.81 - 2380 - GET https://www.bilibili.com/video/av52167219 506 140 1719 MISS
[16/02/2020:02:41:05 +0800] 139.196.86.93 - 1784 - POST http://www.ruozedata.com 500 65 - MISS
[15/01/2020:22:29:06 +0800] 210.38.109.213 - 2791 - GET https://www.bilibili.com/video/av30031910 506 143 3052 MISS
[16/02/2020:02:41:06 +0800] 123.234.46.110 - 2315 - GET https://www.bilibili.com/video/av76542615 200 136 3662 HIT
[22/01/2020:00:02:16 +0800] 121.76.226.122 - 1356 - GET http://www.ruozedata.com 202 17 4435 MISS
[24/01/2020:02:41:06 +0800] 123.235.227.183 - 382 - GET https://www.bilibili.com/video/av34829124 506 177 - MISS
[15/02/2019:22:41:04 +0800] 106.92.141.175 - 153 - POST https://www.bilibili.com/video/av80522857 404 1 3425 MISS
[16/02/2019:02:41:05 +0800] 171.10.3.71 - 1697 - POST https://www.bilibili.com/video/av52167219 404 255 - MISS
[15/02/2020:22:20:04 +0800] 139.208.181.68 - 64 - POST https://www.bilibili.com/video/av52167219 500 62 - MISS
[13/02/2020:20:40:29 +0800] 61.236.83.33 - 567 - POST https://www.bilibili.com/video/av34829124 506 91 - HIT
[28/01/2020:02:40:25 +0800] 123.234.219.81 - 331 - GET https://www.bilibili.com/video/av52167219 506 78 - MISS
[16/02/2019:02:19:46 +0800] 171.10.57.64 - 2478 - GET https://www.bilibili.com/video/av30031910 200 129 - HIT
[30/12/2019:02:33:50 +0800] 171.13.243.236 - 2730 - POST https://ruoze.ke.qq.com 500 34 - MISS
[16/02/2020:02:41:03 +0800] 182.81.91.30 - 518 - GET https://www.bilibili.com/video/av30031910 200 178 - HIT
[16/02/2020:02:41:06 +0800] 61.236.246.172 - 246 - GET https://www.bilibili.com/video/av52167219 202 234 2256 HIT
[16/02/2020:02:40:23 +0800] 36.58.17.244 - 1872 - GET https://ruoze.ke.qq.com 202 244 - MISS
[16/02/2020:00:17:04 +0800] 121.76.207.213 - 2839 - POST https://www.bilibili.com/video/av76542615 200 113 - HIT
[pxj@pxj /home/pxj/app/offline-dw/lib]$export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:ip2region-1.7.2.jar:ip2region.db
[pxj@pxj /home/pxj/app/offline-dw/lib]$hadoop jar IPETL-1.0-SNAPSHOT.jar com.ccj.pxj.pk.LogDriver2 -libjars ip2region-1.7.2.jar,ip2region.db /user/pxj/pxj/app/offline-dw/data/access.log /user/pxj/pxj/app/offline-dw/outpath
20/02/17 12:40:36 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
20/02/17 12:40:38 INFO client.RMProxy: Connecting to ResourceManager at /0.0.0.0:8032
20/02/17 12:40:40 INFO input.FileInputFormat: Total input paths to process : 1
20/02/17 12:40:41 INFO mapreduce.JobSubmitter: number of splits:1
20/02/17 12:40:41 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1581901965461_0003
20/02/17 12:40:42 INFO impl.YarnClientImpl: Submitted application application_1581901965461_0003
20/02/17 12:40:42 INFO mapreduce.Job: The url to track the job: http://pxj:38088/proxy/application_1581901965461_0003/
20/02/17 12:40:42 INFO mapreduce.Job: Running job: job_1581901965461_0003
20/02/17 12:40:55 INFO mapreduce.Job: Job job_1581901965461_0003 running in uber mode : false
20/02/17 12:40:55 INFO mapreduce.Job: map 0% reduce 0%
20/02/17 12:41:08 INFO mapreduce.Job: map 100% reduce 0%
20/02/17 12:41:19 INFO mapreduce.Job: map 100% reduce 100%
20/02/17 12:41:20 INFO mapreduce.Job: Job job_1581901965461_0003 completed successfully
20/02/17 12:41:20 INFO mapreduce.Job: Counters: 52
File System Counters
FILE: Number of bytes read=4233
FILE: Number of bytes written=297683
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
HDFS: Number of bytes read=5584
HDFS: Number of bytes written=4131
HDFS: Number of read operations=6
HDFS: Number of large read operations=0
HDFS: Number of write operations=2
Job Counters
Launched map tasks=1
Launched reduce tasks=1
Data-local map tasks=1
Total time spent by all maps in occupied slots (ms)=11821
Total time spent by all reduces in occupied slots (ms)=6834
Total time spent by all map tasks (ms)=11821
Total time spent by all reduce tasks (ms)=6834
Total vcore-milliseconds taken by all map tasks=11821
Total vcore-milliseconds taken by all reduce tasks=6834
Total megabyte-milliseconds taken by all map tasks=12104704
Total megabyte-milliseconds taken by all reduce tasks=6998016
Map-Reduce Framework
Map input records=50
Map output records=24
Map output bytes=4155
Map output materialized bytes=4233
Input split bytes=124
Combine input records=0
Combine output records=0
Reduce input groups=24
Reduce shuffle bytes=4233
Reduce input records=24
Reduce output records=24
Spilled Records=48
Shuffled Maps =1
Failed Shuffles=0
Merged Map outputs=1
GC time elapsed (ms)=883
CPU time spent (ms)=6370
Physical memory (bytes) snapshot=460800000
Virtual memory (bytes) snapshot=5547888640
Total committed heap usage (bytes)=399507456
Shuffle Errors
BAD_ID=0
CONNECTION=0
IO_ERROR=0
WRONG_LENGTH=0
WRONG_MAP=0
WRONG_REDUCE=0
etl
access_error=26
access_format=24
access_total=50
File Input Format Counters
Bytes Read=5460
File Output Format Counters
Bytes Written=4131
access_error-->26
20/02/17 12:41:20 INFO LogDriver2: access_error-->26
access_format-->24
20/02/17 12:41:20 INFO LogDriver2: access_format-->24
access_total-->50
20/02/17 12:41:20 INFO LogDriver2: access_total-->50
[pxj@pxj /home/pxj/app/offline-dw/lib]$hadoop fs -text /user/pxj/pxj/app/offline-dw/outpath/part*
20/02/17 12:53:01 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
上海|上海市|有线通 - 1356 - GET http://www.ruozedata.com 202 17 4435 MISS 上海 上海市 有线通 http www.ruozedata.com 2020 01 22
上海|上海市|有线通 - 2450 - POST https://ruoze.ke.qq.com 202 194 4828 MISS 上海 上海市 有线通 https ruoze.ke.qq.com 2020 02 04
上海|上海市|有线通 - 517 - GET https://www.bilibili.com/video/av52167219 506 138 1136 MISS 上海 上海市 有线通 https www.bilibili.com /video/av52167219 2020 01 29
上海|上海市|有线通 - 618 - POST https://www.bilibili.com/video/av52167219 404 226 4682 HIT 上海 上海市 有线通 https www.bilibili.com /video/av52167219 2020 02 15
吉林省|长春市|联通 - 44 - GET https://www.bilibili.com/video/av76542615 200 276 1917 MISS 吉林省 长春市 联通https www.bilibili.com /video/av76542615 2019 12 22
安徽省|合肥市|电信 - 2573 - GET https://www.bilibili.com/video/av80522857 500 114 1258 MISS 安徽省 合肥市 电信https www.bilibili.com /video/av80522857 2018 01 04
安徽省|合肥市|电信 - 725 - GET https://ruoze.ke.qq.com 200 174 1361 HIT 安徽省 合肥市 电信 https ruoze.ke.qq.com 2017 12 19
安徽省|安庆市|电信 - 1482 - GET https://www.bilibili.com/video/av76542615 200 150 2618 MISS 安徽省 安庆市 电信https www.bilibili.com /video/av76542615 2020 02 03
山东省|青岛市|联通 - 2315 - GET https://www.bilibili.com/video/av76542615 200 136 3662 HIT 山东省 青岛市 联通https www.bilibili.com /video/av76542615 2020 02 16
广东省|广州市|教育网 - 2791 - GET https://www.bilibili.com/video/av30031910 506 143 3052 MISS 广东省 广州市 教育网 https www.bilibili.com /video/av30031910 2020 01 15
广西|梧州市|联通 - 838 - GET https://www.bilibili.com/video/av76542615 202 34 1170 MISS 广西 梧州市 联通https www.bilibili.com /video/av76542615 2019 01 05
江苏省|南京市|电信 - 2082 - GET https://www.bilibili.com/video/av73376233 202 213 3334 HIT 江苏省 南京市 电信https www.bilibili.com /video/av73376233 2020 02 16
河南省|信阳市|电信 - 1318 - POST https://www.bilibili.com/video/av34829124 500 224 1549 MISS 河南省 信阳市 电信https www.bilibili.com /video/av34829124 2020 02 16
河南省|郑州市|电信 - 2081 - POST https://www.bilibili.com/video/av30031910 506 8 2961 HIT 河南省 郑州市 电信https www.bilibili.com /video/av30031910 2019 02 16
河南省|驻马店市|电信 - 1000 - POST https://ruoze.ke.qq.com 506 243 2470 MISS 河南省 驻马店市 电信 https ruoze.ke.qq.com 2020 02 15
河南省|驻马店市|电信 - 120 - POST https://www.bilibili.com/video/av34829124 202 188 3131 MISS 河南省 驻马店市 电信 https www.bilibili.com /video/av34829124 2018 01 18
浙江省|舟山市|教育网 - 2889 - POST https://www.bilibili.com/video/av76542615 200 167 438 MISS 浙江省 舟山市 教育网 https www.bilibili.com /video/av76542615 2020 01 27
福建省|厦门市|电信 - 2239 - GET https://ruoze.ke.qq.com 200 230 2240 MISS 福建省 厦门市 电信 https ruoze.ke.qq.com 2019 02 02
福建省|福州市|电信 - 1295 - POST https://www.bilibili.com/video/av52167219 202 1 1345 HIT 福建省 福州市 电信https www.bilibili.com /video/av52167219 2020 02 12
重庆|重庆市|电信 - 153 - POST https://www.bilibili.com/video/av80522857 404 1 3425 MISS 重庆 重庆市 电信https www.bilibili.com /video/av80522857 2019 02 15
重庆|重庆市|电信 - 2380 - GET https://www.bilibili.com/video/av52167219 506 140 1719 MISS 重庆 重庆市 电信https www.bilibili.com /video/av52167219 2018 02 16
陕西省|商洛市|陕西广电 - 2160 - POST https://www.bilibili.com/video/av34829124 404 35 4113 MISS 陕西省 商洛市 陕西广电 https www.bilibili.com /video/av34829124 2020 01 28
陕西省|汉中市|铁通 - 246 - GET https://www.bilibili.com/video/av52167219 202 234 2256 HIT 陕西省 汉中市 铁通https www.bilibili.com /video/av52167219 2020 02 16
黑龙江省|哈尔滨市|铁通 - 2652 - POST https://www.bilibili.com/video/av30031910 506 98 3506 HIT 黑龙江省 哈尔滨市 铁通 https www.bilibili.com /video/av30031910 2018 02 07
作者:pxj(潘陈)
日期:2020-02-17 下午12:54:32
你若安好便是晴天,愿汝一切安好!