1
2
3
4
5
|
log_format main
'$remote_addr - $remote_user [$time_local] "$request" '
'$status $body_bytes_sent "$http_referer" '
'"$http_user_agent" "$http_x_forwarded_for" '
'"$gzip_ratio" $request_time $bytes_sent $request_length '
'"$upstream_addr" $upstream_status $upstream_response_time'
;
|
1
2
3
4
5
6
7
8
9
10
11
12
13
|
8.8.8.8 - - [22
/Aug/2014
:20:23:45 +0800]
"GET / HTTP/1.1"
200 2373
"-"
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36"
"-"
"2.78"
0.004 2683 369
"unix:/var/run/php5-fpm.sock"
200 0.004
8.8.8.8 - - [22
/Aug/2014
:20:23:45 +0800]
"GET 8.8.8.8/b519d8ca/css/base.css HTTP/1.1"
200 940
"http://xxx.com/"
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36"
"-"
"-"
0.000 1247 373
"-"
- -
8.8.8.8 - - [22
/Aug/2014
:20:23:45 +0800]
"GET 8.8.8.8/a3e2e507/jquery.min.js HTTP/1.1"
200 93636
"http://xxx.com/"
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36"
"-"
"-"
0.152 93976 359
"-"
- -
8.8.8.8 - - [22
/Aug/2014
:20:23:45 +0800]
"GET 8.8.8.8/b519d8ca/image/logo.png HTTP/1.1"
200 6059
"http://xxx.com/"
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36"
"-"
"-"
0.000 6369 377
"-"
- -
8.8.8.8 - - [22
/Aug/2014
:20:23:45 +0800]
"GET 8.8.8.8/b519d8ca/image/p02.jpg HTTP/1.1"
200 22177
"http://xxx.com/"
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36"
"-"
"-"
0.000 22489 376
"-"
- -
8.8.8.8 - - [22
/Aug/2014
:20:23:45 +0800]
"GET 8.8.8.8/b519d8ca/image/p03.png HTTP/1.1"
200 3012
"http://xxx.com/"
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36"
"-"
"-"
0.000 3321 376
"-"
- -
8.8.8.8 - - [22
/Aug/2014
:20:23:45 +0800]
"GET 8.8.8.8/b519d8ca/image/two-dimension-code1.png HTTP/1.1"
200 761
"http://xxx.com/"
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36"
"-"
"-"
0.000 1069 392
"-"
- -
8.8.8.8 - - [22
/Aug/2014
:20:23:45 +0800]
"GET 8.8.8.8/b519d8ca/image/bg.png HTTP/1.1"
200 11474
"http://xxx.com/"
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36"
"-"
"-"
0.000 11785 375
"-"
- -
8.8.8.8 - - [22
/Aug/2014
:20:23:45 +0800]
"GET 8.8.8.8/b519d8ca/image/p04.png HTTP/1.1"
200 2860
"http://xxx.com/"
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36"
"-"
"-"
0.000 3169 376
"-"
- -
8.8.8.8 - - [22
/Aug/2014
:20:23:45 +0800]
"GET 8.8.8.8/b519d8ca/image/p06.png HTTP/1.1"
200 74097
"http://xxx.com/"
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36"
"-"
"-"
0.062 74409 376
"-"
- -
8.8.8.8 - - [22
/Aug/2014
:20:23:45 +0800]
"GET 8.8.8.8/b519d8ca/image/p05.png HTTP/1.1"
200 132072
"http://xxx.com/"
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36"
"-"
"-"
0.256 132385 376
"-"
- -
8.8.8.8 - - [22
/Aug/2014
:20:23:46 +0800]
"GET 8.8.8.8/b519d8ca/image/p07.png HTTP/1.1"
200 207987
"http://xxx.com/"
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36"
"-"
"-"
0.592 208300 376
"-"
- -
8.8.8.8 - - [22
/Aug/2014
:20:23:46 +0800]
"GET 8.8.8.8/b519d8ca/image/p01.png HTTP/1.1"
200 310418
"http://xxx.com/"
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36"
"-"
"-"
0.918 310731 376
"-"
- -
|
1
2
3
4
5
|
root@m1:
/home/hadoop
# /home/hadoop/hadoop-2.2.0/bin/hadoop fs -mkdir /user/hive/warehouse/nginxlog
root@m1:
/home/hadoop
# /home/hadoop/hadoop-2.2.0/bin/hadoop fs -ls /user/hive/warehouse
Found 1 items
drwxr-xr-x - root supergroup 0 2014-01-22 23:13
/user/hive/warehouse/nginxlog
root@m1:
/home/hadoop
#
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
|
package
idoall.org.hive;
import
org.apache.hadoop.hive.ql.exec.Description;
import
org.apache.hadoop.hive.ql.exec.UDF;
import
org.apache.hadoop.hive.ql.udf.UDFType;
import
org.apache.hadoop.io.LongWritable;
/**
* UDFRowSequence.
*/
@Description
(name =
"row_sequence"
,
value =
"_FUNC_() - Returns a generated row sequence number starting from 1"
)
@UDFType
(deterministic =
false
, stateful =
true
)
public
class
UDFRowSequence
extends
UDF
{
private
LongWritable result =
new
LongWritable();
public
UDFRowSequence() {
result.set(
0
);
}
public
LongWritable evaluate() {
result.set(result.get() +
1
);
return
result;
}
}
|
1
2
3
4
|
hive>
ADD
JAR /home/hadoop/hive-0.13.1/lib/idoall.org-0.0.1-SNAPSHOT-jar-
with
-dependencies.jar;
Added /home/hadoop/hive-0.13.1/lib/idoall.org-0.0.1-SNAPSHOT-jar-
with
-dependencies.jar
to
class path
Added resource: /home/hadoop/hive-0.13.1/lib/idoall.org-0.0.1-SNAPSHOT-jar-
with
-dependencies.jar
hive>
|
1
2
3
|
hive>
CREATE
TEMPORARY
FUNCTION
rowSequence
AS
'idoall.org.hive.UDFRowSequence'
;
OK
Time
taken: 0.048 seconds
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
|
create
external
table
nginx_accesslog(
host string,
hostuser string,
times string,
requestmethond string,
requesturl string,
requesthttp string,
status string,
body_bytes_sent string,
referer string,
useragent string,
http_x_forwarded_for string,
gzip_ratio string,
request_time string,
bytes_sent string,
request_length string,
upstream_addr string,
upstream_status string,
upstream_response_time string)
PARTITIONED
BY
(
YEAR
STRING,
MONTH
STRING,
DAY
STRING)
row format SERDE
'org.apache.hadoop.hive.contrib.serde2.RegexSerDe'
WITH
SERDEPROPERTIES (
"input.regex"
=
"([^ ]*)\\s+-\\s+(.+?|-)\\s+\\[(.*)\\]\\s+\"([^ ]*)\\s+([^ ]*)\\s+([^ |\"]*)\"\\s+(-|[0-9]*)\\s+(-|[0-9]*)\\s+\"(.+?|-)\"\\s+\"(.+?|-)\"\\s+\"(.+?|-)\"\\s+\"(.+?|-)\"\\s+(.+?|-)\\s+(.+?|-)\\s+(.+?|-)\\s+\"(.+?|-)\"\\s+(.+?|-)\\s+(.*)"
,
"output.format.string"
=
"%1$s %2$s %3$s %4$s %5$s %6$s %7$s %8$s %9$s %10$s %11$s %12$s %13$s %14$s %15$s %16$s %17$s %18$s"
)
STORED
AS
TEXTFILE
location
'/user/hive/warehouse/nginxlog'
;
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
|
hive>
LOAD
DATA
LOCAL
INPATH
'/home/hadoop/hive-0.13.1/a.com.access.20140821.log'
OVERWRITE
INTO
TABLE
nginx_accesslog partition (
YEAR
=
'2014'
,
MONTH
=
'08'
,
DAY
=
'21'
);
Copying data
from
file:/home/hadoop/hive-0.13.1/a.com.access.20140821.log
Copying file: file:/home/hadoop/hive-0.13.1/a.com.access.20140821.log
Loading data
to
table
default
.nginx_accesslog partition (
year
=2014,
month
=08,
day
=21)
Partition
default
.nginx_accesslog{
year
=2014,
month
=08,
day
=21} stats: [numFiles=1, numRows=0, totalSize=3483, rawDataSize=0]
OK
Time
taken: 1.046 seconds
hive>
select
count
(0)
from
nginx_accesslog;
Total jobs = 1
Launching Job 1
out
of
1
Number
of
reduce tasks determined
at
compile
time
: 1
In
order
to
change the average
load
for
a reducer (
in
bytes):
set
hive.
exec
.reducers.bytes.per.reducer=<number>
In
order
to
limit the maximum number
of
reducers:
set
hive.
exec
.reducers.
max
=<number>
In
order
to
set
a constant number
of
reducers:
set
mapreduce.job.reduces=<number>
Starting Job = job_1408550631561_0005, Tracking URL = http://m1:8088/proxy/application_1408550631561_0005/
Kill Command = /home/hadoop/hadoop-2.2.0/bin/hadoop job -kill job_1408550631561_0005
Hadoop job information
for
Stage-1: number
of
mappers: 1; number
of
reducers: 1
2014-08-22 23:19:55,322 Stage-1 map = 0%, reduce = 0%
2014-08-22 23:20:01,669 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 0.74 sec
2014-08-22 23:20:08,926 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 1.59 sec
MapReduce Total cumulative CPU
time
: 1 seconds 590 msec
Ended Job = job_1408550631561_0005
MapReduce Jobs Launched:
Job 0: Map: 1 Reduce: 1 Cumulative CPU: 1.59 sec HDFS
Read
: 3734 HDFS Write: 3 SUCCESS
Total MapReduce CPU
Time
Spent: 1 seconds 590 msec
OK
13
Time
taken: 24.762 seconds, Fetched: 1 row(s)
hive>
|
1
2
3
4
5
|
root@m1:
/home/hadoop
# /home/hadoop/hadoop-2.2.0/bin/hadoop fs -copyFromLocal /home/hadoop/hive-0.13.1/a.com.access.20140821.log /user/hive/warehouse/nginxlog/
root@m1:
/home/hadoop
# /home/hadoop/hadoop-2.2.0/bin/hadoop fs -ls /user/hive/warehouse/nginxlog
Found 1 items
-rw-r--r-- 3 root supergroup 3483 2014-08-22 23:18
/user/hive/warehouse/nginxlog/a
.com.access.20140821.log
root@m1:
/home/hadoop
#
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
|
#先删除表
hive> drop table nginx_accesslog;
OK
Time taken: 0.363 seconds
hive>
#再创建表
....此处省略,参考上文命令重新创建一次
#从HDFS导入数据(如果文件存在,要先删除),从下图可以看到,数据导入成功
hive> LOAD DATA inpath
'/user/hive/warehouse/nginxlog/a.com.access.20140821.log'
overwrite INTO TABLE nginx_accesslog partition (YEAR=
'2014'
, MONTH=
'08'
,DAY=
'21'
);
Loading data to table default.nginx_accesslog partition (year=2014, month=08, day=21)
Partition default.nginx_accesslog{year=2014, month=08, day=21} stats: [numFiles=1, numRows=0, totalSize=3483, rawDataSize=0]
OK
Time taken: 0.373 seconds
hive>
select
* from nginx_accesslog limit 100;
OK
8.8.8.8 - 22
/Aug/2014
:20:23:45 +0800 GET / HTTP
/1
.1 200 2373 - Mozilla
/5
.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit
/537
.36 (KHTML, like Gecko) Chrome
/38
.0.2125.0 Safari
/537
.36 - 2.78 0.004 2683 369 unix:
/var/run/php5-fpm
.sock 200 0.004 2014 08 21
8.8.8.8 - 22
/Aug/2014
:20:23:45 +0800 GET 8.8.8.8
/b519d8ca/css/base
.css HTTP
/1
.1 200 940 http:
//xxx
.com/ Mozilla
/5
.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit
/537
.36 (KHTML, like Gecko) Chrome
/38
.0.2125.0 Safari
/537
.36 - - 0.000 1247 373 - - - 2014 08 21
8.8.8.8 - 22
/Aug/2014
:20:23:45 +0800 GET 8.8.8.8
/a3e2e507/jquery
.min.js HTTP
/1
.1 200 93636 http:
//xxx
.com/ Mozilla
/5
.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit
/537
.36 (KHTML, like Gecko) Chrome
/38
.0.2125.0 Safari
/537
.36 - - 0.152 93976 359 - - - 2014 08 21
8.8.8.8 - 22
/Aug/2014
:20:23:45 +0800 GET 8.8.8.8
/b519d8ca/image/logo
.png HTTP
/1
.1 200 6059 http:
//xxx
.com/ Mozilla
/5
.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit
/537
.36 (KHTML, like Gecko) Chrome
/38
.0.2125.0 Safari
/537
.36 - - 0.000 6369 377 - - - 2014 08 21
8.8.8.8 - 22
/Aug/2014
:20:23:45 +0800 GET 8.8.8.8
/b519d8ca/image/p02
.jpg HTTP
/1
.1 200 22177 http:
//xxx
.com/ Mozilla
/5
.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit
/537
.36 (KHTML, like Gecko) Chrome
/38
.0.2125.0 Safari
/537
.36 - - 0.000 22489 376 - - - 2014 08 21
8.8.8.8 - 22
/Aug/2014
:20:23:45 +0800 GET 8.8.8.8
/b519d8ca/image/p03
.png HTTP
/1
.1 200 3012 http:
//xxx
.com/ Mozilla
/5
.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit
/537
.36 (KHTML, like Gecko) Chrome
/38
.0.2125.0 Safari
/537
.36 - - 0.000 3321 376 - - - 2014 08 21
8.8.8.8 - 22
/Aug/2014
:20:23:45 +0800 GET 8.8.8.8
/b519d8ca/image/two-dimension-code1
.png HTTP
/1
.1 200 761 http:
//xxx
.com/ Mozilla
/5
.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit
/537
.36 (KHTML, like Gecko) Chrome
/38
.0.2125.0 Safari
/537
.36 - - 0.000 1069 392 - - - 2014 08 21
8.8.8.8 - 22
/Aug/2014
:20:23:45 +0800 GET 8.8.8.8
/b519d8ca/image/bg
.png HTTP
/1
.1 200 11474 http:
//xxx
.com/ Mozilla
/5
.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit
/537
.36 (KHTML, like Gecko) Chrome
/38
.0.2125.0 Safari
/537
.36 - - 0.000 11785 375 - - - 2014 08 21
8.8.8.8 - 22
/Aug/2014
:20:23:45 +0800 GET 8.8.8.8
/b519d8ca/image/p04
.png HTTP
/1
.1 200 2860 http:
//xxx
.com/ Mozilla
/5
.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit
/537
.36 (KHTML, like Gecko) Chrome
/38
.0.2125.0 Safari
/537
.36 - - 0.000 3169 376 - - - 2014 08 21
8.8.8.8 - 22
/Aug/2014
:20:23:45 +0800 GET 8.8.8.8
/b519d8ca/image/p06
.png HTTP
/1
.1 200 74097 http:
//xxx
.com/ Mozilla
/5
.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit
/537
.36 (KHTML, like Gecko) Chrome
/38
.0.2125.0 Safari
/537
.36 - - 0.062 74409 376 - - - 2014 08 21
8.8.8.8 - 22
/Aug/2014
:20:23:45 +0800 GET 8.8.8.8
/b519d8ca/image/p05
.png HTTP
/1
.1 200 132072 http:
//xxx
.com/ Mozilla
/5
.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit
/537
.36 (KHTML, like Gecko) Chrome
/38
.0.2125.0 Safari
/537
.36 - - 0.256 132385 376 - - - 2014 08 21
8.8.8.8 - 22
/Aug/2014
:20:23:46 +0800 GET 8.8.8.8
/b519d8ca/image/p07
.png HTTP
/1
.1 200 207987 http:
//xxx
.com/ Mozilla
/5
.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit
/537
.36 (KHTML, like Gecko) Chrome
/38
.0.2125.0 Safari
/537
.36 - - 0.592 208300 376 - - - 2014 08 21
8.8.8.8 - 22
/Aug/2014
:20:23:46 +0800 GET 8.8.8.8
/b519d8ca/image/p01
.png HTTP
/1
.1 200 310418 http:
//xxx
.com/ Mozilla
/5
.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit
/537
.36 (KHTML, like Gecko) Chrome
/38
.0.2125.0 Safari
/537
.36 - - 0.918 310731 376 - - - 2014 08 21
Time taken: 0.056 seconds, Fetched: 13 row(s)
hive>
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
|
CREATE
TABLE
h2b_nginx_accesslog(
key
int
,
host string,
hostuser string,
times string,
requestmethond string,
requesturl string,
requesthttp string,
status string,
body_bytes_sent string,
referer string,
useragent string,
http_x_forwarded_for string,
gzip_ratio string,
request_time string,
bytes_sent string,
request_length string,
upstream_addr string,
upstream_status string,
upstream_response_time string)
STORED
BY
'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH
SERDEPROPERTIES (
"hbase.columns.mapping"
=
":key,log:host,log:hostuser,log:times,log:requestmethond,log:requesturl,log:requesthttp,log:status,log:body_bytes_sent,log:referer,log:useragent,log:http_x_forwarded_for,log:gzip_ratio,log:request_time,log:bytes_sent,log:request_length,log:upstream_addr,log:upstream_status,log:upstream_response_time"
) TBLPROPERTIES (
"hbase.table.name"
=
"h2b_nginx_accesslog"
);
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
|
hbase(main):002:0> list
TABLE
h2b_nginx_accesslog
1 row(s)
in
0.1220 seconds
=> [
"h2b_nginx_accesslog"
]
/* 查看表结构时,只会显示列族,而不会显示列。Hbase表中的每个列,都归属与某个列族。列族是表的chema的一部分(而列不是)。*/
hbase(main):003:0> describe
"h2b_nginx_accesslog"
DESCRIPTION ENABLED
'h2b_nginx_accesslog'
, {
NAME
=>
'log'
, DATA_BLOCK_ENCODING =>
'NONE'
, BLOOMFILTER =>
'ROW'
, REPLICATION_SCOPE =>
'0'
, VERSIONS =>
'1'
, CO
true
MPRESSION =>
'NONE'
, MIN_VERSIONS =>
'0'
, TTL =>
'2147483647'
, KEEP_DELETED_CELLS =>
'false'
, BLOCKSIZE =>
'65536'
, IN_MEMORY =>
'false'
,
BLOCKCACHE =>
'true'
}
1 row(s)
in
0.5890 seconds
hbase(main):004:0>
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
|
insert
overwrite
table
h2b_nginx_accesslog
select
a.*
from
(
select
rowSequence(),
host,
hostuser,
times,
requestmethond,
requesturl,
requesthttp,
status,
body_bytes_sent,
referer,
useragent,
http_x_forwarded_for,
gzip_ratio,
request_time,
bytes_sent,
request_length,
upstream_addr,
upstream_status,
upstream_response_time
from
nginx_accesslog) a;
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
|
hive>
insert
overwrite
table
h2b_nginx_accesslog
>
select
a.*
>
from
(
select
> rowSequence(),
> host,
> hostuser,
> times,
> requestmethond,
> requesturl,
> requesthttp,
> status,
> body_bytes_sent,
> referer,
> useragent,
> http_x_forwarded_for,
> gzip_ratio,
> request_time,
> bytes_sent,
> request_length,
> upstream_addr,
> upstream_status,
> upstream_response_time
>
from
nginx_accesslog) a;
Total jobs = 1
Launching Job 1
out
of
1
Number
of
reduce tasks
is
set
to
0 since there's
no
reduce operator
Starting Job = job_1408550631561_0017, Tracking URL = http://m1:8088/proxy/application_1408550631561_0017/
Kill Command = /home/hadoop/hadoop-2.2.0/bin/hadoop job -kill job_1408550631561_0017
Hadoop job information
for
Stage-0: number
of
mappers: 1; number
of
reducers: 0
2014-08-24 11:57:24,051 Stage-0 map = 0%, reduce = 0%
2014-08-24 11:57:32,403 Stage-0 map = 100%, reduce = 0%, Cumulative CPU 1.96 sec
MapReduce Total cumulative CPU
time
: 1 seconds 960 msec
Ended Job = job_1408550631561_0017
MapReduce Jobs Launched:
Job 0: Map: 1 Cumulative CPU: 1.96 sec HDFS
Read
: 3734 HDFS Write: 0 SUCCESS
Total MapReduce CPU
Time
Spent: 1 seconds 960 msec
OK
Time
taken: 20.378 seconds
hive>
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
|
hbase(main):013:0> get
"h2b_nginx_accesslog"
,1
COLUMN
CELL
log:body_bytes_sent
timestamp
=1408852652522, value=2373
log:bytes_sent
timestamp
=1408852652522, value=2683
log:gzip_ratio
timestamp
=1408852652522, value=2.78
log:host
timestamp
=1408852652522, value=8.8.8.8
log:hostuser
timestamp
=1408852652522, value=-
log:http_x_forwarded_for
timestamp
=1408852652522, value=-
log:referer
timestamp
=1408852652522, value=-
log:request_length
timestamp
=1408852652522, value=369
log:request_time
timestamp
=1408852652522, value=0.004
log:requesthttp
timestamp
=1408852652522, value=HTTP/1.1
log:requestmethond
timestamp
=1408852652522, value=GET
log:requesturl
timestamp
=1408852652522, value=/
log:status
timestamp
=1408852652522, value=200
log:times
timestamp
=1408852652522, value=22/Aug/2014:20:23:45 +0800
log:upstream_addr
timestamp
=1408852652522, value=unix:/var/run/php5-fpm.sock
log:upstream_response_time
timestamp
=1408852652522, value=0.004
log:upstream_status
timestamp
=1408852652522, value=200
log:useragent
timestamp
=1408852652522, value=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML,
like
Gecko) Chrome/38.0.2125.0 Safari/537.36
18 row(s)
in
0.0440 seconds
hbase(main):015:0> get
"h2b_nginx_accesslog"
,1,{
COLUMN
=>
'log:useragent'
}
COLUMN
CELL
log:useragent
timestamp
=1408852652522, value=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML,
like
Gecko) Chrome/38.0.2125.0 Safari/537.36
1 row(s)
in
0.0080 seconds
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
|
hbase(main):031:0> import org.apache.hadoop.hbase.filter.CompareFilter
hbase(main):032:0> import org.apache.hadoop.hbase.filter.SingleColumnValueFilter
hbase(main):033:0> import org.apache.hadoop.hbase.filter.SubstringComparator
hbase(main):034:0> import org.apache.hadoop.hbase.util.Bytes
hbase(main):035:0> scan
"h2b_nginx_accesslog"
,{FILTER => SingleColumnValueFilter.new(Bytes.toBytes(
'log'
),Bytes.toBytes(
'requesturl'
),CompareFilter::CompareOp.valueOf(
'EQUAL'
),SubstringComparator.new(
'p04.png'
))}
ROW
COLUMN
+CELL
9
column
=log:body_bytes_sent,
timestamp
=1408852652522, value=2860
9
column
=log:bytes_sent,
timestamp
=1408852652522, value=3169
9
column
=log:gzip_ratio,
timestamp
=1408852652522, value=-
9
column
=log:host,
timestamp
=1408852652522, value=8.8.8.8
9
column
=log:hostuser,
timestamp
=1408852652522, value=-
9
column
=log:http_x_forwarded_for,
timestamp
=1408852652522, value=-
9
column
=log:referer,
timestamp
=1408852652522, value=http://xxx.com/
9
column
=log:request_length,
timestamp
=1408852652522, value=376
9
column
=log:request_time,
timestamp
=1408852652522, value=0.000
9
column
=log:requesthttp,
timestamp
=1408852652522, value=HTTP/1.1
9
column
=log:requestmethond,
timestamp
=1408852652522, value=GET
9
column
=log:requesturl,
timestamp
=1408852652522, value=8.8.8.8/b519d8ca/image/p04.png
9
column
=log:status,
timestamp
=1408852652522, value=200
9
column
=log:times,
timestamp
=1408852652522, value=22/Aug/2014:20:23:45 +0800
9
column
=log:upstream_addr,
timestamp
=1408852652522, value=-
9
column
=log:upstream_response_time,
timestamp
=1408852652522, value=-
9
column
=log:upstream_status,
timestamp
=1408852652522, value=-
9
column
=log:useragent,
timestamp
=1408852652522, value=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML,
like
Gecko) Chrome/38.0.2125.0 Sa
fari/537.36
1 row(s)
in
0.0320 seconds
hbase(main):036:0>
|
1
|
hive>
add
jar /home/hjl/hive/lib/hive_contrib.jar;
|