Hive开发例子- lzo方式存储

lzo的设置:

set mapred.output.compress=true; 

set hive.exec.compress.output=true; 
set mapred.output.compression.codec=com.hadoop.compression.lzo.LzopCodec; 

set io.compression.codecs=com.hadoop.compression.lzo.LzopCodec;  

执行脚本如下:

#!/bin/bash
day=$1
bash="./"
running="首焦统计上游数据click_orc is not ready"
partitionNumsOfDay=`hive -e "show partitions click_orc" | grep day=$day/hour= | wc -l`
sql="  
set mapred.output.compress=true;
set hive.exec.compress.output=true; 
set mapred.output.compression.codec=com.hadoop.compression.lzo.LzopCodec; 
set io.compression.codecs=com.hadoop.compression.lzo.LzopCodec;  
INSERT OVERWRITE TABLE app.clicks_statis partition (dt='$day')
       SELECT 
       advertise_pin,
       CASE WHEN mobile_type == 0 THEN 'PC'
            ELSE '无线'
       END as mobile_type,
       pos_id,
       ad_position as pos_name,
       to_url,
       to_url_clicks
  FROM (SELECT advertise_pin,
               pos_id,
               mobile_type,
	       regexp_extract(to_url, '(.*)(\\\?|\\\&){1}(jj_pop)', 1) as to_url,
               count(to_url) as to_url_clicks
          FROM click_orc
         where day = '$day'
           AND pos_id in (1,2,3)
         group by advertise_pin, pos_id, mobile_type,regexp_extract(to_url, '(.*)(\\\?|\\\&){1}(jj_pop)', 1)) click_orc
  JOIN (SELECT id, ad_position
          FROM media_chain
         WHERE dp = 'ACTIVE'
           AND id in (1,2,3)) media
    ON click_orc.pos_id = media.id;"

if [ $partitionNumsOfDay -eq 24 ]; then
    hive -e "$sql"
else
    println("$running")
fi

建表语句如下:

create external table clicks_statis(
advertise_pin      string                  comment '广告主pin '
,mobile_type     string                  comment '点击设备类型'
,pos_id     bigint                  comment '广告位id'
,pos_name     string                  comment '广告位名称'
,to_url       string                 comment ''
,to_url_clicks  bigint   comment '点击次数'
) comment '首焦落地页点击次数统计'
partitioned by(dt string)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES
('field.delim' = '\t',
'serialization.null.format' = ''
)STORED AS INPUTFORMAT "com.hadoop.mapred.DeprecatedLzoTextInputFormat"
OUTPUTFORMAT "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"
;



你可能感兴趣的:(Hive开发例子- lzo方式存储)