hive、sql union关联,嵌套查询,分组排序查询等语法

UNION 关联

-- Count distinct users across two event sources for January 2019:
-- topic-detail views in the bookstore log, plus post-detail 'show' events
-- in the ireader track log.
-- UNION ALL (not UNION) is correct here: cross-branch duplicates are already
-- collapsed by COUNT(DISTINCT user_id), so pre-deduplication would only add cost.
SELECT COUNT(DISTINCT user_id)
FROM (
    SELECT user_id
    FROM idata_ods.t_ods_bookstore_d
    -- ds is a string partition key, so BETWEEN on 'YYYY-MM-DD' strings is safe here
    WHERE ds BETWEEN '2019-01-01' AND '2019-01-31'
      -- NOTE(review): this <> filter is redundant — the IN list below already
      -- excludes 'ClientApi_Channel.FrequencyDetail'. Kept to preserve behavior.
      AND busi_type <> 'ClientApi_Channel.FrequencyDetail'
      AND busi_type IN ('sns.topic_detail', 'Topic.Detail')

    UNION ALL

    SELECT user_id
    FROM idata_ods.t_ods_ireader_track_d
    WHERE get_json_object(biz_json, '$.cli_res_type') = 'show'
      AND page_type = 'postdetail'
      AND ds BETWEEN '2019-01-01' AND '2019-01-31'
) AS a

嵌套查询

字段形式（嵌套 JSON）：biz_json = { "ext": { "level": ... } }

get_json_object(biz_json,'$.ext.level')

查询最后的时间

show partitions import_user_points_log

查询文件大小

desc formatted import_user_points_log;

dfs -du -h hdfs://dap-cluster/warehouse/import_user_points_log;


（图 1：示例截图）

分组排序查询


-- For each user's payments on books whose id starts with '300', number the
-- rows by first_pay_ds ascending: the user's earliest payment gets rank = 1.
CREATE TABLE idata_tmp.wzhdata_tingshu AS
SELECT
    userid AS userid,
    first_pay_ds AS ftime,
    ROW_NUMBER() OVER (PARTITION BY userid ORDER BY first_pay_ds) rank
FROM idata_dwd.t_dwd_user_book_pay_increment
WHERE ds <= '2019-02-26'
  AND bookid LIKE '300%'

row_number() over(partition by userid order by first_pay_ds) rank —— 其中 partition by userid 按用户分组，order by first_pay_ds 按时间从小到大排序；时间最小的一行 rank 标记为 1。

-- Per (userid, amount) group, number rows by ds DESCENDING, so the most
-- recent ds in each group gets rank = 1.
-- (Original note, translated: for largest-first ordering, DESC is written at
-- the end of the ORDER BY expression inside the OVER clause. It had been
-- fused into the FROM line as bare text, which would break execution.)
SELECT
    ds,
    userid,
    amount,
    ROW_NUMBER() OVER (PARTITION BY userid, amount ORDER BY ds DESC) rank
FROM idata_tmp.wzhdata_huaweisccz

循环日期跑数据

首先创建一个表

-- Target table for a per-day backfill: one row per channel per stat date.
-- IF NOT EXISTS makes the DDL idempotent, so the script can be re-run
-- without failing on the second execution.
CREATE TABLE IF NOT EXISTS idata_tmp.wzhdata_tingshu_laoyonghu_07
(channel string     -- distribution channel identifier
,people  int        -- user count for that channel on that day
)
PARTITIONED BY (ds string)      -- one partition per stat date 'YYYY-MM-DD'
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE;

# Backfill one partition per day, 2019-01-01 through 2019-02-26 (57 days).
# NOTE(review): the original inserted into idata_tmp.wzhdata_tingshu_xinyonghu_07
# while the table created for this loop is ..._laoyonghu_07 — unified on the
# created table here; confirm which target was intended.
for ((i=0;i<=56;i++))
do
  # Stat date for this iteration: 2019-01-01 plus $i days (GNU date).
  vd=`date -d "2019-01-01 $i days" +%Y-%m-%d`

  # INSERT OVERWRITE so re-running one day replaces that day's partition
  # instead of appending duplicate rows.
  hive -e "
  insert overwrite table idata_tmp.wzhdata_tingshu_laoyonghu_07 partition(ds='$vd')
  -- The selected columns must match the table's schema (channel, people).
  select channel, count(distinct userid) as people   -- placeholder: put the real daily SQL here
  from XXXXXX                                        -- TODO: replace with the actual source table
  where ds = '$vd'
  group by channel
  -- tip: date_add('$vd', 1) yields the next calendar day if needed
  "
done

限制imei 前2位为 85 或者 86

substr(imei,1,2) in ('85','86')

在表中追加新的字段

alter table idata_tmp.wzhdata_huawei_user add columns (dw string)

在data_tmp.wzhdata_huawei_user表中追加dw字段

你可能感兴趣的:(hive、sql union关联,嵌套查询,分组排序查询等语法)