Combining result sets with UNION
select count(distinct user_id) from
(select user_id from idata_ods.t_ods_bookstore_d
 where ds between '2019-01-01' and '2019-01-31'
 and busi_type <> 'ClientApi_Channel.FrequencyDetail'
 and busi_type in ('sns.topic_detail','Topic.Detail')
 union all
 select user_id from idata_ods.t_ods_ireader_track_d
 where get_json_object(biz_json,'$.cli_res_type') = 'show'
 and page_type = 'postdetail'
 and ds between '2019-01-01' and '2019-01-31') as a
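For reference, union all keeps duplicate rows while union removes them (at the cost of an extra deduplication step); since the outer query already does count(distinct user_id), the cheaper union all is sufficient here. A minimal sketch of the difference, using hypothetical tables t1 and t2:
select count(*) from (select user_id from t1 union all select user_id from t2) a;  -- counts every row from both sides
select count(*) from (select user_id from t1 union select user_id from t2) a;      -- counts rows left after deduplication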
Nested JSON fields
Field layout: biz_json { ext { level } }, i.e. the level field sits inside the nested ext object
get_json_object(biz_json,'$.ext.level')
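A slightly fuller sketch, assuming biz_json looks like {"ext":{"level":3,"grade":"a"}} (the grade key is made up for illustration); get_json_object takes a JSONPath, so nested fields are reached with '$.outer.inner':
select get_json_object(biz_json, '$.ext.level') as level,
       get_json_object(biz_json, '$.ext.grade') as grade
from idata_ods.t_ods_ireader_track_d
where ds = '2019-01-31';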
Check the latest partition (time)
show partitions import_user_points_log
Check the table's file size
desc formatted import_user_points_log;
dfs -du -h hdfs://dap-cluster/warehouse/import_user_points_log;
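To grab just the most recent partition from the shell (assuming partitions are listed in ascending ds order, which is the usual behavior of show partitions):
hive -e "show partitions import_user_points_log" | tail -1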
Ranking within groups (row_number)
create table idata_tmp.wzhdata_tingshu as
select userid, first_pay_ds as ftime,
       row_number() over (partition by userid order by first_pay_ds) as rank
from idata_dwd.t_dwd_user_book_pay_increment
where ds <= '2019-02-26'
and bookid like '300%'
In row_number() over (partition by userid order by first_pay_ds), partition by userid groups the rows per user and order by first_pay_ds sorts them by time ascending, so each user's earliest row gets rank 1.
select ds, userid, amount,
       row_number() over (partition by userid, amount order by ds desc) as rank
from idata_tmp.wzhdata_huaweisccz
Writing desc at the end of the order by sorts from largest to smallest.
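To keep only each user's first payment (one row per user), wrap the ranking query and filter on rank = 1; a sketch based on the tingshu query above:
select userid, ftime
from (
  select userid, first_pay_ds as ftime,
         row_number() over (partition by userid order by first_pay_ds) as rank
  from idata_dwd.t_dwd_user_book_pay_increment
  where ds <= '2019-02-26'
  and bookid like '300%'
) t
where t.rank = 1;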
Looping over dates to backfill data
First create the target table:
create table idata_tmp.wzhdata_tingshu_laoyonghu_07
(channel string
,people int
)
PARTITIONED BY (ds string)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE;
for ((i=0;i<=56;i++))
do
vd=`date -d "2019-01-01 $i days" +%Y-%m-%d`
hive -e "
insert overwrite table idata_tmp.wzhdata_tingshu_laoyonghu_07 partition(ds='$vd')
-- use insert overwrite so rerunning a day replaces that partition instead of piling up data day by day
-- the selected columns must line up with the target table's columns (channel, people)
XXXXXX -- write the SQL query here
-- date_add('$vd', 1) adds one day to '$vd' when needed
"
done
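As a concrete illustration of the loop body (the inner select is only a placeholder for whatever goes where XXXXXX is; using busi_type as the channel is an assumption), counting daily users per channel into the table created above:
for ((i=0; i<=56; i++))
do
vd=`date -d "2019-01-01 $i days" +%Y-%m-%d`
hive -e "
insert overwrite table idata_tmp.wzhdata_tingshu_laoyonghu_07 partition(ds='$vd')
select busi_type as channel, count(distinct user_id) as people
from idata_ods.t_ods_bookstore_d
where ds = '$vd'
group by busi_type
"
done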
Restrict imei to values whose first two characters are 85 or 86
substr(imei,1,2) in ('85','86')
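Used inside a query it looks like this (a sketch; whether t_ods_ireader_track_d actually carries an imei column is an assumption):
select count(distinct imei)
from idata_ods.t_ods_ireader_track_d
where ds = '2019-01-31'
and substr(imei, 1, 2) in ('85', '86');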
Adding a new column to an existing table
alter table idata_tmp.wzhdata_huawei_user add columns (dw string)
This adds a dw column to the idata_tmp.wzhdata_huawei_user table.
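For a partitioned table, adding CASCADE propagates the new column to existing partitions' metadata as well; a sketch reusing the partitioned table from above:
alter table idata_tmp.wzhdata_tingshu_laoyonghu_07 add columns (dw string) cascade;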