今天执行hql时,遇到问题。
一直报错说不能识别user.user_id,估计是user别名用得不对。后来我修改了 表的别名为没出现的表名就执行成功了
hive (hive)> select user.user_id,user.date_dt,user.low_carbon
> from
> user_low_carbon user
> join
> (select user_id,date_dt
> from
> (select user_id,date_dt,
> datediff(date_dt,lag2) lag2_diff,
> datediff(date_dt,lag1) lag1_diff,
> datediff(date_dt,lead1) lead1_diff,
> datediff(date_dt,lead2) lead2_diff
> from
> (select user_id ,date_dt,
> lag(date_dt,2,'1970-01-01') over(partition by user_id order by date_dt) lag2,
> lag(date_dt,1,'1970-01-01') over(partition by user_id order by date_dt) lag1,
> lead(date_dt,1,'1970-01-01') over(partition by user_id order by date_dt) lead1,
> lead(date_dt,2,'1970-01-01') over(partition by user_id order by date_dt) lead2
> from
> (select user_id,date_format(regexp_replace(date_dt,'/','-'),'yyyy-MM-dd') date_dt,sum(low_carbon) sum_low_carbon
> from user_low_carbon
> where
> substring(date_dt,1,4)='2017'
> group by user_id,date_dt
> having
> sum_low_carbon>=100)t1)t2)t3
> where
> (lag2_diff=2 and lag1_diff=1)
> or
> (lag1_diff=1 and lead1_diff=-1)
> or
> (lead1_diff=-1 and lead2_diff=-2))t4
> on
> t4.user_id=user.user_id and t4.date_dt=date_format(regexp_replace(user.date_dt,'/','-'),'yyyy-MM-dd');
正确代码:
select a.user_id,a.date_dt,a.low_carbon
from
user_low_carbon a
join
(select user_id,date_dt
from
(select user_id,date_dt,
datediff(date_dt,lag2) lag2_diff,
datediff(date_dt,lag1) lag1_diff,
datediff(date_dt,lead1) lead1_diff,
datediff(date_dt,lead2) lead2_diff
from
(select user_id ,date_dt,
lag(date_dt,2,'1970-01-01') over(partition by user_id order by date_dt) lag2,
lag(date_dt,1,'1970-01-01') over(partition by user_id order by date_dt) lag1,
lead(date_dt,1,'1970-01-01') over(partition by user_id order by date_dt) lead1,
lead(date_dt,2,'1970-01-01') over(partition by user_id order by date_dt) lead2
from
(select user_id,date_format(regexp_replace(date_dt,'/','-'),'yyyy-MM-dd') date_dt,sum(low_carbon) sum_low_carbon
from user_low_carbon
where
substring(date_dt,1,4)='2017'
group by user_id,date_dt
having
sum_low_carbon>=100)t1)t2)t3
where
(lag2_diff=2 and lag1_diff=1)
or
(lag1_diff=1 and lead1_diff=-1)
or
(lead1_diff=-1 and lead2_diff=-2)) t4
on
t4.user_id=a.user_id and t4.date_dt=date_format(regexp_replace(a.date_dt,'/','-'),'yyyy-MM-dd');
这个多层嵌套的hql语句优点复杂,总共执行了三个mapreducer!