-- Generic template: stack t1 and t2, then keep exactly one row per value of a.
-- Within each group of equal a, rows are numbered by (b, c) ascending and only
-- the first one (rn = 1) is returned.
with combined as (
    select a, b, c from t1
    union all
    select a, b, c from t2
)
select
    a,
    b,
    c
from (
    select
        a,
        b,
        c,
        -- order by is the clause that pairs with partition by in a window spec
        -- (sort by pairs with distribute by)
        row_number() over (partition by a order by b, c) as rn
    from combined
-- a derived table in the from clause is given an explicit alias
) ranked
where rn = 1
;
-- Note on the query above: it deduplicates on column a.
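-- Business scenario: deduplicating work orders.
-- The data comes from MySQL and ES. Once a work order is completed in MySQL it is synced to ES.
-- If that record has not been deleted from MySQL, it ends up being synced twice, which causes errors in the later full outer join between old and new.
-- The problem therefore has to be handled in the ODS layer.
-- Approach: use a window function partitioned by orderid and ordered by source. When an order is duplicated, keep the row that comes from ES.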
-- Load ods_order for partition dt = $do_date from two sources, keeping one row per orderid.
-- Both sources are stacked with union all, rows are ranked per orderid, and only rank 1 is written.
-- When the same orderid is present in both sources, the row tagged es is the one kept.
with merged_orders as (
    -- Source 1: the ES index, tagged comefrom = es.
    select
        orderid,
        ordercreatetime,
        companyId,
        areasid,
        institutionid,
        platfromfiledcode,
        orderlargertype,
        ordersecondtype,
        orderthirdlytype,
        serviceflowalias,
        ordersource,
        ordersourcename,
        orderstatus,
        orderstatusname,
        actualhour,
        urgent,
        supervisenum,
        reworknum,
        importance,
        dealuserids,
        dealuserorgids,
        'es' as comefrom,
        dt
    from wfs_order_list_index
    where dt = '$do_date'

    union all

    -- Source 2: the MySQL table, tagged comefrom = mysql.
    -- Columns are aliased to the names used by the first branch so both branches
    -- expose the same schema. Columns with no MySQL counterpart are empty strings.
    select
        id                 as orderid,
        create_time        as ordercreatetime,
        company_id         as companyId,
        areas_id           as areasid,
        institution_id     as institutionid,
        ''                 as platfromfiledcode,
        order_larger_type  as orderlargertype,
        order_second_type  as ordersecondtype,
        order_thirdly_type as orderthirdlytype,
        ''                 as serviceflowalias,
        order_source       as ordersource,
        ''                 as ordersourcename,
        status             as orderstatus,
        ''                 as orderstatusname,
        actual_hour        as actualhour,
        urgent,
        supervise_num      as supervisenum,
        rework_num         as reworknum,
        importance,
        deal_user_ids      as dealuserids,
        deal_user_org_ids  as dealuserorgids,
        'mysql'            as comefrom,
        dt
    from tbwork_order
    where dt = '$do_date'
)
insert overwrite table ods_order partition (dt)
select
    orderid,
    ordercreatetime,
    companyId,
    areasid,
    institutionid,
    platfromfiledcode,
    orderlargertype,
    ordersecondtype,
    orderthirdlytype,
    serviceflowalias,
    ordersource,
    ordersourcename,
    orderstatus,
    orderstatusname,
    actualhour,
    urgent,
    supervisenum,
    reworknum,
    importance,
    dealuserids,
    dealuserorgids,
    comefrom,
    -- NOTE(review): rn is always 1 here. It is still written because the original
    -- statement wrote it. Confirm that ods_order really has a column in this position.
    rn,
    -- dt comes last because it feeds the dynamic partition column
    dt
from (
    select
        orderid,
        ordercreatetime,
        companyId,
        areasid,
        institutionid,
        platfromfiledcode,
        orderlargertype,
        ordersecondtype,
        orderthirdlytype,
        serviceflowalias,
        ordersource,
        ordersourcename,
        orderstatus,
        orderstatusname,
        actualhour,
        urgent,
        supervisenum,
        reworknum,
        importance,
        dealuserids,
        dealuserorgids,
        comefrom,
        dt,
        -- Explicit source priority: es rows rank ahead of every other source.
        -- This gives the same result as ordering by comefrom for the two tags used here,
        -- without relying on string collation.
        row_number() over (
            partition by orderid
            order by case when comefrom = 'es' then 0 else 1 end
        ) as rn
    from merged_orders
-- a derived table in the from clause is given an explicit alias
) ranked
where rn = 1
;
-- Reference: https://wenwen.sogou.com/z/q705629911.htm