-- List each mobile value that appears on more than one row, with its row count.
-- COUNT(mobile) skips NULLs, so the NULL group can never pass the HAVING test.
-- No WHERE filter here: empty-string and 'null' placeholder values are still counted.
SELECT
    mobile,
    COUNT(*) AS cnt
FROM rbc_audience_users
GROUP BY mobile
HAVING COUNT(mobile) > 1
可以查询出当前需要去重的UID 和电话号码! 仅有一份
-- Return uid and mobile for every row whose non-empty mobile value is shared
-- by more than one row of rbc_audience_users.
SELECT
    u.uid,
    u.mobile
FROM rbc_audience_users AS u
INNER JOIN (
    -- One row per duplicated mobile; empty strings are excluded before grouping.
    -- The filter also removes NULLs, so COUNT(*) equals COUNT(mobile) here.
    SELECT d.mobile
    FROM rbc_audience_users AS d
    WHERE d.mobile <> ''
    GROUP BY d.mobile
    HAVING COUNT(*) > 1
) AS dup
    ON dup.mobile = u.mobile;
-- Return uid and mobile for every row whose mobile value is duplicated,
-- ignoring the empty string and the literal text 'null'.
SELECT
    u.uid,
    u.mobile
FROM rbc_audience_users AS u
INNER JOIN (
    -- One row per duplicated mobile. NOT IN on a literal list also drops
    -- SQL NULLs, exactly as the pair of != comparisons does.
    SELECT d.mobile
    FROM rbc_audience_users AS d
    WHERE d.mobile NOT IN ('', 'null')
    GROUP BY d.mobile
    HAVING COUNT(*) > 1
) AS dup
    ON dup.mobile = u.mobile;
查询出所有出现次数大于一次的电话号码对应的数据,并且只查询 mobile 不为字符串 'null' 和空串的电话号码!
将上述两份结果数据分别插入到两张表,然后使用 left join ... where ... is null(取差集)的方式查询出重复数据
将ID全部查询出来,然后传入Es对当前用户ID数据进行删除!
即可达到删除冗余数据的目的!
全部重复数据,
微信
-- Stage every rbc_audience_user_v1 row of data_code '10300010001' whose mobile
-- is duplicated within that same data_code.
-- Each staged row is (uid, mobile, the literal '10300010001').
-- NOTE(review): the INSERT is positional; the target table's column names are
-- not visible here, so no column list is given. Confirm against the DDL.
INSERT INTO rbc_audience_user_not_distinct_mobiledata
SELECT
    u.uid,
    u.mobile,
    '10300010001'
FROM rbc_audience_user_v1 AS u
INNER JOIN (
    -- One row per duplicated mobile, skipping '' and the literal text 'null'.
    SELECT d.mobile
    FROM rbc_audience_user_v1 AS d
    WHERE d.data_code = '10300010001'
      AND d.mobile NOT IN ('null', '')
    GROUP BY d.mobile
    HAVING COUNT(*) > 1
) AS dup
    ON dup.mobile = u.mobile
WHERE u.data_code = '10300010001';
1947
听听
-- Stage every rbc_audience_user_v1 row of data_code '10300010002' whose mobile
-- is duplicated within that same data_code.
-- Each staged row is (uid, mobile, the literal '10300010002').
-- NOTE(review): the INSERT is positional; the target table's column names are
-- not visible here, so no column list is given. Confirm against the DDL.
INSERT INTO rbc_audience_user_not_distinct_mobiledata
SELECT
    u.uid,
    u.mobile,
    '10300010002'
FROM rbc_audience_user_v1 AS u
INNER JOIN (
    -- One row per duplicated mobile, skipping '' and the literal text 'null'.
    SELECT d.mobile
    FROM rbc_audience_user_v1 AS d
    WHERE d.data_code = '10300010002'
      AND d.mobile NOT IN ('null', '')
    GROUP BY d.mobile
    HAVING COUNT(*) > 1
) AS dup
    ON dup.mobile = u.mobile
WHERE u.data_code = '10300010002';
0
交通台数据
-- Stage every rbc_audience_user_v1 row of data_code '10300010003' whose mobile
-- is duplicated within that same data_code.
-- Each staged row is (uid, mobile, the literal '10300010003').
-- NOTE(review): the INSERT is positional; the target table's column names are
-- not visible here, so no column list is given. Confirm against the DDL.
INSERT INTO rbc_audience_user_not_distinct_mobiledata
SELECT
    u.uid,
    u.mobile,
    '10300010003'
FROM rbc_audience_user_v1 AS u
INNER JOIN (
    -- One row per duplicated mobile, skipping '' and the literal text 'null'.
    SELECT d.mobile
    FROM rbc_audience_user_v1 AS d
    WHERE d.data_code = '10300010003'
      AND d.mobile NOT IN ('null', '')
    GROUP BY d.mobile
    HAVING COUNT(*) > 1
) AS dup
    ON dup.mobile = u.mobile
WHERE u.data_code = '10300010003';
35743
========================================================================================
仅有一条数据
微信
-- For data_code '10300010001': stage the rows whose uid is the MIN(uid) of a
-- duplicated-mobile group (mobile values '' and 'null' are ignored).
-- Each staged row is (uid, mobile, the literal '10300010001').
-- IN is kept instead of a join so that a uid which is the minimum of several
-- mobile groups still produces each matching row only once.
-- NOTE(review): the INSERT is positional; the target table's columns are not
-- visible here. Confirm against the DDL.
INSERT INTO rbc_audience_user_distinct_mobiledata
SELECT
    u.uid,
    u.mobile,
    '10300010001'
FROM rbc_audience_user_v1 AS u
WHERE u.data_code = '10300010001'
  AND u.uid IN (
      SELECT keeper.uid
      FROM (
          -- Smallest uid of every mobile that occurs more than once.
          SELECT MIN(d.uid) AS uid
          FROM rbc_audience_user_v1 AS d
          WHERE d.data_code = '10300010001'
            AND d.mobile NOT IN ('', 'null')
          GROUP BY d.mobile
          HAVING COUNT(*) > 1
      ) AS keeper
  );
860
听听
-- For data_code '10300010002': stage the rows whose uid is the MIN(uid) of a
-- duplicated-mobile group (mobile values '' and 'null' are ignored).
-- Each staged row is (uid, mobile, the literal '10300010002').
-- IN is kept instead of a join so that a uid which is the minimum of several
-- mobile groups still produces each matching row only once.
-- NOTE(review): the INSERT is positional; the target table's columns are not
-- visible here. Confirm against the DDL.
INSERT INTO rbc_audience_user_distinct_mobiledata
SELECT
    u.uid,
    u.mobile,
    '10300010002'
FROM rbc_audience_user_v1 AS u
WHERE u.data_code = '10300010002'
  AND u.uid IN (
      SELECT keeper.uid
      FROM (
          -- Smallest uid of every mobile that occurs more than once.
          SELECT MIN(d.uid) AS uid
          FROM rbc_audience_user_v1 AS d
          WHERE d.data_code = '10300010002'
            AND d.mobile NOT IN ('', 'null')
          GROUP BY d.mobile
          HAVING COUNT(*) > 1
      ) AS keeper
  );
0
交通台数据
-- For data_code '10300010003': stage the rows whose uid is the MIN(uid) of a
-- duplicated-mobile group (mobile values '' and 'null' are ignored).
-- Each staged row is (uid, mobile, the literal '10300010003').
-- IN is kept instead of a join so that a uid which is the minimum of several
-- mobile groups still produces each matching row only once.
-- NOTE(review): the INSERT is positional; the target table's columns are not
-- visible here. Confirm against the DDL.
INSERT INTO rbc_audience_user_distinct_mobiledata
SELECT
    u.uid,
    u.mobile,
    '10300010003'
FROM rbc_audience_user_v1 AS u
WHERE u.data_code = '10300010003'
  AND u.uid IN (
      SELECT keeper.uid
      FROM (
          -- Smallest uid of every mobile that occurs more than once.
          SELECT MIN(d.uid) AS uid
          FROM rbc_audience_user_v1 AS d
          WHERE d.data_code = '10300010003'
            AND d.mobile NOT IN ('', 'null')
          GROUP BY d.mobile
          HAVING COUNT(*) > 1
      ) AS keeper
  );
数据取差集
-- Preview up to 10 ids present in rbc_audience_user_not_distinct_mobiledata
-- but absent from rbc_audience_user_distinct_mobiledata (set difference).
-- Fixes: the original selected a.id without ever declaring alias `a`, and its
-- NOT IN (subquery) would return no rows at all if any id in the second table
-- were NULL. NOT EXISTS has no such trap.
-- NOTE(review): assumes both staging tables expose the identifier as a column
-- named `id`, as the original query did. Confirm against the DDL.
SELECT a.id
FROM rbc_audience_user_not_distinct_mobiledata AS a
WHERE NOT EXISTS (
    SELECT 1
    FROM rbc_audience_user_distinct_mobiledata AS k
    WHERE k.id = a.id
)
LIMIT 10;