-- 20150225_Oracle_去重.sql
-- 参考:
-- http://wenku.baidu.com/view/0a362b2003d8ce2f006623e4.html
-- Demo table for the de-duplication examples below. It declares no key or
-- constraint, so fully identical (c1, c2) rows can be stored in it.
create table t_distinct (
    c1 integer,
    c2 varchar2(50)
);

-- List every row together with its rowid pseudocolumn.
select
    d.*,
    d.rowid
from t_distinct d;
-- 1. distinct
-- Preview the de-duplicated data.
select distinct src.*
from t_distinct src;

-- Remove the duplicates: copy the distinct rows into a new table, drop the
-- original table, then rename the copy to the original name.
create table t_distinct_tmp as
select distinct src.*
from t_distinct src;

drop table t_distinct;

alter table t_distinct_tmp rename to t_distinct;
-- 2. rowid + correlated subquery
-- For every (c1, c2) combination the row with the smallest rowid is the one
-- that is kept.
--
-- The inner row (t2) must be matched against the OUTER row (t1) on both
-- columns. NULLs are matched explicitly because "=" never evaluates to true
-- when either side is NULL; without that, rows holding a NULL column would
-- disappear from the preview and would never be de-duplicated by the delete.

-- Preview the de-duplicated data.
select t1.*
from t_distinct t1
where t1.rowid = (
    select min(t2.rowid)
    from t_distinct t2
    where (t2.c1 = t1.c1 or (t2.c1 is null and t1.c1 is null))
      and (t2.c2 = t1.c2 or (t2.c2 is null and t1.c2 is null))
);

-- Remove the duplicates: delete every row that is not the smallest-rowid
-- row of its (c1, c2) combination.
delete from t_distinct t1
where t1.rowid <> (
    select min(t2.rowid)
    from t_distinct t2
    where (t2.c1 = t1.c1 or (t2.c1 is null and t1.c1 is null))
      and (t2.c2 = t1.c2 or (t2.c2 is null and t1.c2 is null))
);
-- 3. rowid + group by
-- One rowid (the smallest) is picked per (c1, c2) group; group by places
-- NULL values of a column in the same group.
-- The subquery table needs the alias t2 because every column reference
-- inside it is qualified with t2.

-- Preview the de-duplicated data.
select t1.*
from t_distinct t1
where t1.rowid in (
    select min(t2.rowid)
    from t_distinct t2
    group by t2.c1, t2.c2
);

-- Remove the duplicates: delete every row whose rowid is not the smallest
-- one of its group. "not in" is safe here because min(rowid) over a group
-- of existing rows is never NULL.
delete from t_distinct t1
where t1.rowid not in (
    select min(t2.rowid)
    from t_distinct t2
    group by t2.c1, t2.c2
);
-- 4. row_number() over (partition by ... order by ...)
-- Rows are numbered inside each (c1, c2) partition; the row numbered 1 is
-- the one that is kept. The ordering key c1 is constant within a partition,
-- so which of the identical rows receives number 1 is arbitrary.

-- Preview the de-duplicated data (the output also carries the rn column).
with numbered as (
    select
        t.*,
        row_number() over (partition by c1, c2 order by c1) rn
    from t_distinct t
)
select *
from numbered
where rn = 1;

-- Remove the duplicates: delete every row whose rowid is not the rowid of
-- a row numbered 1.
delete from t_distinct
where rowid not in (
    select rid
    from (
        select
            t.rowid rid,
            row_number() over (partition by c1, c2 order by c1) rn
        from t_distinct t
    )
    where rn = 1
);
-- 补充:
-- 伪列 rownum: 全表 排序,无间断无重复
-- 函数 row_number() over(partition by order by): 组内 排序,无间断无重复
-- 函数 dense_rank() over(partition by order by): 组内 排序,无间断有重复
-- 函数 rank() over(partition by order by): 组内 排序,有间断有重复
-- Demo table for comparing the numbering / ranking functions:
-- one row per employee with a department number and a salary.
create table t_depart_emp (
    depart_no  varchar2(20),
    emp_name   varchar2(50),
    emp_salary number(9,2)
);

-- List every row together with its rowid pseudocolumn.
select
    e.*,
    e.rowid
from t_depart_emp e;

-- Side-by-side comparison, per department and ordered by salary, of:
--   rownum       - the query-wide row counter pseudocolumn
--   row_number() - unique, gap-free numbering inside the partition
--   dense_rank() - ties share a rank, no gaps after a tie
--   rank()       - ties share a rank, gaps after a tie
select
    depart_no,
    emp_name,
    emp_salary,
    rownum,
    row_number() over (partition by depart_no order by emp_salary) as rn,
    dense_rank() over (partition by depart_no order by emp_salary) as drk,
    rank()       over (partition by depart_no order by emp_salary) as rk
from t_depart_emp
order by
    depart_no,
    emp_salary;