【Oracle】删除重复记录

--复习autotrace:

SET AUTOTRACE OFF               --不生成AUTOTRACE 报告,这是缺省模式

SET AUTOTRACE ON EXPLAIN        --AUTOTRACE只显示优化器执行路径报告

SET AUTOTRACE ON STATISTICS     --只显示执行统计信息

SET AUTOTRACE ON                --包含执行计划和统计信息

SET AUTOTRACE TRACEONLY         --同set autotrace on,但是不显示查询输出



--案例模拟

--查询原始表:

23:13:36 SQL> select * from emp;



     EMPNO ENAME      JOB              MGR HIREDATE            SAL       COMM     DEPTNO

---------- ---------- --------- ---------- ------------ ---------- ---------- ----------

      7698 BLAKE      MANAGER         7839 01-MAY-81          2850                    30

      7782 CLARK      MANAGER         7839 09-JUN-81          2450                    10

      7788 SCOTT      ANALYST         7566 19-APR-87          3000                    20

      7839 KING       PRESIDENT            17-NOV-81          5000                    10

      7844 TURNER     SALESMAN        7698 08-SEP-81          1500          0         30

      7876 ADAMS      CLERK           7788 23-MAY-87          1100                    20

      7900 JAMES      CLERK           7698 03-DEC-81           950                    30

      7902 FORD       ANALYST         7566 03-DEC-81          3000                    20

      7934 MILLER     CLERK           7782 23-JAN-82          1300                    10

      7369 SMITH      CLERK           7902 17-DEC-80           800                    20

      7499 ALLEN      SALESMAN        7698 20-FEB-81          1600        300         30

      7521 WARD       SALESMAN        7698 22-FEB-81          1250        500         30

      7566 JONES      MANAGER         7839 02-APR-81          2975                    20

      7654 MARTIN     SALESMAN        7698 28-SEP-81          1250       1400         30



14 rows selected.



Elapsed: 00:00:00.09



--模拟,插入重复记录

INSERT INTO emp select * from emp;



--检查重复记录

23:13:39 SQL> SELECT COUNT(empno), empno FROM emp GROUP BY empno HAVING COUNT(empno) > 1;



COUNT(EMPNO)      EMPNO

------------ ----------

           2       7782

           2       7839

           2       7844

           2       7698

           2       7902

           2       7521

           2       7788

           2       7934

           2       7566

           2       7654

           2       7499

           2       7876

           2       7900

           2       7369



14 rows selected.



Elapsed: 00:00:00.04



--方法一(只保留ROWID最小的记录):

DELETE FROM emp

WHERE  empno IN

       (SELECT empno FROM emp GROUP BY empno HAVING COUNT(empno) > 1)

AND    ROWID NOT IN

       (SELECT MIN(ROWID) FROM emp GROUP BY empno HAVING COUNT(empno) > 1);



[lubinsu@200-168-46-3 shell_monitor]$ sqlplus lubinsu/lubinsu



SQL*Plus: Release 10.2.0.1.0 - Production on Sun Aug 18 23:09:34 2013



Copyright (c) 1982, 2005, Oracle.  All rights reserved.





Connected to:

Oracle Database 10g Enterprise Edition Release 10.2.0.1.0 - Production

With the Partitioning, OLAP and Data Mining options



23:09:46 SQL> set timing on

23:09:51 SQL> set autotrace traceonly

23:09:58 SQL> set linesize 200

23:17:32 SQL> DELETE FROM emp

23:17:38   2  WHERE  empno IN

23:17:40   3         (SELECT empno FROM emp GROUP BY empno HAVING COUNT(empno) > 1)

23:17:45   4  AND    ROWID NOT IN

23:17:49   5         (SELECT MIN(ROWID) FROM emp GROUP BY empno HAVING COUNT(empno) > 1);



14 rows deleted.



Elapsed: 00:00:00.07



Execution Plan

----------------------------------------------------------

Plan hash value: 319293636



------------------------------------------------------------------------------------

| Id  | Operation               | Name     | Rows  | Bytes | Cost (%CPU)| Time     |

------------------------------------------------------------------------------------

|   0 | DELETE STATEMENT        |          |     1 |    41 |     9  (34)| 00:00:01 |

|   1 |  DELETE                 | EMP      |       |       |            |          |

|*  2 |   HASH JOIN ANTI        |          |     1 |    41 |     9  (34)| 00:00:01 |

|*  3 |    HASH JOIN SEMI       |          |     1 |    29 |     6  (34)| 00:00:01 |

|   4 |     TABLE ACCESS FULL   | EMP      |    14 |   224 |     2   (0)| 00:00:01 |

|   5 |     VIEW                | VW_NSO_1 |     1 |    13 |     3  (34)| 00:00:01 |

|*  6 |      FILTER             |          |       |       |            |          |

|   7 |       SORT GROUP BY     |          |     1 |     4 |     3  (34)| 00:00:01 |

|   8 |        TABLE ACCESS FULL| EMP      |    14 |    56 |     2   (0)| 00:00:01 |

|   9 |    VIEW                 | VW_NSO_2 |     1 |    12 |     3  (34)| 00:00:01 |

|* 10 |     FILTER              |          |       |       |            |          |

|  11 |      SORT GROUP BY      |          |     1 |    16 |     3  (34)| 00:00:01 |

|  12 |       TABLE ACCESS FULL | EMP      |    14 |   224 |     2   (0)| 00:00:01 |

------------------------------------------------------------------------------------



Predicate Information (identified by operation id):

---------------------------------------------------



   2 - access(ROWID="$nso_col_1")

   3 - access("EMPNO"="$nso_col_1")

   6 - filter(COUNT("EMPNO")>1)

  10 - filter(COUNT("EMPNO")>1)





Statistics

----------------------------------------------------------

          1  recursive calls

         15  db block gets

      16254  consistent gets

          0  physical reads

       4256  redo size

        925  bytes sent via SQL*Net to client

       1116  bytes received via SQL*Net from client

          6  SQL*Net roundtrips to/from client

          3  sorts (memory)

          0  sorts (disk)

         14  rows processed

         

--方法二、使用分析函数(只保留ROWID最小的记录):

23:17:57 SQL> rollback;



Rollback complete.



Elapsed: 00:00:00.02

23:19:11 SQL> DELETE FROM emp

23:22:22   2  WHERE  ROWID IN (SELECT ROWID

23:22:28   3                   FROM   (SELECT ROWID row_id,

23:22:32   4                                  row_number() over(PARTITION BY empno ORDER BY ROWID ASC) row_num

23:22:37   5                           FROM   emp e) v

23:22:41   6                   WHERE  v.row_num >= 2);



14 rows deleted.



Elapsed: 00:00:00.04



Execution Plan

----------------------------------------------------------

Plan hash value: 3671588261



-----------------------------------------------------------------------------------------

| Id  | Operation                    | Name     | Rows  | Bytes | Cost (%CPU)| Time     |

-----------------------------------------------------------------------------------------

|   0 | DELETE STATEMENT             |          |     1 |    24 |     5  (40)| 00:00:01 |

|   1 |  DELETE                      | EMP      |       |       |            |          |

|   2 |   NESTED LOOPS               |          |     1 |    24 |     5  (40)| 00:00:01 |

|   3 |    VIEW                      | VW_NSO_1 |    14 |   168 |     3  (34)| 00:00:01 |

|   4 |     SORT UNIQUE              |          |     1 |   350 |            |          |

|*  5 |      VIEW                    |          |    14 |   350 |     3  (34)| 00:00:01 |

|   6 |       WINDOW SORT            |          |    14 |   224 |     3  (34)| 00:00:01 |

|   7 |        TABLE ACCESS FULL     | EMP      |    14 |   224 |     2   (0)| 00:00:01 |

|   8 |    TABLE ACCESS BY USER ROWID| EMP      |     1 |    12 |     1   (0)| 00:00:01 |

-----------------------------------------------------------------------------------------



Predicate Information (identified by operation id):

---------------------------------------------------



   5 - filter("V"."ROW_NUM">=2)





Statistics

----------------------------------------------------------

          1  recursive calls

         18  db block gets

       5434  consistent gets

          0  physical reads

       4448  redo size

        925  bytes sent via SQL*Net to client

       1190  bytes received via SQL*Net from client

          6  SQL*Net roundtrips to/from client

          3  sorts (memory)

          0  sorts (disk)

         14  rows processed



--方法三、

23:22:45 SQL> DELETE FROM emp p WHERE ROWID NOT IN(SELECT MAX(ROWID) FROM emp i WHERE i.empno = p.empno);



0 rows deleted.



Elapsed: 00:00:00.13



Execution Plan

----------------------------------------------------------

Plan hash value: 1987023575



---------------------------------------------------------------------------------

| Id  | Operation             | Name    | Rows  | Bytes | Cost (%CPU)| Time     |

---------------------------------------------------------------------------------

|   0 | DELETE STATEMENT      |         |    13 |   533 |     6  (34)| 00:00:01 |

|   1 |  DELETE               | EMP     |       |       |            |          |

|*  2 |   HASH JOIN           |         |    13 |   533 |     6  (34)| 00:00:01 |

|   3 |    VIEW               | VW_SQ_1 |    14 |   350 |     3  (34)| 00:00:01 |

|   4 |     SORT GROUP BY     |         |    14 |   224 |     3  (34)| 00:00:01 |

|   5 |      TABLE ACCESS FULL| EMP     |    14 |   224 |     2   (0)| 00:00:01 |

|   6 |    TABLE ACCESS FULL  | EMP     |    14 |   224 |     2   (0)| 00:00:01 |

---------------------------------------------------------------------------------



Predicate Information (identified by operation id):

---------------------------------------------------



   2 - access("EMPNO"="P"."EMPNO")

       filter(ROWID<>"VW_COL_1")





Statistics

----------------------------------------------------------

        168  recursive calls

          0  db block gets

      10954  consistent gets

          0  physical reads

          0  redo size

        925  bytes sent via SQL*Net to client

       1008  bytes received via SQL*Net from client

          6  SQL*Net roundtrips to/from client

          2  sorts (memory)

          0  sorts (disk)

          0  rows processed

          

--方法四、和方法三是类似的:

Elapsed: 00:00:00.03

23:28:25 SQL> DELETE FROM emp p WHERE ROWID < (SELECT MAX(ROWID) FROM emp i WHERE i.empno = p.empno);



14 rows deleted.



Elapsed: 00:00:00.06



Execution Plan

----------------------------------------------------------

Plan hash value: 1987023575



---------------------------------------------------------------------------------

| Id  | Operation             | Name    | Rows  | Bytes | Cost (%CPU)| Time     |

---------------------------------------------------------------------------------

|   0 | DELETE STATEMENT      |         |     1 |    41 |     6  (34)| 00:00:01 |

|   1 |  DELETE               | EMP     |       |       |            |          |

|*  2 |   HASH JOIN           |         |     1 |    41 |     6  (34)| 00:00:01 |

|   3 |    VIEW               | VW_SQ_1 |    14 |   350 |     3  (34)| 00:00:01 |

|   4 |     SORT GROUP BY     |         |    14 |   224 |     3  (34)| 00:00:01 |

|   5 |      TABLE ACCESS FULL| EMP     |    14 |   224 |     2   (0)| 00:00:01 |

|   6 |    TABLE ACCESS FULL  | EMP     |    14 |   224 |     2   (0)| 00:00:01 |

---------------------------------------------------------------------------------



--方法五、使用group by,同样类似三四

23:30:01 SQL> DELETE FROM emp p WHERE ROWID NOT IN (SELECT MAX(ROWID) FROM emp i GROUP BY i.empno);



14 rows deleted.



Elapsed: 00:00:00.06



Execution Plan

----------------------------------------------------------

Plan hash value: 3239269824



----------------------------------------------------------------------------------

| Id  | Operation             | Name     | Rows  | Bytes | Cost (%CPU)| Time     |

----------------------------------------------------------------------------------

|   0 | DELETE STATEMENT      |          |     1 |    24 |     6  (34)| 00:00:01 |

|   1 |  DELETE               | EMP      |       |       |            |          |

|*  2 |   HASH JOIN ANTI      |          |     1 |    24 |     6  (34)| 00:00:01 |

|   3 |    TABLE ACCESS FULL  | EMP      |    14 |   168 |     2   (0)| 00:00:01 |

|   4 |    VIEW               | VW_NSO_1 |    14 |   168 |     3  (34)| 00:00:01 |

|   5 |     SORT GROUP BY     |          |    14 |   224 |     3  (34)| 00:00:01 |

|   6 |      TABLE ACCESS FULL| EMP      |    14 |   224 |     2   (0)| 00:00:01 |

----------------------------------------------------------------------------------



Predicate Information (identified by operation id):

---------------------------------------------------



   2 - access(ROWID="$nso_col_1")





Statistics

----------------------------------------------------------

          0  recursive calls

         17  db block gets

      10836  consistent gets

          0  physical reads

       4324  redo size

        926  bytes sent via SQL*Net to client

       1002  bytes received via SQL*Net from client

          6  SQL*Net roundtrips to/from client

          2  sorts (memory)

          0  sorts (disk)

         14  rows processed



--总结,还是自己想出来的方法二效率高,:)



你可能感兴趣的:(oracle)