MySQL中,查找并删除重复数据

-- ----------------------------
-- 黑名单(blacklist)表结构
-- ----------------------------
DROP TABLE IF EXISTS `blacklist`;
CREATE TABLE `blacklist` (
  `id` int(10) NOT NULL AUTO_INCREMENT,
  `imsi` varchar(50) CHARACTER SET utf8 NOT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB;

-- ----------------------------
-- 往数据表中添加记录
-- ----------------------------
INSERT INTO `blacklist`(`imsi`) VALUES 
('460010123456780'),
('460010123456780'),
('460010123456781'),
('460010123456782'),
('460010123456783'),
('460010123456782');

-- ----------------------------
-- 显示所有记录项
-- ----------------------------
SELECT * FROM blacklist;
+----+-----------------+
| id | imsi            |
+----+-----------------+
|  1 | 460010123456780 |
|  2 | 460010123456780 |
|  3 | 460010123456781 |
|  4 | 460010123456782 |
|  5 | 460010123456783 |
|  6 | 460010123456782 |
+----+-----------------+
6 rows in set (0.00 sec)

-- ----------------------------
-- 查找重复项(仅显示id最小的重复项)
-- ----------------------------
SELECT DISTINCT MIN(id) AS id, imsi
FROM blacklist
GROUP BY imsi
HAVING COUNT(*) > 1
ORDER BY imsi;
+------+-----------------+
| id   | imsi            |
+------+-----------------+
|    1 | 460010123456780 |
|    4 | 460010123456782 |
+------+-----------------+
2 rows in set (0.12 sec)

-- ----------------------------
-- 查找重复项(显示所有重复的项)
-- ----------------------------
SELECT blacklist.*
FROM blacklist,
(SELECT *
FROM blacklist
GROUP BY imsi
HAVING COUNT(*) > 1
ORDER BY imsi) as temp
WHERE blacklist.imsi = temp.imsi
ORDER BY blacklist.imsi, temp.id;
+----+-----------------+
| id | imsi            |
+----+-----------------+
|  1 | 460010123456780 |
|  2 | 460010123456780 |
|  4 | 460010123456782 |
|  6 | 460010123456782 |
+----+-----------------+
4 rows in set (0.10 sec)

-- ----------------------------
-- 查找重复项(显示除id最小的所有重复项)
-- ----------------------------
SELECT blacklist.*
FROM blacklist,
(SELECT DISTINCT MIN(id) AS id, imsi
FROM blacklist
GROUP BY imsi
HAVING COUNT(*) > 1
ORDER BY id) AS temp
WHERE blacklist.imsi = temp.imsi
AND blacklist.id != temp.id
ORDER BY blacklist.imsi, blacklist.id;
+----+-----------------+
| id | imsi            |
+----+-----------------+
|  2 | 460010123456780 |
|  6 | 460010123456782 |
+----+-----------------+
2 rows in set (0.06 sec)

-- ----------------------------
-- 删除重复项(仅保留id最小的项)
-- ----------------------------
DELETE blacklist
FROM blacklist,
(SELECT DISTINCT MIN(id) AS id, imsi
FROM blacklist
GROUP BY imsi
HAVING COUNT(*) > 1
ORDER BY id) AS temp
WHERE blacklist.imsi = temp.imsi
AND blacklist.id != temp.id;
Query OK, 2 rows affected (0.11 sec)

-- ----------------------------
-- 显示删除重复项后的所有记录项
-- ----------------------------
SELECT * FROM blacklist;
+----+-----------------+
| id | imsi            |
+----+-----------------+
|  1 | 460010123456780 |
|  3 | 460010123456781 |
|  4 | 460010123456782 |
|  5 | 460010123456783 |
+----+-----------------+
4 rows in set (0.00 sec)
在有辅助表(temp)的情况下,新建了一个查询:
#--0、清空临时表;
DELETE FROM blacklist_temp;

#--1、根据项目名称,查找结果,并插入到临时表
INSERT INTO blacklist_temp
(SELECT DISTINCT m.imsi, p.name AS mark
	FROM machine m, project p
	WHERE m.project=p.id AND p.name like '%fanzhuo_140402'
	GROUP BY imsi
	ORDER BY m.updateDate
	LIMIT 100);

#--2、删除临时表中跟黑名单表中重复的记录项
DELETE blacklist_temp
FROM blacklist_temp, 
(SELECT * FROM
	(SELECT DISTINCT imsi FROM blacklist
		UNION ALL 
	SELECT DISTINCT imsi FROM blacklist_temp) t
GROUP BY imsi having count(*)>1) temp 
WHERE blacklist_temp.imsi = temp.imsi;

#--3、将临时表中的记录,插入到黑名单表中
INSERT INTO blacklist(imsi,mark)
SELECT * FROM blacklist_temp;


你可能感兴趣的:(MySQL中,查找并删除重复数据)