MySQL数据库删除重复数据只保留id最小值脚本


思路解读:
  1. 通过Navicatsql查询语句(以小说章节表为例)查询出出
    现重复小说章节的内容(以url的唯一性为例)
  2. 将数据导入Excel表格中
  3. 读取Excel,通过select查询url出现数据重复的id
  4. 将查询的id导入删除的sql语句中,删除成功

sql查询语句,查询chapter_url在表中出现的次数

SELECT chapter_url,count(chapter_url) as number from chapter GROUP BY chapter_url HAVING number >1 ORDER BY number desc; 
"""sql删除方法"""
DELETE 
from 
chapter 
where id in (
select 
id 
from 
(SELECT 
chapter_url,count(chapter_url) as number,id 
FROM `data_engine`
 GROUP BY chapter_url HAVING number >1 ORDER BY number desc) 
as t)
脚本方法
# -*- coding: utf-8 -*-
import pymysql
import xlrd

#打开excel
def open_excel(file):
    try:
        data = xlrd.open_workbook(file)
        return data
    except Exception as e:
        print(str(e))

#调用打开excel的方法
def excel_table_byindex():
    # 此处为Excel的路劲
    data = open_excel('F:\\workspace\\delete_sql.xls')
    table = data.sheet_by_name(u'Sheet1') #通过名称获取

    db = connectMysql()
    for i in range(415, 65536):
        url = table.cell(i, 0).value  #读取Excel的url
        number = table.cell(i, 1).value
        with db.cursor() as cursor:
            sql = "SELECT id FROM `chapter` where chapter_url='%s';" % (url)
            # yield sql
            print(sql)
            db.ping(reconnect=True)
            cursor.execute(sql)
            data = cursor.fetchall()
            db.commit()
            # return data
            db.close()
            get_id(data)



def get_id(ids):
    new_ids = list(ids)
    if len(new_ids)> 0:
        # 移除id的第0项
        new_ids.remove(new_ids[0])
        #  将id传进行循环删除
        for i in new_ids:
            db = connectMysql()
            with db.cursor() as cursor:
                sql = "DELETE FROM chapter WHERE id ='%s';" % (i[0])
                print(sql)
                db.ping(reconnect=True)
                cursor.execute(sql)
                db.commit()
                db.close()

# 此处填写你的mysql数据库账号及密码
def connectMysql():
    db = pymysql.connect(
        host='localhost',
        database='novellist',
        user='root',
        password='admin123',
        port=3306,
        charset='utf8'
    )
    return db

def main():  # 主方法调用
    con = excel_table_byindex()


if __name__=="__main__":
    table = main()


你可能感兴趣的:(MySQL数据库删除重复数据只保留id最小值脚本)