step1,一个简单的联系人表
-- A simple contact table (step 1).
-- NOTE(review): CHARSET=utf8 is MySQL's 3-byte subset; utf8mb4 would be
-- needed for full Unicode (e.g. emoji) -- kept as-is to preserve behavior.
CREATE TABLE `contact784` (
    `cid`    BIGINT        NOT NULL AUTO_INCREMENT,
    `uid`    BIGINT        NOT NULL,
    `email`  VARCHAR(128)  NOT NULL,
    `name`   VARCHAR(64)   NOT NULL,
    `mobile` VARCHAR(16)   NULL,
    `atime`  TIMESTAMP     NULL,
    `type`   ENUM('BLACK','WHITE','NORMAL') NOT NULL DEFAULT 'NORMAL',
    `info`   TEXT          NULL,
    `memo`   VARCHAR(1024) NULL,
    PRIMARY KEY (`cid`),
    -- one contact per (uid, email) pair; folded into CREATE TABLE
    -- instead of a separate ALTER -- final schema is identical
    UNIQUE KEY `uniq_uid_email` (`uid`, `email`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 AUTO_INCREMENT=100;
step2,插入了100W数据:
# -*- coding: utf-8 -*-
#@author [email protected]
import MySQLdb
import random
import string
import threading
import time
# Pool of TLD / domain suffixes used to build random email addresses.
domains = ['org','com.cn','qq.com','yahoo.com','163.com','com','cn','sina.cn','sina.com']
# MySQL connection settings (credentials redacted in the post).
host = "localhost"
user = "xx"
pwd = "xx"
db = "t3"
def getRandomValue(tlds=None):
    """Generate a random (email, name) pair for test data.

    The name is 1-10 random ASCII letters; the email is
    ``name@name.<tld>`` with ``<tld>`` drawn from ``tlds``.

    :param tlds: optional sequence of domain-suffix strings;
        defaults to the module-level ``domains`` list (backward
        compatible with the original zero-argument call).
    :return: ``(email, name)`` tuple of strings.
    """
    if tlds is None:
        tlds = domains
    # string.ascii_letters instead of the Python-2-only string.letters
    # (identical ASCII content, works on both 2 and 3).
    name = ''.join(random.choice(string.ascii_letters)
                   for _ in range(random.randint(1, 10)))
    # Original did `b = list(s); ''.join(b)` -- a no-op round-trip
    # that just reproduced the name; dropped.
    domain = name + "." + random.choice(tlds)
    return name + "@" + domain, name
def insert(count):
    """Insert ``count`` random rows into contact784.

    Opens one connection, inserts row by row, commits once at the end,
    then closes the connection.

    :param count: number of rows to insert.

    NOTE(review): a random (uid, email) collision would raise
    IntegrityError on the uniq_uid_email index and abort the batch --
    the original had the same behavior.
    """
    conn = MySQLdb.connect(host=host, user=user, passwd=pwd, db=db)
    cursor = conn.cursor()
    # Parameter binding (PEP 249) instead of %-interpolating values
    # into the SQL string: the driver escapes them, which is both
    # safer and avoids quoting bugs.
    sql = "insert into contact784(uid,email,name) values (%s,%s,%s)"
    for _ in xrange(count):  # loop index was misleadingly named `cid`
        uid = random.randint(1000000000, 9999999999)
        email, name = getRandomValue()
        cursor.execute(sql, (uid, email, name))
    cursor.close()
    conn.commit()
    conn.close()
if __name__=='__main__':
start = time.clock()
for i in range(100):
worker = threading.Thread(target = insert(10000))
worker.start()
end = time.clock()
print "elsaped:%s" %(end-start)
step3,要重新单线程插入,需要把数据清空.
注意:上面的代码有一个bug——threading.Thread(target=insert(10000))会在主线程里立即调用insert并把返回值None当作target,所以100个"线程"实际上是串行执行的,只产生了一个连接(这并非GIL所致)。需要测试一下纯单线程插入是不是要快些:)
执行:delete from contact784
半小时没有执行完毕!
诊断方式:
1,iostat ,top等查看磁盘io很大
2,inotifywatch发现io的事件非常多
原因:在大表上使用delete from 清空一个表是非常慢的。因为InnoDB必须处理表中的每一行,根据InnoDB的事务设计原则,首先需要把“删除动作”写入“事务日志”,然后写入实际的表。所以,清空大表的时候,最好直接drop table然后重建。
注:
在delete from 执行的过程中:
用:select count(*) from contact784;发现表的数据量一直是100W行(delete未提交,MVCC下可见行数不变)
用:explain select count(*) from contact784;可以发现rows估算值一直在减少,显示当前剩余的估算行数
表名带784是因为前面这个文章的原因“
http://hanyh.iteye.com/blog/431323
”