简单的需求,获取重复次数最多的数据
@沛公
# Draw random integers, count how often each value occurs, and print the
# ten values that occur most often.
import random

# Exclusive upper bound handed to random.randrange (values fall in 0..998).
_MAX_VALUE = 999
# Number of samples to draw. The loop previously used range(1, 1000000),
# which drew one sample fewer than the intended 1,000,000.
_DATA_CNT = 1000000

# Maps each drawn value to the number of times it has been seen.
_dict = {}
for i in range(_DATA_CNT):
    rd_num = random.randrange(0, _MAX_VALUE, 1)
    # dict.get supplies 0 for a first sighting, so no seed entry is needed.
    _dict[rd_num] = _dict.get(rd_num, 0) + 1
print(_dict)

# Materialise the (value, count) pairs as a list: on Python 3 dict.items()
# returns a view, which cannot be sliced.
items = list(_dict.items())
print(items[0:10])

# Highest count first. A key function is used because sorted() no longer
# accepts a comparison function on Python 3 (and cmp() no longer exists).
items = sorted(items, key=lambda pair: pair[1], reverse=True)
print(items[0:10])
在 4.0G 的虚拟机上测试:100 万条数据耗时 0.6 秒……速度一般。后续会加上 SQLite。
# Store random integers in SQLite, tally how often each value occurs in a
# second table, and print the tallies ordered by count.
import random
import sqlite3

# Exclusive upper bound handed to random.randrange (values fall in 0..99).
_MAX_VALUE = 100
# Number of random rows to generate.
_DATA_CNT = 1000000

conn = sqlite3.connect('test.db')
cur = conn.cursor()


def got_cnt(x):
    """Return the count stored for value ``x`` in the ``sorted`` table.

    Returns 0 when ``x`` has no row yet. The query runs on the
    module-level cursor ``cur``.
    """
    # Bind the value as a parameter instead of formatting it into the SQL.
    cur.execute('SELECT times FROM sorted WHERE value=?;', (x,))
    row = cur.fetchone()
    return row[0] if row is not None else 0


try:
    # NOTE: plain CREATE TABLE raises sqlite3.OperationalError when test.db
    # already contains these tables (for example from an earlier run).
    cur.execute('''CREATE TABLE ramdon_data (id INTEGER PRIMARY KEY NOT NULL,value INTEGER);''')
    cur.execute('''CREATE TABLE sorted (id INTEGER PRIMARY KEY NOT NULL,value INTEGER, times INTEGER);''')
    print("create ok")

    # range(_DATA_CNT) inserts exactly _DATA_CNT rows; the previous
    # range(1, _DATA_CNT) inserted one row fewer.
    for i in range(_DATA_CNT):
        rd_num = random.randrange(0, _MAX_VALUE, 1)
        cur.execute('INSERT INTO ramdon_data(value) VALUES (?);', (rd_num,))
    conn.commit()
    print("data ready")

    # Fetch every row up front: got_cnt() issues its own query on the same
    # cursor, so the result set cannot be consumed lazily while tallying.
    cur.execute('''SELECT * FROM ramdon_data;''')
    ans = cur.fetchall()
    for d in ans:
        value = d[1]
        times = got_cnt(value) + 1
        # Compare by value; "is" tests object identity and is not a
        # reliable way to compare integers.
        if times == 1:
            cur.execute('INSERT INTO sorted(value,times) VALUES(?,?);', (value, times))
        else:
            cur.execute('UPDATE sorted SET times=? WHERE value=?;', (times, value))
    conn.commit()
    print("calc ready")

    cur.execute('''SELECT * FROM sorted ORDER BY times;''')
    ans = cur.fetchall()
    print(''' len is %s''' % len(ans))
    for d in ans:
        print(d)
finally:
    # Release the database handle even when one of the statements fails.
    conn.close()
加了 SQLite 之后慢了很多:100 万条数据耗时 30 秒……当然,这跟我毫无节操地读写数据有关系。无论如何,SQLite、Python 以及 SQL 算是入门了。