python语法练习

一个简单的需求：找出重复次数最多的数据。

@沛公 


import random

# Tally how often each random value is drawn, then show the most frequent.
# Key 0 is pre-seeded with a zero count so it is always present.
_dict = {0: 0}

# range(1000000) performs exactly 1,000,000 draws
# (range(1, 1000000) would perform one fewer).
for i in range(1000000):
    # randrange excludes the upper bound, so values fall in 0..998.
    rd_num = random.randrange(0, 999, 1)
    _dict[rd_num] = _dict.get(rd_num, 0) + 1

print(_dict)
# Materialise the pairs as a list: on Python 3 dict.items() returns a view
# that cannot be sliced.
items = list(_dict.items())
print(items[0:10])
# Sort by count, highest first. A key function is used because Python 3's
# sorted() no longer accepts a cmp-style comparator.
items = sorted(items, key=lambda pair: pair[1], reverse=True)
print(items[0:10])


在 4.0G 虚拟机上测试：100 万条数据耗时 0.6s……速度一般。后续加入 sqlite 版本。



import sqlite3
import random

# Exclusive upper bound passed to random.randrange for the generated values.
_MAX_VALUE = 100
# Bound of the range() that controls how many rows are inserted.
_DATA_CNT = 1000000

# Open the SQLite database file test.db and take one cursor; the rest of
# the script runs every statement through this cursor.
conn = sqlite3.connect('test.db')
cur = conn.cursor()

def got_cnt(x):
    """Return the count stored in the ``sorted`` table for value ``x``.

    The lookup runs on the module-level cursor ``cur``.

    Args:
        x: Value to look up in the ``value`` column.

    Returns:
        The ``times`` column of the first matching row, or 0 when no row
        matches.
    """
    # Bind x as a parameter instead of formatting it into the SQL text.
    cur.execute('''SELECT times FROM sorted WHERE value=?;''', (x,))
    row = cur.fetchone()
    # No row means the value has not been recorded yet.
    return 0 if row is None else row[0]


# Start from empty tables so the script can be re-run against an existing
# test.db; a plain CREATE TABLE raises once the tables already exist.
cur.execute('''DROP TABLE IF EXISTS ramdon_data;''')
cur.execute('''DROP TABLE IF EXISTS sorted;''')
cur.execute('''CREATE TABLE ramdon_data
    (id INTEGER PRIMARY KEY NOT NULL,value INTEGER);''')
cur.execute('''CREATE TABLE sorted
    (id INTEGER PRIMARY KEY NOT NULL,value INTEGER, times INTEGER);''')

print("create ok")

# Insert exactly _DATA_CNT random values drawn from [0, _MAX_VALUE).
# Values are bound as parameters rather than formatted into the SQL text.
cur.executemany(
    '''INSERT INTO ramdon_data(value) VALUES (?);''',
    ((random.randrange(0, _MAX_VALUE, 1),) for _ in range(_DATA_CNT)),
)

conn.commit()

print("data ready")


cur.execute('''SELECT * FROM ramdon_data;''')
# Fetch every row up front: got_cnt() and the statements below reuse this
# same cursor, so its pending result set cannot be iterated lazily.
ans = cur.fetchall()

for d in ans:
    value = d[1]
    times = got_cnt(value) + 1
    # Compare by value; `is` tests object identity, not numeric equality.
    if times == 1:
        # First occurrence of this value: create its counter row.
        cur.execute('''INSERT INTO
            sorted(value,times) VALUES(?,?);''', (value, times))
    else:
        cur.execute('''UPDATE sorted
            SET times=? WHERE value=?;''', (times, value))

conn.commit()

print("calc ready")


# Rows come back in ascending order of count, so the most frequent value
# is printed last.
cur.execute('''SELECT * FROM sorted ORDER BY times;''')
ans = cur.fetchall()
print('''


    len is %s''' % len(ans))
for d in ans:
    print(d)

conn.close()



加了 sqlite 之后慢多了：100 万条数据耗时 30s……当然，这跟我毫无节操地逐条读写数据有关系。无论如何，sqlite、python 以及 sql 都算是入门了。



你可能感兴趣的:(sqlite,python,sort)