1、在Mysql单表上操作
# mysql 单表
select * from
(select uid,sum(gift_point) gift_point
from gift_record_61 where gift_id in(28,29) group by uid)x
order by 2 desc
limit 10
2、在Hive汇总表上计算
hive上的表,是将Mysql上的各分表数据进行合并;数据体量大。
#hive
select uid,gift_point from
(select uid,sum(gift_point) gift_point
from data_chushou_gift_record where gift_id in(28,29) group by uid)x
order by gift_point desc
limit 10;
-- resualt:
uid,gift_point
178236720
11013400
577484623
5305200
367625864
3402100
1126170673
2917400
282417747
2567800
1036752959
1876800
406271
1809900
1094468906
1648500
995497862
1623800
53104325
1592100
3、利用Python从Mysql各分表取数据再进行合并计算
/Users/nisj/PycharmProjects/EsDataProc/Mysql_submete_Data_Group_sort_Limit.py
# -*- coding=utf-8 -*-
import smtplib
import MySQLdb
import warnings
import datetime
import os
import os.path
import shutil
from itertools import groupby
from operator import itemgetter
warnings.filterwarnings("ignore")
db_config = {
'host': 'MysqlIp',
'user': 'MysqlUser',
'passwd': 'MysqlPasswd',
'port': 3306,
'db': 'jellyfish_server'
}
def getDB():
try:
conn = MySQLdb.connect(host=db_config['host'], user=db_config['user'], passwd=db_config['passwd'],
port=db_config['port'])
conn.autocommit(True)
curr = conn.cursor()
curr.execute("SET NAMES utf8");
curr.execute("USE %s" % db_config['db']);
return conn, curr
except MySQLdb.Error, e:
print "Mysql Error %d: %s" % (e.args[0], e.args[1])
return None, None
conn, curr = getDB()
today = datetime.date.today()
yesterday = today - datetime.timedelta(days=1)
tomorrow = today + datetime.timedelta(days=1)
sql_text = "select table_name from information_schema.tables where table_schema='jellyfish_server' and table_name like 'gift_record%';"
curr.execute(sql_text)
tab_list = curr.fetchall()
col_datas_union = []
for tab_name in tab_list:
tab_name = tab_name[0]
sql_text = "select uid,gift_point from jellyfish_server.%s where gift_id in(28,29);" % (tab_name)
curr.execute(sql_text)
col_datas = curr.fetchall()
# col_datas_union = list(set(col_datas_union).(set(col_datas))) #去重UNION
col_datas_union.extend(col_datas)
# col_datas_union.append(col_datas) #append与extend的区别
group_col_datas = groupby(sorted(col_datas_union, key=itemgetter(0)), itemgetter(0))
# group_col_datas = groupby(sorted(col_datas, key=itemgetter(0)), itemgetter(0))
result_list = []
rl = []
for key, item in group_col_datas:
i = 0
j = 0
for jtem in item:
i += float(jtem[1])
j += 1
# print key, i, j
result_list = (key, i, j)
rl.append(result_list)
rl.sort(key=lambda x:x[1],reverse=True)
print rl[0:10]
curr.close()
conn.close()
数据结果:
[(178236720L, 11013400.0, 28486), (577484623L, 5305200.0, 7764), (367625864L, 3402100.0, 9820), (1126170673L, 2917400.0, 12218), (282417747L, 2567800.0, 4852), (1036752959L, 1876800.0, 18165), (406271L, 1809900.0, 4680), (1094468906L, 1648500.0, 3903), (995497862L, 1623800.0, 14015), (53104325L, 1592100.0, 3699)]