番外:QQ群月活跃度Top10

import re
import matplotlib.pyplot as plt
from matplotlib import cm
import numpy as np
import time

'''
QQ群月活跃度排名,聊天记录可从QQ群导出,基于用户发言次数
'''
def compare_time(start_t, end_t):
    """Return True when ``start_t`` is on or after ``end_t``.

    Both arguments are date strings in ``YYYY-MM-DD`` form.  The parsed
    (year, month, day) fields are compared directly instead of converting
    to epoch seconds with ``time.mktime``, so the result does not depend on
    the local timezone and dates that ``mktime`` cannot represent on some
    platforms no longer raise ``OverflowError``.

    Args:
        start_t: Date string to test.
        end_t: Date string to compare against.

    Returns:
        bool: True if ``start_t`` >= ``end_t``, otherwise False.

    Raises:
        ValueError: If either string does not match ``%Y-%m-%d``.
    """
    date_format = '%Y-%m-%d'
    # The first three struct_time fields are (tm_year, tm_mon, tm_mday);
    # tuple comparison orders them chronologically.
    start_date = time.strptime(start_t, date_format)[:3]
    end_date = time.strptime(end_t, date_format)[:3]
    return start_date >= end_date

# Maximum number of members shown in the chart.
top_n = 10

# Patterns are compiled once instead of being looked up for every log line.
date_pattern = re.compile(r'\d{4}-\d{2}-\d{2}')
# Group 1: digits in parentheses (presumably the QQ number);
# group 2: an e-mail style id inside angle brackets.
id_pattern = re.compile(r'\((\d{4,})\)|<(.*?@.*?)>')
# Group 1 / group 2: the nickname that precedes "(" or "<" respectively,
# located after a ":NN " fragment (the tail of the message timestamp).
nick_pattern = re.compile(r':\d{2} (.*?)\(|:\d{2} (.*?)<')

# Each record is [date string, member id, nickname].
record_list = []
# Maps nickname -> number of messages counted for that nickname.
id_count = {}
with open(r'D:\Program Files\Users\tutu\Desktop\51904457.txt', 'r', encoding='utf-8') as f:
    # Iterate the file lazily; readlines() would load the whole log at once.
    for line in f:
        date_ = date_pattern.findall(line)
        id_ = id_pattern.findall(line)
        nick_name = nick_pattern.findall(line)
        # Only lines carrying a date, an id and a nickname are message headers.
        if not (date_ and id_ and nick_name):
            continue
        numeric_id, mail_id = id_[0]
        # Id "10000" is excluded (presumably the system-message account).
        if numeric_id != "" and numeric_id != "10000":
            record_list.append([date_[0], numeric_id, nick_name[0][0]])
        elif mail_id != "" and mail_id != "10000":
            record_list.append([date_[0], mail_id, nick_name[0][1]])


# Despite its name this is the lower bound: compare_time() is true for
# records dated on or after this day.
end_time = '2017-06-01'
for record_date, _member_id, nick in record_list:
    if compare_time(record_date, end_time):
        # NOTE: counts are keyed by nickname, so a member who changes
        # nickname is counted separately under each name.
        id_count[nick] = id_count.get(nick, 0) + 1

# Nicknames ordered from most to least active, with their matching counts.
all_nick = sorted(id_count, key=id_count.get, reverse=True)
nick_count = [id_count[n] for n in all_nick]

# Restrict everything that is plotted to the same top-N slice so that bar
# positions, bar lengths, colours and tick labels always have equal length,
# including when fewer than ``top_n`` members qualify.
top_nick = all_nick[:top_n]
top_count = nick_count[:top_n]
nick_len = np.arange(len(top_count))

if not top_count:
    # Nothing matched: avoid max() on an empty list and an empty chart.
    print('No chat records dated on or after %s were found.' % end_time)
else:
    # Colour each bar by its count relative to the most active member.
    color = cm.jet(np.array(top_count) / max(top_count))
    plt.rc('font', family='STXihei', size=15)
    fig = plt.figure(figsize=(8, 6))
    # align='center' is stated explicitly so the ticks below sit on the bars
    # regardless of the matplotlib version's default alignment.
    plt.barh(nick_len, top_count, 0.5, color=color, align='center')
    # One label per tick; passing more labels than ticks is an error.
    plt.yticks(nick_len, top_nick, rotation=0)
    plt.ylabel("人物")
    plt.xlabel("活跃度")
    plt.title("本群6月活跃Top10")
    plt.legend(['活跃度'], loc='upper right')
    # Set the colour, style, width and transparency of the background grid lines.
    plt.grid(color='#95a5a6', linestyle='--', linewidth=1, axis='y', alpha=0.4)
    plt.show()

你可能感兴趣的:(番外:QQ群月活跃度Top10)