# QQ chat-log analysis with Python (利用python进行QQ聊天分析)

import re
import jieba
import pandas as pd
import numpy as np
from datetime import datetime
from snownlp import SnowNLP
from collections import Counter
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from datetime import timedelta


# 解析聊天记录文件
# 解析聊天记录文件 (parse the chat-log file)
def parse_chat_records(file_path):
    """Parse an exported QQ chat log into (timestamp, user, message) tuples.

    Expects records shaped as two lines each:
        YYYY-MM-DD HH:MM:SS <user>
        <message>
    Only records from 2023 onward are kept.

    Args:
        file_path: path to the UTF-8 chat export.
    Returns:
        list of (date_time_str, user, message) tuples.
    """
    with open(file_path, encoding='utf-8') as f:
        content = f.read()
    # '(?:\n|$)' also matches a final message with no trailing newline,
    # which the previous pattern (requiring '\n') silently dropped.
    all_messages = re.findall(
        r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) (.*?)\n(.*?)(?:\n|$)',
        content)

    # 筛选2023年之后的聊天记录 (keep only messages from 2023 onward)
    return [(date_time, user, message)
            for date_time, user, message in all_messages
            if int(date_time[:4]) >= 2023]

# 情感分析
# 情感分析 (sentiment analysis)
def sentiment_analysis(messages):
    """Score every non-blank message with SnowNLP sentiment.

    Args:
        messages: iterable of (timestamp, user, message) tuples.
    Returns:
        list of float scores in [0, 1] (higher = more positive),
        one per message whose text is not blank.
    """
    return [SnowNLP(text).sentiments
            for _, _, text in messages
            if text.strip()]

# 计算时间差值
# 计算时间差值 (compute gaps between consecutive messages)
def time_differences(messages):
    """Return the gap in seconds between consecutive messages.

    Gaps longer than 8 hours (or negative gaps from out-of-order records)
    are treated as separate conversations and skipped.

    Args:
        messages: list of (timestamp_str, user, message) tuples, where the
            timestamp is formatted '%Y-%m-%d %H:%M:%S'.
    Returns:
        list of gap durations in seconds.
    """
    fmt = '%Y-%m-%d %H:%M:%S'
    diffs = []
    for prev, curr in zip(messages, messages[1:]):
        delta = (datetime.strptime(curr[0], fmt)
                 - datetime.strptime(prev[0], fmt))
        # Only count gaps within one conversation (0..8 hours).
        # total_seconds() replaces .seconds, which silently drops the day
        # component and returns a bogus value for negative deltas.
        if timedelta(0) <= delta <= timedelta(hours=8):
            diffs.append(delta.total_seconds())
    return diffs

# 词频统计
# Filler/placeholder tokens that add no analytical value (e.g. "[image]",
# "[emoji]" markers and common Chinese function words).
_NOISE_WORDS = frozenset(
    ['图片', '表情', '这些', '那些', '就是', '那个', '之前', '一个', '现在'])


# 词频统计 (word-frequency statistics)
def word_frequency(messages, stopwords_path=None):
    """Count word frequencies over all messages using jieba segmentation.

    Single-character tokens, built-in noise words and any user-supplied
    stopwords are excluded.

    Args:
        messages: iterable of (timestamp, user, message) tuples.
        stopwords_path: optional path to a UTF-8 file with one stopword
            per line.
    Returns:
        list of (word, count) pairs sorted by descending count
        (Counter.most_common()).
    """
    # Merge user stopwords with the built-in noise words into ONE set, so
    # each token needs a single O(1) membership test instead of a set
    # lookup plus an O(k) scan of a hard-coded list.
    stopwords = set(_NOISE_WORDS)
    if stopwords_path:
        with open(stopwords_path, encoding='utf-8') as f:
            stopwords.update(line.strip() for line in f)

    words = [word
             for _, _, message in messages
             for word in jieba.cut(message)
             if len(word) > 1 and word not in stopwords]
    return Counter(words).most_common()

# 生成词云
# 生成词云 (render a word cloud image)
def generate_wordcloud(words, file_path, font_path='simhei.ttf'):
    """Render word frequencies as a word-cloud image saved to file_path.

    Args:
        words: sequence of (word, count) pairs, e.g. Counter.most_common().
        file_path: destination image path.
        font_path: TTF font that supports CJK glyphs; the default preserves
            the original hard-coded 'simhei.ttf'.
    """
    if not words:
        print('没有足够的词汇生成词云。')
        return

    wc = WordCloud(font_path=font_path, background_color='white',
                   width=1920, height=1080)
    wc.generate_from_frequencies(dict(words))
    # Use a dedicated figure and close it afterwards so later pyplot calls
    # (e.g. generate_pie_chart) don't draw on top of this plot — pyplot
    # keeps global figure state otherwise.
    plt.figure()
    plt.imshow(wc, interpolation='bilinear')
    plt.axis('off')
    plt.savefig(file_path)
    plt.close()
# 生成圆饼图
# 生成圆饼图 (render a pie chart)
def generate_pie_chart(labels, values, file_path):
    """Save a pie chart of `values` labelled by `labels` to file_path.

    Args:
        labels: sequence of slice labels.
        values: sequence of slice sizes (same length as labels).
        file_path: destination image path.
    """
    # Isolate in a fresh figure and close it, so this chart neither draws
    # over a previously-open figure nor leaks state into later plots.
    plt.figure()
    plt.pie(values, labels=labels, autopct='%1.1f%%')
    plt.savefig(file_path)
    plt.close()
    
    
# 主程序
# 主程序 (main driver)
def analyze_chat_records(file_path):
    """Run the full analysis pipeline on one chat export and print results.

    Steps: parse -> sentiment scores -> reply-time gaps -> word frequency
    -> word-cloud image ('wordcloud.png').

    Args:
        file_path: path to the UTF-8 chat export.
    """
    messages = parse_chat_records(file_path)
    sentiment_scores = sentiment_analysis(messages)
    time_diffs = time_differences(messages)
    words = word_frequency(messages)
    generate_wordcloud(words, 'wordcloud.png')

    # 输出结果 — guard empties: np.mean([]) returns nan and emits a
    # RuntimeWarning, e.g. when no 2023+ records exist or there is only
    # one message (no time gaps).
    if sentiment_scores:
        print('情感分析(平均分):', np.mean(sentiment_scores))
    if time_diffs:
        print('每次对话之间的时间差值(平均秒数):', np.mean(time_diffs))
    print('词频统计(前10):', words[:10])



# Script entry point: analyze the sample export 'test.txt' in the
# current working directory (reads it and writes 'wordcloud.png').
if __name__ == '__main__':
    analyze_chat_records('test.txt')

# 你可能感兴趣的: (python, 开发语言)