网易云歌曲情感分析系统可视化——完结

目录

  • 一、界面展示
    • 1.主界面
    • 2.词云图
    • 3.用户地区分布图
    • 4.评论情感得分
    • 5.评论数据浏览
    • 6.评论时间分布图
    • 7.用户信息浏览
  • 二、代码结构展示
    • 1.GUI代码整体
  • 三、核心代码
    • 1.用户爬虫代码
    • 2.评论爬虫代码
    • 3.GUI核心代码
    • 4.传送门:[待加载...](http://www.baidu.com/)

一、界面展示

1.主界面

网易云歌曲情感分析系统可视化——完结_第1张图片

2.词云图

网易云歌曲情感分析系统可视化——完结_第2张图片

3.用户地区分布图

网易云歌曲情感分析系统可视化——完结_第3张图片

4.评论情感得分

网易云歌曲情感分析系统可视化——完结_第4张图片

5.评论数据浏览

网易云歌曲情感分析系统可视化——完结_第5张图片

6.评论时间分布图

网易云歌曲情感分析系统可视化——完结_第6张图片

7.用户信息浏览

网易云歌曲情感分析系统可视化——完结_第7张图片

二、代码结构展示

代码采用模块化设计:每个脚本既可以单独运行,也可以通过可视化界面(GUI)调用运行。

1.GUI代码整体

网易云歌曲情感分析系统可视化——完结_第8张图片

重要包版本:
pyecharts==0.5.5
jinja2==3.0.3

三、核心代码

1.用户爬虫代码

# -*- coding:utf8 -*-
from urllib import request
import json
import pymysql
import re

# Base URL of the user-detail API; the user id is appended to it.
ROOT_URL = 'https://music.163.com/api/v1/user/detail/'
# MySQL database and table names. These are the values the GUI module of this
# project configures; change them if the local database is laid out differently.
DATABASE = 'wangyiyun'
TABLE_USERS = 'users'
TABLE_COMMENTS = 'comments'
# Characters replaced in free-text fields: newline, tab, carriage return, slash.
PATTERN = re.compile(r'[\n\t\r\/]')

def getData_user(url):
    """Fetch one user's profile from the user-detail API and flatten it.

    Args:
        url: Full user-detail URL (the base URL followed by the user id).

    Returns:
        A dict with the keys userId, userName, avatar, gender, age, level,
        sign, eventCount, followCount, fanCount, city and recordCount, or
        None when ``url`` is empty, the API answers with a code other than
        200, or the request/parsing fails.
    """
    # Local import: the module header does not import datetime.
    from datetime import date

    if not url:
        return None
    print('Crawling>>> ' + url)
    headers = {
        "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.74 Safari/537.36 Edg/99.0.1150.46',
    }
    try:
        req = request.Request(url, headers=headers)
        # Bound the wait and always release the response.
        with request.urlopen(req, timeout=10) as resp:
            js = json.loads(resp.read().decode("utf-8"))
        if js['code'] != 200:
            return None
        profile = js['profile']

        birthday_ms = int(profile['birthday'])
        if birthday_ms < 0:
            # Negative timestamps are stored as age 0.
            age = 0
        else:
            # Approximate the birth year from the millisecond timestamp and
            # measure the age against the current year, never below zero.
            years_after_epoch = birthday_ms // (1000 * 365 * 24 * 3600)
            age = max((date.today().year - 1970) - years_after_epoch, 0)

        return {
            'userId': profile['userId'],
            'userName': profile['nickname'],
            'avatar': profile['avatarUrl'],
            'gender': profile['gender'],
            'age': age,
            'level': js['level'],
            # A missing/null signature is stored as an empty string instead of
            # discarding the whole user.
            'sign': PATTERN.sub(' ', profile.get('signature') or ''),
            'eventCount': profile['eventCount'],
            'followCount': profile['follows'],
            'fanCount': profile['followeds'],
            'city': profile['city'],
            'recordCount': js['listenSongs'],
        }
    except Exception as e:
        print('Down err>>> ', e)
        return None

def saveData_user(data):
    """Insert one crawled user record into the ``users`` table.

    Args:
        data: Dict produced by ``getData_user``; a falsy value is ignored.

    Returns:
        None. Database errors are printed together with the user id and
        swallowed; the cursor and connection are always closed.
    """
    if not data:
        return None
    # utf8mb4 is required so that emoji inside signatures can be stored.
    conn = pymysql.connect(host='localhost', user='root', passwd='qwer', db=DATABASE, charset='utf8mb4')
    cursor = conn.cursor()
    sql = 'insert into users(id,userName,gender,age,level,city,sign,eventCount,followsCount,followedCount,recordCount,avatar,userId) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
    # Dict keys in the same order as the columns that follow ``id``.
    value_keys = (
        'userName', 'gender', 'age', 'level', 'city', 'sign', 'eventCount',
        'followCount', 'fanCount', 'recordCount', 'avatar', 'userId',
    )
    try:
        # The id column is always sent as 0.
        row = (0,) + tuple(data[key] for key in value_keys)
        cursor.execute(sql, row)
        conn.commit()
    except Exception as err:
        print('mysql err>>> ', data['userId'], err)
    finally:
        cursor.close()
        conn.close()

def getID_user():
    """Read the userId column of every row in the comments table.

    Returns:
        The rows returned by ``cursor.fetchall()``, or None when the query
        fails (the error is printed).
    """
    conn = pymysql.connect(host='localhost', user='root', passwd='qwer', db=DATABASE, charset='utf8mb4')
    cursor = conn.cursor()
    query = 'SELECT userId FROM ' + TABLE_COMMENTS
    rows = None
    try:
        cursor.execute(query)
        rows = cursor.fetchall()
    except Exception as err:
        print('get err>>> ', err)
    finally:
        cursor.close()
        conn.close()
    return rows

if __name__ == '__main__':
    # getID_user() returns None when its query fails; treat that as "no users".
    rows = getID_user() or ()
    # The comments table has one row per comment, so the same user can appear
    # many times. Crawl each distinct id once, keeping first-seen order.
    # str() keeps this working whether the column is numeric or text.
    user_ids = dict.fromkeys(
        str(row[0]).strip() for row in rows if row[0] is not None
    )
    for user_id in user_ids:
        data = getData_user(ROOT_URL + user_id)
        saveData_user(data)

2.评论爬虫代码

# -*- coding:utf8 -*-
from urllib import request
import json
import pymysql
from datetime import datetime
import re

# Comment-list API; the placeholders are song id, page size and offset.
ROOT_URL = 'http://music.163.com/api/v1/resource/comments/R_SO_4_%s?limit=%s&offset=%s'
LIMIT_NUMS = 50  # number of comments requested per page
# These are the values the GUI module of this project configures; change them
# if the local database is laid out differently.
DATABASE = 'wangyiyun'  # database name
TABLE = 'comments'  # table that stores the comments
# Characters stripped from comment text: newline, tab, carriage return, slash.
PATTERN = re.compile(r'[\n\t\r\/]')


def getData_com(url):
    """Download one page of comments and convert it to a list of dicts.

    Args:
        url: Fully formatted comment-list URL.

    Returns:
        A ``(total, comments)`` tuple. ``total`` is the ``total`` field of the
        response as an int and ``comments`` is a list of dicts with the keys
        commentId, content, time, likedCount and userId. ``(None, None)`` is
        returned when ``url`` is empty or when the request or parsing fails,
        so callers can always unpack the result.
    """
    if not url:
        return None, None
    headers = {
        "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36',
        "Host": "music.163.com",
    }
    print('Crawling>>> ' + url)
    try:
        req = request.Request(url, headers=headers)
        # Bound the wait and always release the response.
        with request.urlopen(req, timeout=10) as resp:
            js = json.loads(resp.read().decode("utf-8"))
        total = int(js['total'])
        datas = [
            {
                'commentId': c['commentId'],
                # A null content field is stored as an empty string.
                'content': PATTERN.sub('', c['content'] or ''),
                # The API timestamp is divided by 1000 to get seconds.
                'time': datetime.fromtimestamp(c['time'] // 1000),
                'likedCount': c['likedCount'],
                'userId': c['user']['userId'],
            }
            for c in js['comments']
        ]
        return total, datas
    except Exception as e:
        print('Down err>>> ', e)
        return None, None


def saveData_com(data):
    """Insert a page of crawled comments into the comments table.

    Args:
        data: List of comment dicts as produced by ``getData_com``; a falsy
            value is ignored.

    Returns:
        None. Each comment is committed on its own, so one failing row (its
        error is printed) does not prevent the others from being stored.
    """
    if not data:
        return None
    # utf8mb4 is required so that emoji inside comments can be stored.
    conn = pymysql.connect(host='localhost', user='root', passwd='qwer', db=DATABASE,
                           charset='utf8mb4')
    try:
        cursor = conn.cursor()
        try:
            sql = 'insert into ' + TABLE + ' (id,commentId,content,likedCount,time,userId) VALUES (%s,%s,%s,%s,%s,%s)'
            for d in data:
                try:
                    # The id column is always sent as 0.
                    cursor.execute(sql, (0, d['commentId'], d['content'], d['likedCount'], d['time'], d['userId']))
                    conn.commit()
                except Exception as e:
                    print('mysql err>>> ', d.get('commentId'), e)
        finally:
            cursor.close()
    finally:
        conn.close()


if __name__ == '__main__':
    songId = input('歌曲ID:').strip()
    # Tolerate a bare None from getData_com so the unpacking cannot fail.
    total, data = getData_com(ROOT_URL % (songId, LIMIT_NUMS, 0)) or (None, None)
    saveData_com(data)
    if total:
        # Ceiling division gives the number of pages; page 0 was stored above.
        page_count = -(-total // LIMIT_NUMS)
        for page in range(1, page_count):
            _, data = getData_com(ROOT_URL % (songId, LIMIT_NUMS, page * LIMIT_NUMS)) or (None, None)
            saveData_com(data)

3.GUI核心代码

# -*- coding: utf-8 -*-
import re
import sys
import time
import pandas as pd
import pymysql
import jieba
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from snownlp import SnowNLP
from pyecharts import Bar, Pie, Line, Scatter, Map
import pymysql
from PyQt5.QtCore import QUrl, QFileInfo
from PyQt5.QtCore import *
from PyQt5.QtWidgets import *
from ui_main import Ui_MainWindow
from PyQt5.QtWidgets import *
from PyQt5 import QtGui, QtWidgets
from PyQt5.QtWidgets import QApplication, QMainWindow, QHeaderView
from ui_show_users import Ui_Form2
from ui_show_comments import Ui_Form
from comment import *
from users import *
from PyQt5.QtWebEngineWidgets import *
# Use matplotlib's 'ggplot' style for every figure drawn by this module.
plt.style.use('ggplot')
# NOTE(review): presumably set so minus signs render with fonts that lack the
# Unicode minus glyph — confirm.
plt.rcParams['axes.unicode_minus'] = False
# Comment-list API; the placeholders are song id, page size and offset.
ROOT_URL_com = 'http://music.163.com/api/v1/resource/comments/R_SO_4_%s?limit=%s&offset=%s'
# User-detail API; the user id is appended to it.
ROOT_URL = 'https://music.163.com/api/v1/user/detail/'
LIMIT_NUMS = 50  # number of comments requested per page
# MySQL connection settings used by the GUI.
HOST='localhost'
PORT=3306
USER='root'
PWD='qwer'
DATABASE = 'wangyiyun'  # database name
TABLE = 'comments'  # table name
TABLE_USERS = 'users'
TABLE_COMMENTS = 'comments'
# Matches newline, tab, carriage return and slash.
PATTERN = re.compile(r'[\n\t\r\/]')
class mainwindow(QMainWindow):

# 初始化
    def __init__(self):
        """Build the main window, install a status bar and wire up the buttons."""
        super().__init__()
        # Let the generated Ui_MainWindow populate this window with its widgets.
        self.ui = Ui_MainWindow()
        self.ui.setupUi(self)
        # The main window is shown directly; the dialogs opened later are
        # shown on their own.
        self.show()
        # Create the status bar and install it on the window.
        self.statusBar = QStatusBar()
        self.setStatusBar(self.statusBar)
        # (button attribute on the generated UI, click handler), in wiring order.
        wiring = (
            ('pushButton', self.p_comments),
            ('pushButton_2', self.p_users),
            ('pushButton_4', self.show_comments),
            ('pushButton_3', self.show_users),
            ('pushButton_5', self.show_com_day),
            ('pushButton_6', self.show_com_week),
            ('pushButton_9', self.show_com_hour),
            ('pushButton_10', self.show_user_city),
            ('pushButton_11', self.show_user_age),
            ('pushButton_12', self.show_user_emotion),
            ('pushButton_13', self.show_emotion_label),
            ('pushButton_14', self.show_could),
            ('pushButton_15', self.del_all_data),
            ('pushButton_16', self.admin_ok),
        )
        for button_name, handler in wiring:
            getattr(self.ui, button_name).clicked.connect(handler)

# 管理员权限确认
    def admin_ok(self):
        """Enable the clear-all-data button and disable the confirm button."""
        ui = self.ui
        # pushButton_15 triggers del_all_data; pushButton_16 triggers this method.
        ui.pushButton_15.setEnabled(True)
        ui.pushButton_16.setEnabled(False)

# 初始化数据库
    def del_all_data(self):
        """Delete every row from the ``users`` and ``comments`` tables.

        The outcome is reported in the status bar. Nothing is deleted when the
        database connection cannot be established.
        """
        # Forget stale handles so a failed connect_mysql() is detectable below.
        self.conn = self.cur = None
        self.connect_mysql()
        if self.conn is None or self.cur is None:
            # connect_mysql() has already reported the failure.
            return
        try:
            self.cur.execute("delete from users")
            self.cur.execute("delete from comments")
            # PyMySQL connections do not autocommit by default; without an
            # explicit commit the deletes are discarded when the connection
            # is closed.
            self.conn.commit()
        except Exception as e:
            self.status_msg('[-]清空数据失败!<' + str(e) + '>', 5000)
        else:
            self.status_msg('全部数据已清空!',10000)
        finally:
            self.cur.close()
            self.conn.close()

# 用户地区分布分析
# 城市code编码转换
    def city_group(self,cityCode):
        """Map a city code to the name of its province-level region.

        Args:
            cityCode: City code as a string or number; only its first two
                characters are used.

        Returns:
            The region name, or '其他' when the prefix is not in the table.
        """
        city_map = {
            '11': '北京',
            '12': '天津',
            '31': '上海',
            '50': '重庆',
            # NOTE(review): presumably matches a code stored in float notation
            # such as '5e+05' — confirm against the data.
            '5e': '重庆',
            '81': '香港',
            '82': '澳门',
            '13': '河北',
            '14': '山西',
            '15': '内蒙古',
            '21': '辽宁',
            '22': '吉林',
            '23': '黑龙江',
            '32': '江苏',
            '33': '浙江',
            '34': '安徽',
            '35': '福建',
            '36': '江西',
            '37': '山东',
            '41': '河南',
            '42': '湖北',
            '43': '湖南',
            '44': '广东',
            '45': '广西',
            '46': '海南',
            '51': '四川',
            '52': '贵州',
            '53': '云南',
            '54': '西藏',
            '61': '陕西',
            '62': '甘肃',
            '63': '青海',
            '64': '宁夏',
            '65': '新疆',
            '71': '台湾',
            '10': '其他',
            '0': '其他',
        }

        # str() accepts numeric codes; unknown prefixes fall back to '其他'
        # instead of raising KeyError.
        return city_map.get(str(cityCode)[:2], '其他')

# 评论爬虫机制
    def p_comments(self):
        """Crawl and store the comments of the song id typed into ``lineEdit``.

        Returns 0 when no song id was entered, otherwise None. Any exception is
        printed and shown in the status bar for five seconds.
        """
        try:
            song_id = self.ui.lineEdit.text().strip()
            if song_id == '':
                return 0
            # The first page also reports the total number of comments.
            total, data = getData_com(ROOT_URL_com % (song_id, LIMIT_NUMS, 0))
            saveData_com(data)
            # NOTE(review): the value from lineEdit_2 is used as a divisor of
            # the total, not as a page count — confirm the intended meaning.
            num = self.ui.lineEdit_2.text()
            if num == '':
                num = 10
            if not total:
                return None
            last_page = total // int(num)
            for page in range(1, last_page + 1):
                page_url = ROOT_URL_com % (song_id, LIMIT_NUMS, page * (LIMIT_NUMS))
                _, data = getData_com(page_url)
                saveData_com(data)
        except Exception as err:
            message = str(err)
            print(message)
            self.status_msg(message, 5000)

# 用户信息爬虫
    def p_users(self):
        """Crawl and store the profile of every user id returned by getID_user().

        Any exception is printed and shown in the status bar for five seconds.
        """
        try:
            for row in getID_user():
                # Each row holds the user id in its first column.
                profile = getData_user(ROOT_URL + row[0].strip())
                saveData_user(profile)
        except Exception as err:
            message = str(err)
            print(message)
            self.status_msg(message, 5000)

# 评论信息展示
    def show_comments(self):
        """Open a dialog that lists every row of the ``comments`` table.

        Query errors are printed and the dialog is still shown. When the
        database connection cannot be established the table is left untouched.
        """
        form = QDialog()
        self.ui2 = Ui_Form()
        self.ui2.setupUi(form)
        # Non-modal was requested in the original; Qt.WindowModal and
        # Qt.ApplicationModal were the alternatives considered.
        form.setWindowModality(Qt.NonModal)
        table = self.ui2.tableWidget

        # Forget stale handles so a failed connect_mysql() is detectable below.
        self.conn = self.cur = None
        self.connect_mysql()
        if self.conn is not None and self.cur is not None:
            try:
                table.clearContents()
                self.cur.execute("select * from comments")
                rows = self.cur.fetchall()
                # Grow the grid when needed so that no fetched cell falls
                # outside the table's current row/column count.
                if table.rowCount() < len(rows):
                    table.setRowCount(len(rows))
                if rows and table.columnCount() < len(rows[0]):
                    table.setColumnCount(len(rows[0]))
                for row_no, row in enumerate(rows):
                    for col_no, value in enumerate(row):
                        table.setItem(row_no, col_no, QtWidgets.QTableWidgetItem(str(value)))
            except Exception as e:
                print(e)
            finally:
                self.cur.close()
                self.conn.close()

        form.show()
        QApplication.processEvents()
        form.exec_()

# 用户信息展示
    def show_users(self):
        """Open a dialog that lists every row of the ``users`` table.

        Query errors are printed and the dialog is still shown. When the
        database connection cannot be established the table is left untouched.
        """
        form = QDialog()
        self.ui3 = Ui_Form2()
        self.ui3.setupUi(form)
        # Non-modal was requested in the original; Qt.WindowModal and
        # Qt.ApplicationModal were the alternatives considered.
        form.setWindowModality(Qt.NonModal)
        table = self.ui3.tableWidget

        # Forget stale handles so a failed connect_mysql() is detectable below.
        self.conn = self.cur = None
        self.connect_mysql()
        if self.conn is not None and self.cur is not None:
            try:
                table.clearContents()
                self.cur.execute("select * from users")
                rows = self.cur.fetchall()
                # Grow the grid when needed so that no fetched cell falls
                # outside the table's current row/column count.
                if table.rowCount() < len(rows):
                    table.setRowCount(len(rows))
                if rows and table.columnCount() < len(rows[0]):
                    table.setColumnCount(len(rows[0]))
                for row_no, row in enumerate(rows):
                    for col_no, value in enumerate(row):
                        table.setItem(row_no, col_no, QtWidgets.QTableWidgetItem(str(value)))
            except Exception as e:
                print(e)
            finally:
                self.cur.close()
                self.conn.close()

        form.show()
        QApplication.processEvents()
        form.exec_()

# 评论时间分布图(天)展示
    def show_com_day(self):
        """Render and display a line chart of the comment count per calendar day.

        The chart is written to ``./output/com_day.html`` and opened with
        ``img_show``. A status message is shown instead when the comments table
        is empty.
        """
        # Only the comments table is needed for this chart.
        self.conn = pymysql.connect(host=HOST, user=USER, passwd=PWD, db=DATABASE, charset='utf8mb4')
        try:
            self.sql_comments = 'SELECT id,time FROM ' + TABLE_COMMENTS
            self.comments = pd.read_sql(self.sql_comments, con=self.conn)
        finally:
            # Release the connection even when the query fails.
            self.conn.close()
        if self.comments.empty:
            self.status_msg('[-]暂无评论数据!', 5000)
            return
        # Count the comments per day, ordered by date.
        times = pd.to_datetime(self.comments['time'])
        counts = times.dt.date.value_counts().sort_index()
        data_key = counts.index.tolist()
        data_val = counts.tolist()
        print(data_key, data_val)
        line = Line('评论时间(按天)分布')
        line.use_theme('dark')
        line.add(
            '',
            data_key,
            data_val,
            is_fill=True,
        )
        line.render(r'./output/com_day.html')
        self.img_show(r'./output/com_day.html')

# 评论时间分布图(周)展示
    def show_com_week(self):
        """Render and display a line chart of the comment count per day of week.

        The x axis uses the pandas ``dayofweek`` value of each comment time.
        The chart is written to ``./output/com_week.html`` and opened with
        ``img_show``. A status message is shown instead when the comments table
        is empty.
        """
        # Only the comments table is needed for this chart.
        self.conn = pymysql.connect(host=HOST, user=USER, passwd=PWD, db=DATABASE, charset='utf8mb4')
        try:
            self.sql_comments = 'SELECT id,time FROM ' + TABLE_COMMENTS
            self.comments = pd.read_sql(self.sql_comments, con=self.conn)
        finally:
            # Release the connection even when the query fails.
            self.conn.close()
        if self.comments.empty:
            self.status_msg('[-]暂无评论数据!', 5000)
            return
        # Count the comments per day of week, ordered by that value.
        times = pd.to_datetime(self.comments['time'])
        counts = times.dt.dayofweek.value_counts().sort_index()
        data_key = counts.index.tolist()
        data_val = counts.tolist()
        print(data_key, data_val)
        line = Line('评论时间(按周)分布')
        line.use_theme('dark')
        line.add(
            '',
            data_key,
            data_val,
            is_fill=True,
        )
        line.render(r'./output/com_week.html')
        self.img_show(r'./output/com_week.html')

# 评论时间分布图(时)展示
    def show_com_hour(self):
        """Render and display a line chart of the comment count per hour of day.

        The chart is written to ``./output/com_hour.html`` and opened with
        ``img_show``. A status message is shown instead when the comments table
        is empty.
        """
        # Only the comments table is needed for this chart.
        self.conn = pymysql.connect(host=HOST, user=USER, passwd=PWD, db=DATABASE, charset='utf8mb4')
        try:
            self.sql_comments = 'SELECT id,time FROM ' + TABLE_COMMENTS
            self.comments = pd.read_sql(self.sql_comments, con=self.conn)
        finally:
            # Release the connection even when the query fails.
            self.conn.close()
        if self.comments.empty:
            self.status_msg('[-]暂无评论数据!', 5000)
            return
        # Count the comments per hour, ordered by hour.
        times = pd.to_datetime(self.comments['time'])
        counts = times.dt.hour.value_counts().sort_index()
        data_key = counts.index.tolist()
        data_val = counts.tolist()
        print(data_key, data_val)
        line = Line('评论时间(按小时)分布')
        line.use_theme('dark')
        line.add(
            '',
            data_key,
            data_val,
            is_fill=True,
        )
        line.render(r'./output/com_hour.html')
        self.img_show(r'./output/com_hour.html')

# 用户所在城市图展示
    def show_user_city(self):
        """Render and display a China map coloured by the number of users per region.

        City codes are converted to region names with ``city_group``. The map
        is written to ``./output/user_city.html`` and opened with ``img_show``.
        A status message is shown instead when the users table is empty.
        """
        # Only the users table is needed for this chart.
        self.conn = pymysql.connect(host=HOST, user=USER, passwd=PWD, db=DATABASE, charset='utf8mb4')
        try:
            self.sql_users = 'SELECT id,gender,age,city FROM ' + TABLE_USERS
            self.users = pd.read_sql(self.sql_users, con=self.conn)
        finally:
            # Release the connection even when the query fails.
            self.conn.close()
        if self.users.empty:
            self.status_msg('[-]暂无用户数据!', 5000)
            return
        # Count the users per region name.
        regions = self.users['city'].apply(self.city_group)
        counts = regions.value_counts().sort_index()
        data_key = counts.index.tolist()
        data_val = counts.tolist()
        print(data_key, data_val)
        map_ = Map('用户地区分布图')
        map_.add(
            '',
            data_key,
            data_val,
            maptype='china',
            is_visualmap=True,
            visual_text_color='#000',
            is_label_show=True,
        )
        map_.render(r'./output/user_city.html')
        self.img_show(r'./output/user_city.html')

# 用户年龄分布图展示
    def show_user_age(self):
        """Render and display a line chart of the number of users per age.

        Users whose stored age is not greater than 0 are left out. The chart is
        written to ``./output/user_age.html`` and opened with ``img_show``. A
        status message is shown instead when no usable rows exist.
        """
        # Only the users table is needed for this chart.
        self.conn = pymysql.connect(host=HOST, user=USER, passwd=PWD, db=DATABASE, charset='utf8mb4')
        try:
            self.sql_users = 'SELECT id,gender,age,city FROM ' + TABLE_USERS
            self.users = pd.read_sql(self.sql_users, con=self.conn)
        finally:
            # Release the connection even when the query fails.
            self.conn.close()
        # Drop the rows whose age is 0 or negative.
        known_age = self.users[self.users['age'] > 0] if not self.users.empty else self.users
        if known_age.empty:
            self.status_msg('[-]暂无用户年龄数据!', 5000)
            return
        # Count the users per age, ordered by age.
        counts = known_age['age'].value_counts().sort_index()
        data_key = counts.index.tolist()
        data_val = counts.tolist()
        print(data_key, data_val)
        # The local is not called ``Bar`` so the imported Bar class is not shadowed.
        line = Line('用户年龄分布')
        line.use_theme('dark')
        line.add(
            '',
            data_key,
            data_val,
            is_fill=True,
        )
        line.render(r'./output/user_age.html')  # write the rendered html file
        self.img_show(r'./output/user_age.html')

# 获取用户评论数据
    def getText(self):
        """Load the id and content of every stored comment into a DataFrame.

        The connection is stored on ``self.conn`` and left open; the callers in
        this class close it themselves. The frame is also kept as ``self.text``.

        Returns:
            DataFrame with the columns ``id`` and ``content``.
        """
        # utf8mb4, like every other connection in this module.
        self.conn = pymysql.connect(host=HOST, user=USER, passwd=PWD, db=DATABASE, charset='utf8mb4')
        sql = 'SELECT id,content FROM ' + TABLE_COMMENTS
        self.text = pd.read_sql(sql, con=self.conn)
        return self.text

# 用户情感分析展示图
    def show_user_emotion(self):
        """Render and display a bar chart of the comment count per sentiment score.

        Each comment is scored with SnowNLP, rounded to two decimals. The chart
        is written to ``./output/user_emotion.html`` and opened with
        ``img_show``.
        """
        def score(comment):
            return round(SnowNLP(comment).sentiments, 2)

        html_path = r'./output/user_emotion.html'
        frame = self.getText()
        # Replace the comment text with its sentiment score.
        frame['content'] = frame['content'].apply(score)
        per_score = frame.id.groupby(frame['content']).count()
        chart = Bar('评论情感得分')
        chart.use_theme('dark')
        chart.add(
            '',
            x_axis=per_score.index.values,
            y_axis=per_score.values,
            is_fill=True,
        )
        chart.render(html_path)
        # getText() left the connection open.
        self.conn.close()
        self.img_show(html_path)

# 用户情感标签图展示
    def show_emotion_label(self):
        """Render and display a bar chart of the comment count per sentiment label.

        Each comment is scored with SnowNLP (rounded to two decimals) and
        labelled 1 when the score is above 0.5, otherwise -1. The chart is
        written to ``./output/emotion_label.html`` and opened with ``img_show``.
        """
        def score(comment):
            return round(SnowNLP(comment).sentiments, 2)

        def label(value):
            if float(value) > 0.5:
                return 1
            return -1

        html_path = r'./output/emotion_label.html'
        frame = self.getText()
        # Replace the comment text with its score, then derive the label.
        frame['content'] = frame['content'].apply(score)
        frame['content_num'] = frame['content'].apply(label)
        per_label = frame.id.groupby(frame['content_num']).count()
        chart = Bar('评论情感标签')
        chart.use_theme('dark')
        chart.add(
            '',
            x_axis=per_label.index.values,
            y_axis=per_label.values,
            is_fill=True,
        )
        chart.render(html_path)
        # getText() left the connection open.
        self.conn.close()
        self.img_show(html_path)

# 词云图展示
    def show_could(self):
        """Build a word cloud from all stored comments, save it as PNG and show it.

        The comments are segmented with jieba, the words listed in
        ``./StopWords.txt`` are removed, and the image is written to
        ``./output/<unix time>.png``. A status message is shown instead when no
        words remain.
        """
        text = self.getText()
        # The comments are in memory now; getText() left the connection open.
        self.conn.close()
        # Keep the comments apart so words are not merged across two comments.
        text = '\n'.join(str(s) for s in text['content'] if s)
        # Load the stop words into a set for fast membership tests.
        with open(r'./StopWords.txt', 'r', encoding='utf-8') as stop_file:
            stopwords = {line.strip() for line in stop_file}
        clean_list = [
            seg for seg in jieba.cut(text, cut_all=False)
            if seg.strip() and seg not in stopwords
        ]
        if not clean_list:
            self.status_msg('[-]暂无可用于词云的评论!', 5000)
            return
        # WordCloud splits its input on non-word characters, so the segments
        # must be separated by spaces to survive as individual words.
        clean_text = ' '.join(clean_list)
        # Generate the word cloud.
        cloud = WordCloud(
            font_path=r'C:/Windows/Fonts/msyh.ttc',
            background_color='white',
            max_words=800,
            max_font_size=64
        )
        word_cloud = cloud.generate(clean_text)
        # Draw the word cloud.
        figure = plt.figure(figsize=(12, 12))
        plt.imshow(word_cloud)
        plt.axis('off')
        img_name = str(int(time.mktime(time.localtime())))
        plt.savefig("./output/" + img_name + ".png")
        # Close the figure so repeated clicks do not accumulate open figures.
        plt.close(figure)
        self.img_show("./output/" + img_name + ".png")

# 数据库连接
    def connect_mysql(self):
        """Open a MySQL connection and cursor and store them on the instance.

        On success ``self.conn`` and ``self.cur`` hold the new connection and
        cursor. On failure both are set to None and the error is shown in the
        status bar; no exception is raised.
        """
        try:
            # utf8mb4, like the other connections in this module.
            self.conn = pymysql.connect(host=HOST, port=PORT, user=USER, password=PWD, db=DATABASE,
                                        charset='utf8mb4')
            self.cur = self.conn.cursor()
        except Exception as e:
            # Do not leave handles from an earlier call behind.
            self.conn = None
            self.cur = None
            self.status_msg('[-]数据库连接错误!<' + str(e) + '>', 5000)
# html文件展示
    def img_show(self,url):
        """Open a local file (rendered HTML chart or image) in a new web view.

        Args:
            url: Path of the file to display; a relative path such as
                ``./output/com_day.html`` is resolved to an absolute one.
        """
        # A fresh view is created for every call and stored on self.ui.
        self.ui.browser = QWebEngineView()
        local_path = QFileInfo(str(url)).absoluteFilePath()
        # fromLocalFile() builds a file:// URL; a bare path handed to QUrl()
        # carries no scheme.
        self.ui.browser.load(QUrl.fromLocalFile(local_path))
        self.ui.browser.show()

# 刷新界面
    def f5_data(self):
        """Reconnect to the database, refresh the table and report the result.

        Any exception raised along the way is shown in the status bar instead
        of being propagated.
        """
        try:
            self.connect_mysql()
            # NOTE(review): renew_table is not defined in the visible part of
            # this class; if it is missing, this call raises AttributeError and
            # the failure message below is shown — confirm where it lives.
            self.renew_table()
            self.status_msg('[+]刷新界面成功!', 5000)
        except Exception as e:
            self.status_msg('[-]刷新界面有误!<' + str(e) + '>', 5000)
        # Process pending GUI events.
        QApplication.processEvents()

# 状态栏消息
    def status_msg(self, msg, time):
        """Show ``msg`` in the status bar.

        Args:
            msg: Message to display; converted with ``str``.
            time: Value passed, after ``int`` conversion, as the timeout
                argument of ``showMessage``.
        """
        text = str(msg)
        timeout = int(time)
        self.statusBar.showMessage(text, timeout)
        # Process pending GUI events.
        QApplication.processEvents()


if __name__ == "__main__":
    # Start the Qt application, create the main window (it shows itself in
    # __init__) and exit with the event loop's return code.
    application = QApplication(sys.argv)
    main_window = mainwindow()
    exit_code = application.exec_()
    sys.exit(exit_code)


4.传送门:待加载…


有问题评论区告诉我!

你可能感兴趣的:(Pyqt5,网络爬虫,目标检测,情感分析,网络爬虫,可视化,网易云)