# Dataset download (Baidu Netdisk): https://pan.baidu.com/s/1zzKSJJEhYr20aUtWSPgQWQ
# Extraction code: 1234
#导入相关库并读取数据
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Read the review CSV (decoded as GBK) into a DataFrame.
csv_path = "D:/天池竞赛/用户情感分析/earphone_sentiment.csv"
data = pd.read_csv(csv_path, encoding="gbk")
# Bare expression: shown as the cell output when this runs in a notebook.
data
#词云
from wordcloud import WordCloud
import jieba
from PIL import Image
# Tokenise every comment with jieba and drop stop words and punctuation.
# The surviving tokens are collected in `ls`.
from string import punctuation
# Extra symbols (mostly full-width) to drop in addition to string.punctuation.
add_punc=',。、【 】 “”:;()《》‘’{}?!⑦()、%^>℃:.”“^-——=@¥'
stop_words={'你','我','的','了','人','都','和','在','不','比','就','但','也','是','有','吧','很','还','啊','个','说','会','去','用','这','就是','但是','还是','还有','不是','现在','的话','觉得','不过','只是','因为','什么','如果','而且','森林','看看','没有','等','没','要','那','所以','自己','看过','这个','知道','一个','或者','后','吗','看'}
# Individual punctuation characters, used to recognise pure-punctuation tokens.
punct_chars = set(add_punc) | set(punctuation)
# `stop` is a set of whole tokens. Joining everything into one string made
# `token in stop` a substring test, which also discarded legitimate tokens
# that merely occur inside a stop word (e.g. a single character of one).
stop = stop_words | punct_chars
# Skip missing comments and coerce the rest to str before handing them to jieba.
comments = data["content"].dropna().astype(str)
ls = []
for comment in comments:
    for word in jieba.lcut(comment):
        token = word.strip()
        # Whitespace-only tokens become empty after strip().
        if not token or token in stop:
            continue
        # Drop runs made up solely of punctuation characters.
        if all(ch in punct_chars for ch in token):
            continue
        ls.append(token)
# Load the mask image, build the word cloud from the tokens in `ls`,
# display it and save it to wc.jpg.
mask_path = "D:/天池竞赛/用户情感分析/map.jpg"
mask = np.array(Image.open(mask_path))
cloud_options = dict(
    scale=6,
    font_path='simhei.ttf',
    background_color='white',
    max_font_size=500,
    min_font_size=5,
    max_words=400,
    collocations=False,
    font_step=1,
    mask=mask,
)
wc = WordCloud(**cloud_options)
# The tokens are handed to the generator as one space-separated string.
text = ' '.join(ls)
s = wc.generate(text)
fig, axes = plt.subplots(figsize=(18, 18))
plt.axis('off')
plt.imshow(s)
wc.to_file('wc.jpg')
# Bar charts of value frequencies, one figure per column.
# Frequency of each distinct value in the 'subject' column.
plt.figure(figsize=(20,8))
data['subject'].value_counts().plot.bar()
# Frequency of each distinct value in the 'sentiment_word' column.
plt.figure(figsize=(20,8))
data['sentiment_word'].value_counts().plot.bar()
plt.figure(figsize=(10,8))
# Replace the codes 1 / -1 / 0 with labels before counting.
# The result is assigned back to the column: calling replace(..., inplace=True)
# on `data['sentiment_value']` is chained assignment, which under pandas
# Copy-on-Write modifies a temporary and leaves `data` unchanged.
sentiment_labels = {1: "好评", -1: "差评", 0: "未填写或中评"}
data['sentiment_value'] = data['sentiment_value'].replace(sentiment_labels)
data['sentiment_value'].value_counts().plot.bar()
# Correlation heatmaps: one-hot encode a column, then plot the correlation
# matrix of the resulting indicator columns.
import seaborn as sns
one_hot_1 = pd.get_dummies(data["subject"])
plt.figure(figsize=(15,15))
sns.heatmap(one_hot_1.corr(),cmap='YlGnBu')
one_hot_2 = pd.get_dummies(data["sentiment_word"])
plt.figure(figsize=(15,15))
sns.heatmap(one_hot_2.corr(),cmap='YlGnBu')
# Label the sentiment codes BEFORE encoding so the heatmap axes carry the
# labels; previously the replacement ran after get_dummies and could not
# affect the plot. Assigning back (instead of a chained inplace replace) keeps
# this working under pandas Copy-on-Write, and it is a no-op when the column
# has already been labelled.
data['sentiment_value'] = data['sentiment_value'].replace(
    {1: "好评", -1: "差评", 0: "未填写或中评"}
)
one_hot_3 = pd.get_dummies(data['sentiment_value'])
plt.figure(figsize=(10,10))
sns.heatmap(one_hot_3.corr(),cmap='YlGnBu')