Python数据可视化---词云、线状图、柱状图、箱型图

词云:

import jieba
from imageio import imread
from numpy import unicode
from wordcloud import  WordCloud,ImageColorGenerator
import  matplotlib.pyplot as plt
# Register the stop-word file with jieba as a user dictionary.
# NOTE(review): load_userdict only adds vocabulary used during segmentation;
# it does not remove any words by itself -- confirm this is the intent.
jieba.load_userdict("stoplist.txt")

# Reference only (not executed): the segmentation modes jieba offers.
#   seg_list = jieba.cut("我来到北京清华大学", cut_all=True)
#   print("Full Mode: " + "/ ".join(seg_list))       # full mode
#
#   seg_list = jieba.cut("我来到北京清华大学", cut_all=False)
#   print("Default Mode: " + "/ ".join(seg_list))    # accurate mode
#
#   seg_list = jieba.cut("他来到了网易杭研大厦")        # accurate mode is the default
#   print(", ".join(seg_list))
#
#   seg_list = jieba.cut_for_search("小明硕士毕业于中国科学院计算所,后在日本京都大学深造")  # search-engine mode
#   print(", ".join(seg_list))

# Image used as the word-cloud mask and, later, as the source of word colors.
back_color = imread('girl.jpg')

wc = WordCloud(
    background_color='white',                    # canvas background color
    max_words=100,                               # maximum number of words drawn
    mask=back_color,                             # shape mask; width/height are ignored when a mask is given
    max_font_size=100,                           # largest font size used
    font_path="D:\\pythonProject2\\simhei.ttf",  # font with Chinese glyphs, avoids empty-box characters
    random_state=42,                             # fixed seed so the layout is reproducible
)

# Read the source text inside a context manager so the file handle is closed.
# No encoding is passed (as in the original), so the platform default applies.
with open('data_m_content.txt') as corpus_file:
    text = corpus_file.read()

def stop_words(texts, stoplist_path='stoplist.txt'):
    """Segment *texts* with jieba and drop the tokens listed as stop words.

    Args:
        texts: The raw text to segment.
        stoplist_path: Path of the UTF-8 stop-word file. Entries are assumed
            to be separated by whitespace (typically one per line).

    Returns:
        The remaining tokens joined by single spaces.

    Raises:
        OSError: If the stop-word file cannot be opened.
    """
    # Build a set of whole stop words. The previous implementation tested
    # ``word in <entire file text>``, a substring search that also discarded
    # every token that merely happened to be a fragment of some stop word.
    # Text read in Python 3 is already ``str``, so no unicode conversion is
    # needed either.
    with open(stoplist_path, encoding='utf-8') as f:
        stop_set = set(f.read().split())
    # Whitespace-only tokens strip down to '' and are dropped as well.
    stop_set.add('')

    return ' '.join(
        word
        for word in jieba.cut_for_search(texts)   # search-engine mode segmentation
        if word.strip() not in stop_set
    )

# Remove stop words, then lay out the word cloud from what is left.
text = stop_words(text)
wc.generate(text)

# Color function that samples word colors from the mask image.
image_colors = ImageColorGenerator(back_color)

# Figure 1: the cloud with its default colors.
plt.imshow(wc)
plt.axis('off')                                   # hide the axes

# Figure 2: the same cloud recolored from the image.
plt.figure()
plt.imshow(wc.recolor(color_func=image_colors))
plt.axis('off')

wc.to_file('data_m.png')                          # save the word cloud to disk

# Without this call the two figures are never displayed when the code is run
# as a plain script; it comes last so saving does not wait on the windows.
plt.show()

Python数据可视化---线状图、柱状图、箱型图_第1张图片
线状图:

import matplotlib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
path = "D:\\pythonProject2\\all.csv"
df = pd.read_csv(path)
# NOTE: the original called df.dropna() and discarded the result, so no rows
# were ever removed. That behavior is kept; assign the result back if rows
# with missing values really should be dropped.
data1 = []

matplotlib.rcParams['font.sans-serif'] = ['SimHei']
matplotlib.rcParams['font.family'] = 'sans-serif'      # use a font that can render Chinese labels
# Keep the minus sign '-' from being rendered as an empty box.
matplotlib.rcParams['axes.unicode_minus'] = False

# Reduce every timestamp to characters 9-10, with any ':' trimmed off.
# NOTE(review): presumably this is the hour of a fixed-width timestamp --
# confirm against the CSV.
for stamp in df['times']:
    hour_part = stamp[9:11]
    if hour_part == ' ':
        data1.append('blank')
    else:
        data1.append(hour_part.strip(":"))

df['times'] = data1
# NOTE: a bare df.value_counts() whose result was discarded used to sit here;
# it had no effect and was removed.

# Backslashes are escaped: the original literal relied on the invalid escape
# sequences "\p" and "\c". The resulting path string is unchanged.
path1 = "D:\\pythonProject2\\content_hour.txt"
with open(path1, encoding='utf-8') as f:
    lines = f.readlines()

# Trim surrounding spaces; a line shorter than 3 characters is replaced by a
# sentinel row starting with 'no', which count() skips. The original always
# wrote the sentinel to the hard-coded index 32 instead of the offending line.
for i, raw_line in enumerate(lines):
    cleaned = raw_line.strip(' ')
    if len(cleaned) < 3:
        cleaned = 'no        8'
    lines[i] = cleaned

# Hour labels '0'..'23'; the keys later serve as the index of the plotted frame.
counts = {str(hour): 0 for hour in range(24)}

data3 = []
flag = 0


def count(flag, rows=None):
    """Return the sum of the counts recorded for hour *flag*.

    Each row is read as a two-character hour field followed by a count.
    Rows whose first two characters are ``'no'`` are skipped.

    Args:
        flag: The hour (an integer) to total.
        rows: The rows to scan. Defaults to the module-level ``lines`` list.

    Returns:
        The total count for *flag*, or 0 when no row matches.

    Raises:
        ValueError: If a scanned row does not hold integers in its fields.
    """
    if rows is None:
        rows = lines

    total = 0
    for row in rows:
        hour_field = row[0:2]
        if hour_field == 'no':
            continue
        # int() replaces eval(): it ignores surrounding whitespace, accepts
        # zero-padded hours such as "08" (which eval rejects), and cannot
        # execute arbitrary expressions found in the file.
        if int(hour_field) == flag:
            total += int(row[2:])
    return total
#print(lines)
#print(count(3))
# Total the comments for every hour of the day, 0 through 23.
data3.extend(count(hour) for hour in range(24))
print(data3)

# One row per hour label, with the totals in a single 'counts' column.
df1 = pd.DataFrame(columns=['counts'], index=counts)
df1['counts'] = data3

plt.plot(df1)

# Write each total just above its point on the line.
for label, value in df1['counts'].items():
    plt.text(label, value, value, ha='center', va='bottom', fontsize=8)

plt.xlabel("时间(小时)")
plt.ylabel("评论(数量)")
plt.show()

Python数据可视化---线状图、柱状图、箱型图_第2张图片
柱状图:

import matplotlib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from numpy import unicode
import jieba

pd.set_option('display.max_rows', None)      # do not truncate rows when printing

path = "D:\\pythonProject2\\all.csv"
df = pd.read_csv(path)
# NOTE: the original called df.dropna() and discarded the result, so no rows
# were ever removed. That behavior is kept, which also keeps the city counts
# below unchanged.
data1 = []

matplotlib.rcParams['font.sans-serif'] = ['SimHei']
matplotlib.rcParams['font.family'] = 'sans-serif'      # use a font that can render Chinese labels
# Keep the minus sign '-' from being rendered as an empty box.
matplotlib.rcParams['axes.unicode_minus'] = False

# Comments per city. 'blank' is the placeholder given to rows without a city
# during cleaning (those rows were kept because other columns may be useful).
# errors='ignore' keeps this from raising KeyError when no such row exists.
df1 = df['citys'].value_counts().drop(['blank'], errors='ignore')
df2 = pd.DataFrame(df1[:10])                 # the ten cities with the most comments

df2.plot(kind='bar')

plt.xlabel("城市")
plt.ylabel("评论(数量)")
plt.xticks(rotation=45)                      # tilt the city names so they do not overlap
plt.show()


Python数据可视化---线状图、柱状图、箱型图_第3张图片
箱型图:

import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import pandas as pd
from bokeh.plotting import figure, show, output_file
#北京      828
#上海      495
#广东广州    144
#浙江杭州

matplotlib.rcParams['font.sans-serif'] = ['SimHei']
matplotlib.rcParams['font.family'] = 'sans-serif'      # use a font that can render Chinese labels
# Keep the minus sign '-' from being rendered as an empty box.
matplotlib.rcParams['axes.unicode_minus'] = False

data1 = []   # scores for 北京
data2 = []   # scores for 上海
data3 = []   # scores for 广东广州
data4 = []   # scores for 浙江杭州

df = pd.read_csv('all.csv')

# Collect score / 50 for the four cities in a single pass. Scores of other
# cities are never converted, exactly as in the original index-based loop.
scores_by_city = {'北京': data1, '上海': data2, '广东广州': data3, '浙江杭州': data4}
for city, score in zip(df['citys'], df['scores']):
    bucket = scores_by_city.get(city)
    if bucket is not None:
        bucket.append(float(score) / 50)

# NOTE: an abandoned fragment ("for y in range(0,len(data1)): if len(data2)")
# used to start here inside a triple-quoted string that was never closed,
# which turned the remainder of the script into a SyntaxError. It was removed.
print(data3)
df1 = pd.DataFrame({'北京': data1})
df2 = pd.DataFrame({'上海': data2})
df3 = pd.DataFrame({'广东广州': data3})
df4 = pd.DataFrame({'浙江杭州': data4})
print(df3)
plt.figure(figsize=(10, 4))
# Create the chart from the data.
def draw(df1):
    """Draw a styled box plot of *df1* and display it.

    Args:
        df1: Object whose ``boxplot(return_type='dict')`` method returns the
            dictionary of drawn artists (a pandas DataFrame in this script).
    """
    f = df1.boxplot(return_type='dict')
    plt.title('城市')

    for box in f['boxes']:
        # Outline color and width of the box, drawn half transparent.
        box.set(color='b', linewidth=1, alpha=0.5)
    for whisker in f['whiskers']:
        whisker.set(color='k', linewidth=0.5, linestyle='-')
    for cap in f['caps']:
        cap.set(color='gray', linewidth=2)
    for median in f['medians']:
        median.set(color='DarkBlue', linewidth=2)
    for flier in f['fliers']:
        flier.set(marker='o', color='y', alpha=0.5)

    # Show the figure once, after all styling. The call used to sit inside
    # the fliers loop, so it ran once per flier artist (and not at all when
    # there were none).
    plt.show()
# Render the box plot for df3.
draw(df3)
# Keys of the artist dictionary styled in draw():
# boxes: the box outlines
# medians: the horizontal line at the median
# whiskers: the vertical lines running from the box to the error bar
# fliers: the outliers
# caps: the horizontal error-bar lines
# means: the horizontal line at the mean

Python数据可视化---线状图、柱状图、箱型图_第4张图片
Python数据可视化---线状图、柱状图、箱型图_第5张图片
Python数据可视化---线状图、柱状图、箱型图_第6张图片
Python数据可视化---线状图、柱状图、箱型图_第7张图片

你可能感兴趣的:(Python的基础学习,自己写的小东西,爬虫)