对之前获取的数据源进行数据分析。
通过读取本地数据源,获取其中的省份信息,绘制商家地区分布图,并以 HTML 格式保存在本地;
用浏览器打开后,鼠标移动到各地区时会动态显示该地区的商家数量。
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
__title__ = ''
__author__ = 'jia666666'
"""
from pyecharts.charts import Map
import pyecharts.options as opts
import time
import pandas as pd
# Load the scraped smartphone listing from the local CSV file.
n = '../file/CSV/智能手机' + '-all.csv'
data = pd.read_csv(n)

# Count how many listings fall in each province, keeping first-seen order.
# Rows with a missing province (NaN) are skipped so they do not turn into
# bogus map regions.
total_data = {}
for item in data['省份'].dropna():
    total_data[item] = total_data.get(item, 0) + 1

province = total_data.keys()
num = total_data.values()
# Materialise the (province, count) pairs as a list: a bare zip object is a
# one-shot iterator and would be empty the second time the chart is built.
list_data = list(zip(province, num))
#-------------------------------------------------------------------------------------
# 第二步:绘制全国商家地图
#-------------------------------------------------------------------------------------
def map_cn_disease_dis() -> Map:
    """Build the China map of listing counts per province.

    Reads the module-level ``list_data`` (province, count) pairs and assigns
    each province to one of the piecewise ranges configured below.

    Returns:
        The configured pyecharts ``Map`` chart; rendering is left to the
        caller.
    """
    c = (
        Map()
        .add('中国', list_data, 'china')
        .set_global_opts(
            title_opts=opts.TitleOpts(title='全国商家店铺省份分布图'),
            visualmap_opts=opts.VisualMapOpts(
                is_show=True,
                split_number=6,
                is_piecewise=True,  # discrete ranges instead of a gradient
                pos_top='center',
                pieces=[
                    {'min': 1000, 'color': '#7f1818'},  # open-ended top range
                    {'min': 400, 'max': 999},
                    {'min': 200, 'max': 399},
                    {'min': 100, 'max': 199},
                    {'min': 10, 'max': 99},
                    # The lowest range runs up to 9 so that counts of 6-9
                    # are not left without a matching piece.
                    {'min': 0, 'max': 9},
                ],
            ),
        )
    )
    return c
# Build the chart, write it out as a standalone HTML page, then report success.
chart = map_cn_disease_dis()
chart.render('../file/HTML/全国商家省份分布图.html')
print('文件保存完成')
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import time
import pandas as pd
#---------------------------------------------- load data ----------------------------------------------
import jieba

n = '../file/CSV/智能手机' + '-all.csv'
data = pd.read_csv(n)

#---------------------------------------------- tokenise product titles ----------------------------------------------
# Segment every product title with jieba and flatten the results into one
# token list.  Missing titles are dropped and the rest coerced to str, so a
# blank cell cannot crash the tokenizer.
title = data['商品名'].dropna().astype(str)
title_s = [token for line in title for token in jieba.lcut(line)]

# Load the stop-word list.  The context manager closes the file handle, and
# a set makes each membership test O(1) instead of a list scan.
with open('../file/TXT/StopWords.txt', 'r', encoding='utf-8') as stopword_file:
    stopwords = {line.strip() for line in stopword_file}

# Discard stop words.
title_clean = [token for token in title_s if token not in stopwords]

# Put the remaining tokens in a one-column frame and count occurrences;
# the result has one row per distinct word with columns 'word' and 'count'.
df_allwords_clean_dist = pd.DataFrame({
    'allwords': title_clean
})
word_count = df_allwords_clean_dist.allwords.value_counts().reset_index()
word_count.columns = ['word', 'count']
#---------------------------------------------- word-cloud rendering ----------------------------------------------
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import imageio as im

# Canvas on which the cloud is displayed.
plt.figure(figsize=(8, 8))

# Image passed to WordCloud as its mask.
pic = im.imread("../file/PNG/猫.PNG")

# Map each of the first 100 rows of word_count to {word: count}.
top_frequencies = dict(word_count.head(100).values)

w_c = WordCloud(
    font_path="simhei.ttf",
    background_color="black",
    mask=pic,
    max_font_size=100,
    margin=1,
)
wc = w_c.fit_words(top_frequencies)

# Show the cloud with the axis ticks hidden.
plt.imshow(wc, interpolation='bilinear')
plt.axis("off")
plt.show()

# Save the cloud image to disk.
wc.to_file('../file/PNG/01商品名称词云.PNG')
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
__title__ = ''
__author__ = 'jia666666'
__time__ = '2020/3/24'
"""
import time
import pandas as pd
#---------------------------------------------- load data ----------------------------------------------
import jieba

n = '../file/CSV/智能手机' + '-all.csv'
data = pd.read_csv(n)
data_sales = data['销量']

#---------------------------------------------- tokenise product titles ----------------------------------------------
# Segment every product title with jieba and flatten the results into one
# token list.  Missing titles are dropped and the rest coerced to str, so a
# blank cell cannot crash the tokenizer.
title = data['商品名'].dropna().astype(str)
title_s = [token for line in title for token in jieba.lcut(line)]

# Load the stop-word list.  The context manager closes the file handle, and
# a set makes each membership test O(1) instead of a list scan.
with open('../file/TXT/StopWords.txt', 'r', encoding='utf-8') as stopword_file:
    stopwords = {line.strip() for line in stopword_file}

# Discard stop words.
title_clean = [token for token in title_s if token not in stopwords]

# Put the remaining tokens in a one-column frame and count occurrences;
# the result has one row per distinct word with columns 'word' and 'count'.
df_allwords_clean_dist = pd.DataFrame({
    'allwords': title_clean
})
word_count = df_allwords_clean_dist.allwords.value_counts().reset_index()
word_count.columns = ['word', 'count']
#------------------------------------------------------------------------------
# Step 2: bar chart of word frequencies
#------------------------------------------------------------------------------
import matplotlib.pyplot as plt
import numpy as np

plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels correctly
plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly

# Split the first 30 (word, count) rows of word_count into parallel lists.
top_rows = word_count.head(30).values
words = [row[0] for row in top_rows]
frequencies = [row[1] for row in top_rows]

# Draw the bars.
plt.figure(figsize=[10, 6])
plt.bar(words, frequencies, width=0.3, color='green')

# Axis labels and title.
plt.xlabel("卖点", fontproperties='SimHei', size=12)
plt.ylabel("频率", fontproperties='SimHei', rotation=90, size=12)
plt.title("卖点频率关系图", fontproperties='SimHei', size=16)

# Slant the tick labels.
plt.xticks(words, fontproperties='SimHei', rotation=-40, size=10)

# Print each value above its bar (centred horizontally, bottom-aligned).
for word, frequency in zip(words, frequencies):
    plt.text(word, frequency, frequency, ha='center', va='bottom', size=10)

# Save, then display.
plt.savefig('../file/PNG/02卖点频率关系图.png')
plt.show()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
__title__ = ''
__author__ = 'jia666666'
"""
import time
import pandas as pd
import matplotlib.pyplot as plt
# Bar chart of the number of listings per brand.
import matplotlib.pyplot as plt
import numpy as np

# Load the scraped smartphone listing.
n = '../file/CSV/智能手机' + '-all.csv'
data = pd.read_csv(n)

# Display labels, plus the keyword that assigns a title to each label.  The
# keywords are in match-priority order and are compared against the
# lower-cased title, so "vivo", "VIVO" and "Vivo" all count as one brand.
names = ['华为', '小米', '三星', '苹果', 'VIVO', 'OPPO', '其他']
brand_keywords = ['华为', '小米', '三星', '苹果', 'vivo', 'oppo']


def _brand_slot(title):
    """Return the index in ``names`` of the brand that *title* belongs to.

    The title is coerced to ``str`` first, so a missing value (NaN) lands in
    the catch-all last slot instead of raising ``TypeError``.
    """
    text = str(title).lower()
    for slot, keyword in enumerate(brand_keywords):
        if keyword in text:
            return slot
    return len(brand_keywords)


# Listings per brand, parallel to ``names``.
nums = [0] * len(names)
for item in data['商品名']:
    nums[_brand_slot(item)] += 1

plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels correctly
plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly

# Draw the bars.
plt.figure(figsize=[10, 6])
plt.bar(names, nums, width=0.3, color='green')

# Axis labels and title.
plt.xlabel("品牌", fontproperties='SimHei', size=12)
plt.ylabel("商品数量", fontproperties='SimHei', rotation=90, size=12)
plt.title("品牌商品关系图", fontproperties='SimHei', size=16)

# Slant the tick labels.
plt.xticks(list(names), fontproperties='SimHei', rotation=-40, size=10)

# Print each value above its bar (centred horizontally, bottom-aligned).
for a, b in zip(names, nums):
    plt.text(a, b, b, ha='center', va='bottom', size=10)

plt.grid(linestyle='-.')
plt.savefig('../file/PNG/08品牌商品关系图.png')
plt.show()
import time
import pandas as pd
import matplotlib.pyplot as plt
# Pie chart of each brand's share of the listings.
import matplotlib.pyplot as plt
import numpy as np

# Load the scraped smartphone listing.
n = '../file/CSV/智能手机' + '-all.csv'
data = pd.read_csv(n)

# Display labels, plus the keyword that assigns a title to each label.  The
# keywords are in match-priority order and are compared against the
# lower-cased title, so "vivo", "VIVO" and "Vivo" all count as one brand.
names = ['华为', '小米', '三星', '苹果', 'VIVO', 'OPPO', '其他']
brand_keywords = ['华为', '小米', '三星', '苹果', 'vivo', 'oppo']


def _brand_slot(title):
    """Return the index in ``names`` of the brand that *title* belongs to.

    The title is coerced to ``str`` first, so a missing value (NaN) lands in
    the catch-all last slot instead of raising ``TypeError``.
    """
    text = str(title).lower()
    for slot, keyword in enumerate(brand_keywords):
        if keyword in text:
            return slot
    return len(brand_keywords)


# Listings per brand, parallel to ``names``.
nums = [0] * len(names)
for item in data['商品名']:
    nums[_brand_slot(item)] += 1

# Total number of listings (rows in the title column).
goods_sum = len(data['商品名'])

plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels correctly
plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly

# Draw the pie with two-decimal percentage labels.
plt.pie(nums, labels=names, autopct='%.2f%%')
plt.title("品牌市场比重", fontproperties='SimHei', size=16)
plt.axis('equal')
plt.legend()
plt.savefig('../file/PNG/09品牌市场比重.png')
plt.show()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
__title__ = ''
__author__ = 'jia666666'
__time__ = '2020/3/24'
"""
import time
import pandas as pd
# Bar chart of the total sales volume per brand.
import matplotlib.pyplot as plt
import numpy as np

# Load the scraped smartphone listing; missing cells become 0 so that the
# sales column can be summed.
n = '../file/CSV/智能手机' + '-all.csv'
data = pd.read_csv(n)
data.fillna(value=0, inplace=True)

# Display labels, plus the keyword that assigns a title to each label.  The
# keywords are in match-priority order and are compared against the
# lower-cased title, so "vivo", "VIVO" and "Vivo" all count as one brand.
names = ['华为', '小米', '三星', '苹果', 'VIVO', 'OPPO', '其他']
brand_keywords = ['华为', '小米', '三星', '苹果', 'vivo', 'oppo']


def _brand_slot(title):
    """Return the index in ``names`` of the brand that *title* belongs to.

    The title is coerced to ``str`` first: after ``fillna(0)`` a missing
    title is the integer 0, which would raise ``TypeError`` under ``in``.
    Such rows land in the catch-all last slot.
    """
    text = str(title).lower()
    for slot, keyword in enumerate(brand_keywords):
        if keyword in text:
            return slot
    return len(brand_keywords)


# Units sold per brand, parallel to ``names``.
nums = [0] * len(names)
for item, num in zip(data['商品名'], data['销量']):
    nums[_brand_slot(item)] += num

#------------------------------------------------------------------------------
# Step 2: draw the bar chart
#------------------------------------------------------------------------------
plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels correctly
plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly

plt.figure(figsize=[10, 6])
plt.bar(names, nums, width=0.3, color='green')

# Axis labels and title.
plt.xlabel("品牌", fontproperties='SimHei', size=12)
plt.ylabel("销量", fontproperties='SimHei', rotation=90, size=12)
plt.title("品牌-销量关系图", fontproperties='SimHei', size=16)

# Tick labels stay horizontal.
plt.xticks(list(names), fontproperties='SimHei', rotation=-0, size=10)

# Print each value above its bar (centred horizontally, bottom-aligned).
for a, b in zip(names, nums):
    plt.text(a, b, b, ha='center', va='bottom', size=10)

plt.grid(linestyle='-.')
plt.savefig('../file/PNG/10品牌销量关系图.png')
plt.show()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
__title__ = ''
__author__ = 'jia666666'
__time__ = '2020/3/24'
"""
import time
import pandas as pd
import matplotlib.pyplot as plt
# Bar chart of the total revenue (price x units sold) per brand.
import matplotlib.pyplot as plt
import numpy as np

# Load the scraped smartphone listing; missing cells become 0 so that the
# price and sales columns can be multiplied and summed.
n = '../file/CSV/智能手机' + '-all.csv'
data = pd.read_csv(n)
data.fillna(value=0, inplace=True)

# Display labels, plus the keyword that assigns a title to each label.  The
# keywords are in match-priority order and are compared against the
# lower-cased title, so "vivo", "VIVO" and "Vivo" all count as one brand.
names = ['华为', '小米', '三星', '苹果', 'VIVO', 'OPPO', '其他']
brand_keywords = ['华为', '小米', '三星', '苹果', 'vivo', 'oppo']


def _brand_slot(title):
    """Return the index in ``names`` of the brand that *title* belongs to.

    The title is coerced to ``str`` first: after ``fillna(0)`` a missing
    title is the integer 0, which would raise ``TypeError`` under ``in``.
    Such rows land in the catch-all last slot.
    """
    text = str(title).lower()
    for slot, keyword in enumerate(brand_keywords):
        if keyword in text:
            return slot
    return len(brand_keywords)


# Revenue per brand, parallel to ``names``.  The price is truncated to an
# integer before multiplying, as in the original calculation.
nums = [0] * len(names)
for item, price, num in zip(data['商品名'], data['价格'], data['销量']):
    nums[_brand_slot(item)] += int(price) * num

#------------------------------------------------------------------------------
# Step 2: draw the bar chart
#------------------------------------------------------------------------------
plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels correctly
plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly

plt.figure(figsize=[10, 6])
plt.bar(names, nums, width=0.3, color='green')

# Axis labels and title.
plt.xlabel("品牌", fontproperties='SimHei', size=12)
plt.ylabel("销售额", fontproperties='SimHei', rotation=90, size=12)
plt.title("品牌销售额关系图", fontproperties='SimHei', size=16)

# Tick labels stay horizontal.
plt.xticks(list(names), fontproperties='SimHei', rotation=-0, size=10)

# Print each value above its bar (centred horizontally, bottom-aligned).
for a, b in zip(names, nums):
    plt.text(a, b, b, ha='center', va='bottom', size=10)

plt.grid(linestyle='-.')
plt.savefig('../file/PNG/11品牌销售额关系图.png')
plt.show()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
__title__ = ''
__author__ = 'jia666666'
__time__ = '2020/3/24'
"""
import time
import matplotlib
import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
# Bar chart of units sold per price band.
import matplotlib.pyplot as plt

# Load the listing and replace missing cells with 0.
n = '../file/CSV/智能手机' + '-all.csv'
data = pd.read_csv(n)
data.fillna(value=0, inplace=True)
print(len(data['销量']))

# Price bands are 1000 wide; the last one collects everything from 6000 up.
nume = ['0_1000', '1000_2000', '2000_3000', '3000_4000', '4000_5000', '5000_6000', '6000+']
sales = [0] * len(nume)
for raw_price, raw_sold in zip(data['价格'], data['销量']):
    price = int(raw_price)
    sold = int(raw_sold)
    # Clamp the band number into the valid range: anything below 1000 goes
    # to the first band, anything from 6000 up goes to the last.
    band = max(0, min(price // 1000, len(sales) - 1))
    sales[band] += sold
print(sales[1])

#------------------------------------------------------------------------------
# Step 2: draw the bar chart
#------------------------------------------------------------------------------
plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels correctly
plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly

plt.figure(figsize=[10, 6])
plt.bar(nume, sales, width=0.3, color='green')

# Axis labels and title.
plt.xlabel("价格", fontproperties='SimHei', size=12)
plt.ylabel("销量", fontproperties='SimHei', rotation=90, size=12)
plt.title("价格销量关系图", fontproperties='SimHei', size=16)

# Tick labels stay horizontal.
plt.xticks(nume, fontproperties='SimHei', rotation=0, size=10)

# Print each value above its bar (centred horizontally, bottom-aligned).
for label, value in zip(nume, sales):
    plt.text(label, value, value, ha='center', va='bottom', size=10)

plt.grid(linestyle='-.')
plt.savefig('../file/PNG/04价格销量关系图.png')
plt.show()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
__title__ = ''
__author__ = 'jia666666'
__time__ = '2020/3/24'
"""
import time
import matplotlib
import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
# Bar chart of revenue (price x units sold) per price band.
import matplotlib.pyplot as plt

# Load the listing and replace missing cells with 0.
n = '../file/CSV/智能手机' + '-all.csv'
data = pd.read_csv(n)
data.fillna(value=0, inplace=True)
print(len(data['销量']))

# Price bands are 1000 wide; the last one collects everything from 6000 up.
nume = ['0_1000', '1000_2000', '2000_3000', '3000_4000', '4000_5000', '5000_6000', '6000+']
sales = [0] * len(nume)
for raw_price, sold in zip(data['价格'], data['销量']):
    price = int(raw_price)
    revenue = price * sold
    # Clamp the band number into the valid range: anything below 1000 goes
    # to the first band, anything from 6000 up goes to the last.
    band = max(0, min(price // 1000, len(sales) - 1))
    sales[band] += revenue
print(sales[1])

#------------------------------------------------------------------------------
# Step 2: draw the bar chart
#------------------------------------------------------------------------------
plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels correctly
plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly

plt.figure(figsize=[10, 6])
plt.bar(nume, sales, width=0.3, color='green')

# Axis labels and title.
plt.xlabel("价格", fontproperties='SimHei', size=12)
plt.ylabel("销售额", fontproperties='SimHei', rotation=90, size=12)
plt.title("价格销售额关系图", fontproperties='SimHei', size=16)

# Tick labels stay horizontal.
plt.xticks(nume, fontproperties='SimHei', rotation=0, size=10)

# Print each value above its bar (centred horizontally, bottom-aligned).
for label, value in zip(nume, sales):
    plt.text(label, value, value, ha='center', va='bottom', size=10)

plt.grid(linestyle='-.')
plt.savefig('../file/PNG/05价格销售额关系图.png')
plt.show()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
__title__ = ''
__author__ = 'jia666666'
__time__ = '2020/3/25'
"""
import time
import pandas as pd
# Load the listing and replace missing cells with 0.
n = '../file/CSV/智能手机' + '-all.csv'
data = pd.read_csv(n)
data.fillna(value=0, inplace=True)

#------------------------------------------------------------------------------
# Aggregate per price band
#------------------------------------------------------------------------------
# Price bands are 1000 wide; the last one collects everything from 6000 up.
index = ['0_1000', '1000_2000', '2000_3000', '3000_4000', '4000_5000', '5000_6000', '6000+']
count = [0] * len(index)  # number of listings in each band
sale = [0] * len(index)   # units sold in each band
money = [0] * len(index)  # revenue (price x units sold) in each band

for raw_price, sold in zip(data['价格'], data['销量']):
    price = int(raw_price)
    # Clamp the band number into the valid range: anything below 1000 goes
    # to the first band, anything from 6000 up goes to the last.
    band = max(0, min(price // 1000, len(index) - 1))
    count[band] += 1
    sale[band] += sold
    money[band] += price * sold

#------------------------------------------------------------------------------
# Figure setup
#------------------------------------------------------------------------------
import matplotlib.pyplot as plt
plt.figure(figsize=[18, 18])
plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels correctly
plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly
#------------------------------------------------------------------------------
# Four-panel figure: listings, listing share, units sold and revenue per band
#------------------------------------------------------------------------------
def _draw_price_bar_panel(position, values, colour, y_label, heading):
    """Draw one annotated bar chart of *values* per price band.

    The chart goes into subplot *position* of the current figure and uses
    the module-level ``index`` as its x categories.  Returns the subplot's
    axes.
    """
    axes = plt.subplot(position)
    plt.bar(index, values, width=0.3, color=colour)
    plt.xlabel("价格", fontproperties='SimHei', size=12)
    plt.ylabel(y_label, fontproperties='SimHei', rotation=90, size=12)
    plt.title(heading, fontproperties='SimHei', size=16)
    plt.xticks(list(index), fontproperties='SimHei', rotation=0, size=10)
    # Print each value above its bar (centred horizontally, bottom-aligned).
    for label, value in zip(index, values):
        plt.text(label, value, value, ha='center', va='bottom', size=10)
    plt.grid(linestyle='-.')
    plt.sca(axes)
    return axes


# Top-left: number of listings per price band.
p1 = _draw_price_bar_panel(221, count, 'green', "商品数量", "价格商品数量关系图")

# Top-right: share of listings per price band.
p2 = plt.subplot(222)
plt.pie(count, labels=index, autopct='%.2f%%')
plt.title("价格商品总量占比", fontproperties='SimHei', size=16)
plt.axis('equal')
plt.legend()
plt.sca(p2)

# Bottom-left: units sold per price band.
p3 = _draw_price_bar_panel(223, sale, 'black', "销量", "价格销量关系图")

# Bottom-right: revenue per price band.
p4 = _draw_price_bar_panel(224, money, 'red', "销售额", "价格销售额关系图")

#------------------------------------------------------------------------------
# Save and display
#------------------------------------------------------------------------------
plt.savefig('../file/PNG/06价格影响关系图.png')
plt.show()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
__title__ = ''
__author__ = 'jia666666'
__time__ = '2020/3/25'
"""
import time
import pandas as pd
# Load the listing and replace missing cells with 0.
n = '../file/CSV/智能手机' + '-all.csv'
data = pd.read_csv(n)
data.fillna(value=0, inplace=True)

#------------------------------------------------------------------------------
# Aggregate per brand
#------------------------------------------------------------------------------
# Display labels, plus the keyword that assigns a title to each label.  The
# keywords are in match-priority order and are compared against the
# lower-cased title, so "vivo", "VIVO" and "Vivo" all count as one brand.
index = ['华为', '小米', '三星', '苹果', 'VIVO', 'OPPO', '其他']
brand_keywords = ['华为', '小米', '三星', '苹果', 'vivo', 'oppo']


def _brand_slot(title):
    """Return the position in ``index`` of the brand that *title* belongs to.

    The title is coerced to ``str`` first: after ``fillna(0)`` a missing
    title is the integer 0, which would raise ``TypeError`` under ``in``.
    Such rows land in the catch-all last slot.
    """
    text = str(title).lower()
    for slot, keyword in enumerate(brand_keywords):
        if keyword in text:
            return slot
    return len(brand_keywords)


count = [0] * len(index)  # number of listings per brand
sale = [0] * len(index)   # units sold per brand
money = [0] * len(index)  # revenue (price x units sold) per brand

for item, raw_price, sold in zip(data['商品名'], data['价格'], data['销量']):
    price = int(raw_price)
    slot = _brand_slot(item)
    count[slot] += 1
    sale[slot] += sold
    money[slot] += price * sold

#------------------------------------------------------------------------------
# Figure setup
#------------------------------------------------------------------------------
import matplotlib.pyplot as plt
plt.figure(figsize=[20, 18])
plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels correctly
plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly
#------------------------------------------------------------------------------
# Four-panel figure: listings, listing share, units sold and revenue per brand
#------------------------------------------------------------------------------
def _draw_brand_bar_panel(position, values, colour, y_label, heading):
    """Draw one annotated bar chart of *values* per brand.

    The chart goes into subplot *position* of the current figure and uses
    the module-level ``index`` as its x categories.  Returns the subplot's
    axes.
    """
    axes = plt.subplot(position)
    plt.bar(index, values, width=0.3, color=colour)
    plt.xlabel("品牌", fontproperties='SimHei', size=12)
    plt.ylabel(y_label, fontproperties='SimHei', rotation=90, size=12)
    plt.title(heading, fontproperties='SimHei', size=16)
    plt.xticks(list(index), fontproperties='SimHei', rotation=0, size=10)
    # Print each value above its bar (centred horizontally, bottom-aligned).
    for label, value in zip(index, values):
        plt.text(label, value, value, ha='center', va='bottom', size=10)
    plt.grid(linestyle='-.')
    plt.sca(axes)
    return axes


# Top-left: number of listings per brand.
p1 = _draw_brand_bar_panel(221, count, 'green', "商品数量", "品牌商品数量关系图")

# Top-right: share of listings per brand.
p2 = plt.subplot(222)
plt.pie(count, labels=index, autopct='%.2f%%')
plt.title("品牌商品总量占比", fontproperties='SimHei', size=16)
plt.axis('equal')
plt.legend()
plt.sca(p2)

# Bottom-left: units sold per brand.
p3 = _draw_brand_bar_panel(223, sale, 'black', "销量", "品牌销量关系图")

# Bottom-right: revenue per brand.
p4 = _draw_brand_bar_panel(224, money, 'red', "销售额", "品牌销售额关系图")

#------------------------------------------------------------------------------
# Save and display
#------------------------------------------------------------------------------
plt.savefig('../file/PNG/12品牌影响关系图.png')
plt.show()