作为一名云南人,同时也是一名数据分析人员,本次我将用Python来给大家介绍云南的相关景点!
欢迎大家来云南旅游哦!资料含各地景点的数据,希望大家学习之后进行相关的练习,学有所成!
所需的资料我放在这里,自行提取即可:
链接:https://pan.baidu.com/s/16ziypbHZL-ZNNxnVQ2-iXg
提取码:yunn
使用工具:Jupyter Notebook。推荐查看以下文章,自行下载和学习:《Jupyter Notebooks的安装和使用介绍》(LarsCheng的博客,CSDN博客)
“彩云之南”旅游景点分析
!pip install --upgrade pyecharts
#升级 pyecharts 包,地图显示部分需要用到 pyecharts>=1.9.0(即 1.9.0 及以上版本)
import jieba#中文分词处理
import pandas as pd #数据处理库
from collections import Counter#数据统计库
from pyecharts.charts import Line,Pie,Scatter,Bar,Map,Grid#pyecharts数据可视化
from pyecharts.charts import WordCloud
from pyecharts import options as opts
from pyecharts.globals import ThemeType
from pyecharts.globals import SymbolType
from pyecharts.commons.utils import JsCode
# Load the Yunnan attractions dataset; the workbook is expected in the
# current working directory.
Yunnan = pd.read_excel('云南旅游景点.xlsx')

# Exploratory look at the raw data. These bare expressions only display
# their value when they are the last statement of a notebook cell.
Yunnan.head()
Yunnan.info()
Yunnan.shape  # original author's note: 75 rows, 11 columns before cleaning
Yunnan.describe()

# Attractions without any sales vs. attractions with at least one sale.
Yunnan.loc[Yunnan['销量'] == 0, :].head()
Yunnan.loc[Yunnan['销量'] > 0, :].head(75)

# Missing values per column before cleaning.
Yunnan.isnull().sum()

# Fill missing star ratings with the placeholder '待定'.
# The result is assigned back instead of calling fillna(inplace=True) on the
# column selection: the in-place form acts on an intermediate object, raises
# a chained-assignment warning on recent pandas and does not update the
# DataFrame when Copy-on-Write is enabled.
Yunnan['星级'] = Yunnan['星级'].fillna('待定')

# Missing values per column after cleaning.
Yunnan.isnull().sum()

# Attractions ordered from highest to lowest sales.
Yunnan.sort_values('销量', ascending=False).head(75)
# Linear gradient (blue -> red along the x direction) used to fill the bars.
color_js = """new echarts.graphic.LinearGradient(0, 0, 1, 0,
[{offset: 0, color: '#009ad6'}, {offset: 1, color: '#ed1941'}], false)"""

# Sort ascending by sales so the 20 best sellers are the last 20 rows.
sort_info = Yunnan.sort_values(by='销量', ascending=True)
top20 = sort_info.tail(20)
bar_fill = opts.ItemStyleOpts(color=JsCode(color_js))

# Horizontal bar chart of the 20 best-selling attractions.
b1 = Bar()
b1.add_xaxis(top20['名称'].tolist())
b1.add_yaxis('云南热门景点销量', top20['销量'].tolist(), itemstyle_opts=bar_fill)
b1.reversal_axis()  # swap the axes: names on the y axis, sales on the x axis
b1.set_global_opts(
    title_opts=opts.TitleOpts(title='云南热门景点销量数据'),
    yaxis_opts=opts.AxisOpts(name='景点名称'),
    xaxis_opts=opts.AxisOpts(name='销量'),
)
b1.set_series_opts(label_opts=opts.LabelOpts(position="right"))

# Wrap the chart in a Grid with a wider left margin, shifting the plot right.
g1 = Grid()
g1.add(b1, grid_opts=opts.GridOpts(pos_left='20%', pos_right='5%'))
g1.render_notebook()
# Total sales per city.
Yunnan_tmp1 = Yunnan[['城市', '销量']]
Yunnan_counts = Yunnan_tmp1.groupby('城市').sum()

# Build [city, total_sales] pairs for the map. Yunnan_counts is a one-column
# DataFrame, so the '销量' column is selected explicitly: calling
# .values.tolist() on the whole frame would yield [city, [total_sales]]
# (value wrapped in a list) instead of a scalar value per region.
city_sales = [
    [city, sales]
    for city, sales in zip(Yunnan_counts.index.tolist(),
                           Yunnan_counts['销量'].tolist())
]

# Map of Yunnan coloured by total sales per city.
m1 = (
    Map()
    .add('云南假期出行分布', city_sales, '云南')
    .set_global_opts(
        title_opts=opts.TitleOpts(title='云南假期出行数据地图分布'),
        visualmap_opts=opts.VisualMapOpts(
            max_=100000,
            is_piecewise=False,
            range_color=["white", "#fa8072", "#ed1941"],
        ),
    )
)
m1.render_notebook()
# Linear gradient (blue at the bottom -> red at the top) used to fill the bars.
color_js = """new echarts.graphic.LinearGradient(0, 1, 0, 0,
[{offset: 0, color: '#009ad6'}, {offset: 1, color: '#ed1941'}], false)"""

# Keep only 4A / 5A attractions and count them per city.
Yunnan_tmp2 = Yunnan[Yunnan['星级'].isin(['4A', '5A'])]
Yunnan_counts = Yunnan_tmp2.groupby('城市').count()['星级']

city_names = Yunnan_counts.index.values.tolist()
scenic_totals = Yunnan_counts.values.tolist()
gradient_fill = opts.ItemStyleOpts(color=JsCode(color_js))

# Bar chart: number of 4A-5A attractions per city, with slider and
# inside (mouse / touch) data zoom.
b2 = Bar()
b2.add_xaxis(city_names)
b2.add_yaxis('4A-5A景区数量', scenic_totals, itemstyle_opts=gradient_fill)
b2.set_global_opts(
    title_opts=opts.TitleOpts(title='云南省各城市4A-5A景区数量'),
    datazoom_opts=[opts.DataZoomOpts(), opts.DataZoomOpts(type_='inside')],
)
b2.render_notebook()
# Per-city 4A-5A counts ordered from largest to smallest; sort_values returns
# a new Series, so Yunnan_counts itself is left untouched.
Yunnan0 = Yunnan_counts.sort_values(ascending=False)
pie_pairs = [
    [city, total]
    for city, total in zip(Yunnan0.index.values.tolist(), Yunnan0.values.tolist())
]

# Label drawn inside each slice as "<city>: <count>".
slice_label = opts.LabelOpts(
    is_show=True,
    position='inside',
    font_size=12,
    formatter='{b}: {c}',
    font_style='italic',
    font_weight='bold',
    font_family='Microsoft YaHei',
)

# Rose ("area" type) pie chart of attraction counts per city.
c1 = Pie()
c1.add(
    '',
    pie_pairs,
    radius=['30%', '100%'],
    center=['50%', '60%'],
    rosetype='area',
)
c1.set_global_opts(
    title_opts=opts.TitleOpts(title='地区景点数量'),
    legend_opts=opts.LegendOpts(is_show=False),
    toolbox_opts=opts.ToolboxOpts(),
)
c1.set_series_opts(label_opts=slice_label)
c1.render_notebook()
# Drop-shadow styling for the scatter symbols, passed to pyecharts as a raw
# option dict.
item_style = {
    'normal': {
        'shadowColor': '#000000',
        'shadowBlur': 20,
        'shadowOffsetX': 5,
        'shadowOffsetY': 15,
    }
}

# Hidden visual map that maps each value to a symbol size between 5 and 50.
size_mapping = opts.VisualMapOpts(is_show=False, type_='size', range_size=[5, 50])

# Scatter chart of 4A-5A attraction counts per city.
s1 = Scatter()
s1.add_xaxis(Yunnan_counts.index.values.tolist())
s1.add_yaxis(
    '4A-5A景区数量',
    Yunnan_counts.values.tolist(),
    symbol_size=50,
    itemstyle_opts=item_style,
)
s1.set_global_opts(visualmap_opts=size_mapping)
s1.render_notebook()
# Recompute the number of 4A / 5A attractions per city.
Yunnan_tmp3 = Yunnan[Yunnan['星级'].isin(['4A', '5A'])]
Yunnan_counts = Yunnan_tmp3.groupby('城市').count()['星级']

region_pairs = [
    [city, total]
    for city, total in zip(Yunnan_counts.index.values.tolist(),
                           Yunnan_counts.values.tolist())
]

# Map of Yunnan with a piecewise (stepped) colour legend capped at 12.
m2 = Map()
m2.add('云南省4A-5A景区分布', region_pairs, '云南')
m2.set_global_opts(
    title_opts=opts.TitleOpts(title='云南省地图数据分布'),
    visualmap_opts=opts.VisualMapOpts(max_=12, is_piecewise=True),
)
m2.render_notebook()
# Bin edges for ticket prices and the label for each resulting bin.
price_level = [0, 50, 100, 150, 200, 250, 300, 350, 400, 500]
label_level = ['0-50', '50-100', '100-150', '150-200', '200-250', '250-300', '300-350', '350-400', '400-500']

# pd.cut bins are right-closed, so without include_lowest the first bin is
# (0, 50] and a price of exactly 0 would be assigned NaN and vanish from the
# counts. include_lowest=True makes the first bin [0, 50], matching its
# '0-50' label. Prices above 500 still fall outside every bin.
jzmj_cut = pd.cut(Yunnan['价格'], price_level, labels=label_level, include_lowest=True)

# Number of attractions in each price range.
Yunnan_price = jzmj_cut.value_counts()
Yunnan_price  # displayed when this is the last expression of a notebook cell
# Rose ("radius" type) pie chart showing the share of attractions in each
# ticket-price range.
p1 = (
    Pie(init_opts=opts.InitOpts(width='800px', height='600px'))
    .add(
        '',
        [list(z) for z in zip(Yunnan_price.index.tolist(), Yunnan_price.values.tolist())],
        radius=['20%', '60%'],
        center=['40%', '50%'],
        rosetype='radius',
        label_opts=opts.LabelOpts(is_show=True),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title='门票价格占比', pos_left='33%', pos_top="5%"),
        legend_opts=opts.LegendOpts(type_='scroll', pos_left="80%", pos_top="25%", orient="vertical"),
    )
    # position belongs to the label options. The original passed it as a
    # separate keyword to set_series_opts (misplaced parenthesis), so it never
    # reached the label configuration.
    .set_series_opts(
        label_opts=opts.LabelOpts(formatter='{b}: {c} ({d}%)', position='outside')
    )
)
p1.render_notebook()  # rose chart of ticket-price shares
# Radial gradient (blue centre -> red edge) used to fill the scatter symbols.
color_js = """new echarts.graphic.RadialGradient(
0.5, 0.5, 1,
[{offset: 0,
color: '#009ad6'},
{offset: 1,
color: '#ed1941'}
])"""

# Scatter chart: number of attractions in each ticket-price range.
s2 = (
    Scatter()
    .add_xaxis(Yunnan_price.index.tolist())
    .add_yaxis(
        '门票价格区间',
        Yunnan_price.values.tolist(),
        symbol_size=50,
        itemstyle_opts=opts.ItemStyleOpts(color=JsCode(color_js)),
    )
    # All global options are set in ONE call. The original called
    # set_global_opts twice in a row; a single call avoids the second call
    # resetting options configured by the first.
    .set_global_opts(
        yaxis_opts=opts.AxisOpts(name='数量'),
        xaxis_opts=opts.AxisOpts(name='价格区间(元)'),
        visualmap_opts=opts.VisualMapOpts(
            is_show=False,
            # represent the data through symbol size
            type_='size',
            # range the symbol size is mapped onto
            range_size=[5, 50],
        ),
    )
)
s2.render_notebook()
# Concatenate every attraction description into one string. Missing
# descriptions are skipped: formatting NaN with '%s' would inject the literal
# text "nan" into the corpus and pollute the word counts.
contents = "".join('%s' % i for i in Yunnan['简介'].dropna().values.tolist())

# Segment the text with jieba and count token frequencies, ignoring tokens
# that consist only of whitespace.
contents_list = jieba.cut(contents)
ac = Counter(token for token in contents_list if token.strip())

# Load the stop-word list, one word per line. splitlines() also copes with
# Windows (CRLF) line endings, which split('\n') would leave as a trailing
# '\r' on every word so that none of them would match.
stopwords = []
with open('stopwords.txt', "r", encoding='utf-8') as f:
    data = f.read()
    stopwords = data.splitlines()

# Remove stop words from the counts; deleting a key that is absent from a
# Counter is a no-op, so no membership check is needed.
for i in stopwords:
    del ac[i]
# Star-shaped word cloud built from the 150 most frequent description tokens.
top_words = ac.most_common(150)
cloud_font = opts.TextStyleOpts(font_family="cursive")

w1 = WordCloud()
w1.add(
    "",
    top_words,
    word_size_range=[5, 100],
    textstyle_opts=cloud_font,
    shape='star',
)
w1.set_global_opts(title_opts=opts.TitleOpts(title="景点简介词云"))
w1.render_notebook()
欢迎大家学代码的同时来云南参观旅游!