# Ten provinces with the largest summed "总分": sort ascending, flip the row
# order, then keep the first ten rows.
df.groupby("省份").sum().sort_values("总分").iloc[::-1].head(10)
# Variant that sorts in descending order directly before keeping ten rows.
df.groupby("省份").sum().sort_values("总分", ascending=False).head(10)
# Per-province non-null counts, ordered by the count in the "总分" column.
df.groupby("省份").count().sort_values("总分", ascending=False).head(10)
# Convert the "省份" column to a categorical Series and inspect its parts.
province_cat = df["省份"].astype("category")
cat_values = province_cat.values
print(cat_values, "\n")
print(type(cat_values))
cat_values.categories  # the distinct category labels
cat_values.codes  # the integer code stored for each row
province_cat.value_counts().head(10)  # first ten rows of the frequency table
# Summed "总分" per province, largest first; only the "总分" column is kept.
province_sum = (
    df.groupby("省份")
    .sum()
    .sort_values("总分", ascending=False)["总分"]
)
# Number of rows per province, taken from the categorical "省份" column.
province_num = df["省份"].astype("category").value_counts()
# Combine the "总分" and "个数" Series into one DataFrame
# One row per province, with the summed score and the row count side by side.
province_columns = {"总分": province_sum, "个数": province_num}
province = pd.DataFrame(province_columns)
# Add the "平均分" column
# Average score per province = summed score divided by the number of rows.
province_mean = province["总分"].div(province["个数"])
# Store the averages as the new "平均分" column.
province["平均分"] = province_mean
# Ten provinces with the highest average: ascending sort, reversed, first ten.
province.sort_values("平均分").iloc[::-1].head(10)
# Ten highest-ranked provinces under each metric. Every table is built the same
# way: sort ascending by the metric, reverse the row order, keep ten rows.
top10_sum = province.sort_values("总分").iloc[::-1].head(10)
top10_num = province.sort_values("个数").iloc[::-1].head(10)
top10_mean = province.sort_values("平均分").iloc[::-1].head(10)
# Notebook-style display of the three tables.
top10_sum
top10_num
top10_mean
# Draw maps with pyecharts
# Data visualization
# Index labels of `top10` as a plain Python list.
# NOTE(review): `top10` is not assigned anywhere in the visible code (only
# top10_sum / top10_num / top10_mean are) — confirm it is defined elsewhere.
top10.index.tolist()
# The "总分" column of `top10` as a plain Python list.
top10["总分"].tolist()
# Hmm... (the values above are converted to integers next)
# The "总分" column of `top10`, cast to int and then converted to a list.
# NOTE(review): `top10` is not assigned in the visible code — confirm it exists.
top10["总分"].astype('int').tolist()
from pyecharts.charts import Bar
from pyecharts import options as opts
# Bar chart: index labels of top10_sum on the x axis, integer "总分" on the y axis.
bar = Bar()
bar.add_xaxis(top10_sum.index.tolist())
bar.add_yaxis("总分", top10_sum["总分"].astype('int').tolist())
bar.set_global_opts(
    title_opts=opts.TitleOpts(title="中国最好大学TOP10(各省份)", subtitle="总分")
)
# Render the chart inline in the notebook.
bar.render_notebook()
from pyecharts.charts import Line
from pyecharts import options as opts
# Line chart of the integer "总分" values for the rows in top10_sum.
line = Line()
line.add_xaxis(top10_sum.index.tolist())
line.add_yaxis("总分", top10_sum["总分"].astype('int').tolist())
line.set_global_opts(
    title_opts=opts.TitleOpts(title="中国最好大学TOP10(各省份)", subtitle="总分")
)
# Render the chart inline in the notebook.
line.render_notebook()
from pyecharts.charts import Pie
from pyecharts import options as opts
# Rose-type pie chart of the integer "总分" values for the rows in top10_sum.
# Each data item is a [label, score] pair.
pie_data = [
    [label, score]
    for label, score in zip(
        top10_sum.index.tolist(), top10_sum["总分"].astype('int').tolist()
    )
]
pie = Pie()
pie.add(
    "",
    pie_data,
    radius=["30%", "75%"],
    center=["40%", "50%"],
    rosetype="radius",
)
pie.set_global_opts(
    title_opts=opts.TitleOpts(title="中国最好大学TOP10(各省份)", subtitle="总分"),
    legend_opts=opts.LegendOpts(type_="scroll", pos_left="80%", orient="vertical"),
)
# Render the chart inline in the notebook.
pie.render_notebook()
from pyecharts.charts import Scatter
from pyecharts import options as opts
# Scatter chart of the integer "总分" values for the rows in top10_sum.
scatter = Scatter()
scatter.add_xaxis(top10_sum.index.tolist())
scatter.add_yaxis("总分", top10_sum["总分"].astype('int').tolist())
scatter.set_global_opts(
    title_opts=opts.TitleOpts(title="中国最好大学TOP10(各省份)", subtitle="总分")
)
# Render the chart inline in the notebook.
scatter.render_notebook()
# Remove the "省" character from the province names
# Build the name and score lists used by the map charts below.
# NOTE(review): `[:-1]` drops whatever the last character of a name is, not only
# "省" — confirm every name in the data carries a one-character suffix.
province_names = top10_sum.index.tolist()
print(province_names)
print(province_names[1])
print(province_names[1][:-1])
# Iterate over the names actually present (capped at ten, as before) instead of
# indexing with a fixed range(10), so a table with fewer than ten rows no longer
# raises IndexError. The index is also converted to a list only once.
top10_sum_index = [name[:-1] for name in province_names[:10]]
top10_sum_index
city = top10_sum_index
value = top10_sum["总分"].astype('int').tolist()
from pyecharts import options as opts
from pyecharts.charts import Geo
from pyecharts.globals import ChartType, SymbolType
# Geo chart on the "china" base map, one [name, score] point per entry of
# city/value, with a piecewise visual map and point labels hidden.
geo = Geo()
geo.add_schema(maptype="china")
geo.add("高校分布图", [[name, score] for name, score in zip(city, value)])
geo.set_global_opts(
    title_opts=opts.TitleOpts(title="各省高校总分排名"),
    visualmap_opts=opts.VisualMapOpts(is_piecewise=True, max_=1300),
)
geo.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
# Render the chart inline in the notebook.
geo.render_notebook()
from pyecharts import options as opts #负责图表配置的模块
from pyecharts.charts import Map #地图主要用于地理区域数据的可视化
# Map chart of the "china" map, coloured from the [name, score] pairs built
# from city/value.
_map = Map()
_map.add("高校分布图", [[name, score] for name, score in zip(city, value)], "china")
_map.set_global_opts(
    title_opts=opts.TitleOpts(title="各省高校总分排名"),
    visualmap_opts=opts.VisualMapOpts(max_=1300),
)
# Render the chart inline in the notebook.
_map.render_notebook()