目录
文章目录
一、Pandas数据处理
1.1 读取数据
1.2 查看表格数据描述
1.3 查看表格是否有数据缺失
1.4 查看电梯共有几种类型值
1.5 缺失值填充
1.6 查看房屋朝向数据
1.7 统计各城区二手房数量
二、Pyecharts 可视化
2.1 北京各个城区二手房数量地图分布
2.2 各城区二手房数量-平均价格柱状图
2.3 二手房价格最高Top15
2.4 二手房总价与面积散点图
2.5 房屋朝向饼图
2.6 装修情况/有无电梯玫瑰图
2.7 二手房楼层分布柱状图
2.8 房屋面积分布柱状图
提示:pandas 是基于 NumPy 的数据分析工具,专为解决数据分析任务而创建。
import pandas as pd
# Load the second-hand housing table; the file is decoded as GBK.
df = pd.read_csv("二手房数据.csv", encoding='gbk')
df.head()  # preview the first 5 rows
df.describe()  # summary statistics
df.isnull().sum()  # number of missing values per column
df['电梯'].unique()  # distinct values of the elevator column before filling

# Fill missing elevator values with the placeholder '未知'.
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on the selected column: that chained in-place form
# acts on a temporary object, emits a warning on pandas 2.x and leaves `df`
# unchanged once copy-on-write is enabled.
df['电梯'] = df['电梯'].fillna('未知')
df.isnull().sum()  # re-check missing values after the fill
df['电梯'].unique()

df['朝向'].unique()  # distinct orientation values before normalising
# Rewrite the spelling '南西' as '西南'. regex=False makes this a plain
# substring replacement regardless of the pandas version's default.
df['朝向'] = df['朝向'].str.replace('南西', '西南', regex=False)
df['朝向'].unique()
# Listings per district: group the rows by '市区' and count the non-null
# '小区' entries inside each group.
g = df.groupby('市区')
num = g['小区'].count()
print(type(num))
# Plain Python list of the per-district counts, in the same order as num's index.
num1 = num.tolist()
num1
df['市区'].unique()
from pyecharts.charts import Geo
from pyecharts import options as opts
# Geo chart on the '北京' map: one [district, count] pair per district, with a
# visual map scaled from 0 to 3000 and the series labels hidden.
district_pairs = [[name, count] for name, count in zip(num.keys(), num1)]

c = Geo()
c.add_schema(
    maptype='北京',
    itemstyle_opts=opts.ItemStyleOpts(color='#A60B63', border_color='#FFFF22'),
)
c.add("", district_pairs)
c.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
c.set_global_opts(
    visualmap_opts=opts.VisualMapOpts(min_=0, max_=3000),
    title_opts=opts.TitleOpts(title="北京各个城区二手房数量"),
)
c.render_notebook()
c = num.keys().tolist()  # district names (index of the per-district counts)
num1 = num.values.tolist()  # per-district counts as a plain Python list
# Mean price per district, rounded to 2 decimals. The price column is
# selected BEFORE aggregating: calling g.mean() on the whole frame tries to
# average every column and raises TypeError on pandas >= 2.0 when the frame
# contains text columns.
df_price = g['价格(万元)'].mean().round(decimals=2)
df_price
price1 = df_price.values.tolist()
from pyecharts.charts import Bar, Line
from pyecharts.globals import ThemeType
# Combined chart: per-district counts as bars on the primary y axis and
# per-district prices as a line on a secondary y axis (axis index 1).

# Secondary (price) axis, fixed to 200..900 in steps of 100.
# An axis label formatter could be supplied via axislabel_opts if needed.
price_axis = opts.AxisOpts(
    name="价格",
    type_="value",
    min_=200,
    max_=900,
    interval=100,
)

# Primary (count) axis, fixed to 0..3000 in steps of 500, with tick marks shown.
count_axis = opts.AxisOpts(
    name="数量",
    type_="value",
    min_=0,
    max_=3000,
    interval=500,
    axistick_opts=opts.AxisTickOpts(is_show=True),
)

# Category x axis with a shadow-style axis pointer.
district_axis = opts.AxisOpts(
    type_="category",
    axispointer_opts=opts.AxisPointerOpts(is_show=True, type_="shadow"),
)

# Axis-triggered tooltip with a cross pointer.
cross_tooltip = opts.TooltipOpts(
    is_show=True, trigger="axis", axis_pointer_type="cross"
)

bar = Bar(init_opts=opts.InitOpts(width="1700px", height="800px", theme=ThemeType.MACARONS))
bar.add_xaxis(xaxis_data=c)
bar.add_yaxis(
    series_name="数量",
    y_axis=num1,
    category_gap='50%',  # controls the bar width
    # Show the value above each bar; calling
    # set_series_opts(label_opts=opts.LabelOpts(is_show=True)) is an alternative.
    label_opts=opts.LabelOpts(is_show=True),
)
bar.extend_axis(yaxis=price_axis)
bar.set_global_opts(
    tooltip_opts=cross_tooltip,
    xaxis_opts=district_axis,
    yaxis_opts=count_axis,
)

line = Line()
line.add_xaxis(xaxis_data=c)
line.add_yaxis(
    series_name="价格",
    yaxis_index=1,  # plot against the secondary axis added by extend_axis
    y_axis=price1,
    z=10,
    label_opts=opts.LabelOpts(is_show=True),  # show the value at each point
)

bar.overlap(line).render_notebook()
# Alternatively: bar.overlap(line).render("mixed_bar_and_line.html")
# The 15 rows with the highest '价格(万元)' value, highest first.
top_price = df.sort_values(by="价格(万元)", ascending=False).head(15)
top_price
data1 = top_price['小区'].values.tolist()  # x values: 小区 names
data2 = top_price['价格(万元)'].values.tolist()  # y values: prices
# Bar chart of those 15 prices. The series and x axis are labelled after the
# data actually plotted (price per 小区); the previous labels '数量' and
# '城区' described a different chart.
c = (
    Bar()
    .add_xaxis(data1)
    .add_yaxis(
        '价格(万元)',
        data2,
        label_opts=opts.LabelOpts(is_show=True),
        category_gap='50%',
        color='green',
    )
    .set_global_opts(
        xaxis_opts=opts.AxisOpts(name='小区'),
        yaxis_opts=opts.AxisOpts(name='价格(万元)'),
    )
)
c.render_notebook()
from pyecharts.charts import Scatter
# Scatter plot of '价格(万元)' (y) against '面积(㎡)' (x); both axes are
# value axes.
area_values = df['面积(㎡)'].values.tolist()
price_values = df['价格(万元)'].values.tolist()

s = Scatter()
s.add_xaxis(area_values)
s.add_yaxis('', price_values, color='blue')
s.set_global_opts(
    xaxis_opts=opts.AxisOpts(type_='value', name='面积(㎡)'),
    yaxis_opts=opts.AxisOpts(type_='value', name='价格(万元)'),
)
s.render_notebook()
# Listings per orientation: group by '朝向' and count the non-null '小区'
# entries in each group.
d = df.groupby('朝向')
direction = d['小区'].count()
direction
# Total across all orientation groups.
s = direction.to_numpy().sum()
s
from pyecharts.charts import Pie
# Ring-shaped pie of the per-orientation counts, with a vertical legend on
# the left and labels formatted as "name: value (percent%)".
orientation_pairs = [
    [name, count]
    for name, count in zip(direction.keys().tolist(), direction.values.tolist())
]

c = Pie()
c.add("", orientation_pairs, radius=["30%", "75%"])
c.set_global_opts(
    title_opts=opts.TitleOpts(title="房屋朝向比"),
    legend_opts=opts.LegendOpts(orient="vertical", pos_top="15%", pos_left="2%"),
)
c.set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c} ({d}%)"))
c.render_notebook()
# Counts per '装修情况' value (non-null '小区' entries in each group).
g1 = df.groupby('装修情况')
g = g1['小区'].count()
l1 = g.index.tolist()  # group names
l2 = g.tolist()  # group counts
print(l1)
print(l2)

# Counts per '电梯' value, computed the same way.
g2 = df.groupby('电梯')
c1 = g2['小区'].count()
l3 = c1.index.tolist()
l4 = c1.tolist()
s = c1.to_numpy().sum()  # total over all '电梯' groups
print(s)

# Horizontal bar chart of the '装修情况' counts. The call order is kept:
# the axes are swapped first, then the series and global options are set.
bar = Bar()
bar.add_xaxis(l1)
bar.add_yaxis('装修情况', l2, category_gap='50%', color="#54DADB")
bar.reversal_axis()
bar.set_series_opts(label_opts=opts.LabelOpts(position="right"))
bar.set_global_opts(
    title_opts=opts.TitleOpts(title="装修情况/有无电梯"),
    xaxis_opts=opts.AxisOpts(name='数量'),
    yaxis_opts=opts.AxisOpts(name='装修情况'),
)
bar.render_notebook()
from pyecharts.charts import Pie
# Rose-type ("area") pie of the per-'电梯' counts held in l3 (names) and
# l4 (counts); labels are formatted as "name: value (percent%)".
elevator_pairs = [[name, count] for name, count in zip(l3, l4)]

c = Pie()
c.add(
    "",
    elevator_pairs,
    radius=["30%", "75%"],
    # center=["25%", "50%"] could be passed here to reposition the pie.
    rosetype="area",
    label_opts=opts.LabelOpts(is_show=True, formatter="{b}: {c} ({d}%)"),
)
c.set_global_opts(title_opts=opts.TitleOpts(title="有无电梯玫瑰图"))
c.render_notebook()
# Listings per '楼层' value: group and count the non-null '小区' entries.
f = df.groupby('楼层')
floor = f['小区'].count()
floor
f1 = floor.index.tolist()  # floor labels
f2 = floor.tolist()  # counts per floor label

# Bar chart of those counts with a data-zoom control enabled.
c = Bar()
c.add_xaxis(f1)
c.add_yaxis("数量", f2, category_gap='50%', color="#AED54C")
c.set_global_opts(
    title_opts=opts.TitleOpts(title="二手房楼层分布柱状缩放图"),
    datazoom_opts=opts.DataZoomOpts(),
    xaxis_opts=opts.AxisOpts(name='楼层'),
    yaxis_opts=opts.AxisOpts(name='数量'),
)
c.render_notebook()
# Bin edges for '面积(㎡)'. Ten edges define nine right-closed intervals:
# (0, 50], (50, 100], ..., (350, 400], (400, 1500].
area_level = [0, 50, 100, 150, 200, 250, 300, 350, 400, 1500]
# One label per interval, in the same order as the bins (len(edges) - 1 labels).
label_level = ['0-50', '50-100', '100-150', '150-200', '200-250',
               '250-300', '300-350', '350-400', '400-1500']
# The third positional parameter of pd.cut is `right`, not `labels`, so the
# labels must be passed by keyword to be applied at all.
p1 = pd.cut(df['面积(㎡)'], bins=area_level, labels=label_level)
# value_counts() orders by frequency by default; sort_index() restores the
# bin order so that p3[i] is the count for label_level[i].
p2 = p1.value_counts().sort_index()
p3 = p2.values.tolist()
p3
# Horizontal bar chart plotting the values in p3 against the categories in
# label_level. The call order is kept: the axes are swapped before the
# series and global options are applied.
c = Bar()
c.add_xaxis(label_level)
c.add_yaxis("面积(㎡)", p3, color="#7944B7")
c.reversal_axis()
c.set_series_opts(label_opts=opts.LabelOpts(position='right'))
c.set_global_opts(
    title_opts=opts.TitleOpts(title="房屋面积分布柱状图"),
    xaxis_opts=opts.AxisOpts(name='数量'),
    yaxis_opts=opts.AxisOpts(name='面积(㎡)'),
)
c.render_notebook()