最近遇到一个需求,需要将指定输入的各个标签列表及其各值的比例通过以下相似图形展示出来,下面这种图有点像扇形图,又有点像玫瑰图。可以通过plotly库画出来,参考官方文档:
https://plot.ly/python/sunburst-charts/
首先需要去看官方文档了解这种图应该如何画:每个扇形上显示的文字就是label,每个扇形对应唯一的id,每个扇形通过parent指定它所属的上一层扇形(填写该上层扇形的id),子扇形画在其parent的外侧。
但是我们此次的需求有些不一样:需要根据以下标签及其比重画图。第一层必须画出全部的label;第二层最多画 max_factors 个label,且不超过 label 总数减 1(子节点中不包含其parent自身的label);第三层的label数上限在第二层的基础上再减去 1,以此类推。同时通过输入的值 max_floor 来限定最多画多少层。另外,每个parent的value等于其下面的children的value之和。
features = {"Destination": 90, "Follow": 61, "Forward": 59, "Real": 50, "Brave": 47,
"Elegant": 42, "Confident": 28, "Generous": 27, "Beautiful": 51}
整个思路如下:
# -*- coding: utf-8 -*-
import os

import pandas as pd
import plotly.graph_objects as go
import plotly.offline as py  # offline plotting
def return_results2(parent, children, floor_cnt, max_floor, max_factors, all_parents, all_ids, all_labels, all_values):
    """
    Recursively append the sunburst sectors below ``parent`` to the output lists.

    Every emitted child becomes one sector. Its id is the parent id
    concatenated with the child label, and its value is the parent's value
    split in proportion to the child's weight among *all* ``children``
    (including siblings that are not emitted because of truncation). Each
    emitted child then acts as the parent of the next floor, whose candidates
    are its siblings (the child itself is removed from the list).

    :param parent: Tuple (parent_id, parent_label, parent_value)
    :param children: List of Tuple (child_label, child_value)
    :param floor_cnt: zero-based depth of the floor generated by this call
    :param max_floor: largest ``floor_cnt`` that is still generated
    :param max_factors: on floor ``n`` (n > 0) at most ``max_factors - n + 1``
        children are emitted per parent; floor 0 is never truncated
    :param all_parents: output list, receives the parent id of each sector
    :param all_ids: output list, receives the id of each sector
    :param all_labels: output list, receives the label of each sector
    :param all_values: output list, receives the value of each sector
    :return: None; the four output lists are extended in place
    """
    if floor_cnt > max_floor:
        return

    total_child_value = sum(value for _, value in children)
    # When every weight is zero there is nothing to share out; give each child
    # a zero value instead of failing with ZeroDivisionError.
    scale = parent[2] / total_child_value if total_child_value else 0.0
    # Number of children allowed on this floor (ignored on floor 0).
    child_limit = max_factors - floor_cnt + 1

    for elem_idx, elem in enumerate(children):
        # ">=" rather than "==" so that a zero or negative limit also stops
        # the loop instead of letting every sibling through.
        if floor_cnt > 0 and elem_idx >= child_limit:
            break

        elem_id = parent[0] + elem[0]
        # Shrunk by a tiny factor, presumably so that floating-point rounding
        # cannot push the children's sum above the parent's value.
        elem_val = scale * elem[1] * 0.9999999999

        all_parents.append(parent[0])
        all_labels.append(elem[0])
        all_values.append(elem_val)
        all_ids.append(elem_id)

        # An only child has no siblings left to place beneath it.
        if len(children) == 1:
            return

        new_parent = (elem_id, elem[0], elem_val)
        new_children = children[:elem_idx] + children[elem_idx + 1:]
        return_results2(new_parent, new_children, floor_cnt + 1, max_floor, max_factors,
                        all_parents, all_ids, all_labels, all_values)
def go_through_factors(factor_list, max_floor, max_factors, value_percent, present_map):
    """
    Build the four parallel lists that describe the whole sunburst.

    The lists start with a single root sector (id ``"RO"``, label
    ``present_map``, value 100, empty parent) and are then filled by
    ``return_results2`` with every sector below the root.

    :param factor_list: labels of the first-floor sectors
    :param max_floor: forwarded to ``return_results2``
    :param max_factors: forwarded to ``return_results2``
    :param value_percent: weights paired position by position with
        ``factor_list``
    :param present_map: label shown on the root sector
    :return: Tuple (all_parents, all_ids, all_labels, all_values)
    """
    # Seed the output lists with the root node.
    root = ("RO", present_map, 100)
    all_ids = [root[0]]
    all_labels = [root[1]]
    all_values = [root[2]]
    all_parents = [""]

    first_floor = list(zip(factor_list, value_percent))
    # Recursive fill, starting at floor 0 directly below the root.
    return_results2(root, first_floor, 0, max_floor, max_factors,
                    all_parents, all_ids, all_labels, all_values)
    return all_parents, all_ids, all_labels, all_values
def plot_Sunburst(all_parents, all_ids, all_labels, all_values, all_colors):
    """
    Draw the sunburst chart, show it, and save it as an HTML page.

    The five arguments are parallel lists with one entry per sector.

    :param all_parents: id of each sector's parent ("" for the root)
    :param all_ids: unique id of each sector
    :param all_labels: text displayed on each sector
    :param all_values: value of each sector
    :param all_colors: fill colour of each sector
    :return: None; the figure is shown and written to
        ``data/Lizzie_brief.html``
    """
    fig = go.Figure()
    fig.add_trace(go.Sunburst(
        ids=all_ids,
        labels=all_labels,
        values=all_values,
        parents=all_parents,
        # "total": a parent's value already includes its children's values.
        branchvalues="total",
        marker=dict(colors=all_colors),
        # Appends a "%" to the hover text of every sector.
        hovertext=['%'] * len(all_values),
        hoverinfo="label+value+text",
    ))
    fig.update_layout(margin=dict(t=0, l=0, r=0, b=0))
    fig.show()

    output_path = 'data/Lizzie_brief.html'
    # The output directory may not exist yet on a fresh checkout.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    # Writes a standalone web page; image='png' presumably also requests a
    # PNG export when the page is opened -- confirm against the plotly docs.
    py.plot(fig, filename=output_path, image='png')
def main():
    """
    Build the demo sunburst from a hard-coded feature table.

    The features are sorted by weight (descending) and converted to
    percentages. The sector tree is generated with ``go_through_factors``,
    dumped to ``data/Lizzie_factors.csv`` and plotted with ``plot_Sunburst``.
    """
    features = {"Destination": 90, "Follow": 61, "Forward": 59, "Real": 50, "Brave": 47,
                "Elegant": 42, "Confident": 28, "Generous": 27, "Beautiful": 51}
    features_sorted = sorted(features.items(), key=lambda x: x[1], reverse=True)
    # Labels and raw values, heaviest feature first.
    original_label = [label for label, _ in features_sorted]
    original_value = [value for _, value in features_sorted]
    # Share of each feature in the total, in percent.
    total_value = sum(original_value)
    value_percent = [value / total_value * 100 for value in original_value]

    # Fill colour of the sectors, keyed by label.
    colors = {"Destination": "#5466e8", "Follow": "#e55757", "Forward": "#60d778", "Real": "#ea8b54",
              "Brave": "#35e1f9", "Elegant": "#de5b9f", "Confident": "#96ea43", "Generous": "#e297f6",
              "Beautiful": "#8b7bd9"}
    max_floor = 3
    max_factors = 5
    present_map = "荔枝小姐"

    all_parents, all_ids, all_labels, all_values = go_through_factors(original_label, max_floor, max_factors,
                                                                      value_percent, present_map)

    # One colour per sector, in sector order. Labels without a configured
    # colour (such as the root) fall back to white, so the list always stays
    # aligned with all_labels.
    all_colors = [colors.get(label, 'white') for label in all_labels]

    output_csv = "data/Lizzie_factors.csv"
    # The output directory may not exist yet on a fresh checkout.
    os.makedirs(os.path.dirname(output_csv), exist_ok=True)
    new_pd = pd.DataFrame({
        'ids': all_ids,
        'labels': all_labels,
        'parents': all_parents,
        'values': all_values,
    })
    new_pd.to_csv(output_csv, index=False)

    plot_Sunburst(all_parents, all_ids, all_labels, all_values, all_colors)
# Run the demo only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()