数据可视化图表-带边界的气泡图(Bubble plot with Encircling)

import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import patches
from scipy.spatial import ConvexHull
import warnings; warnings.simplefilter('ignore')
sns.set_style('white')
% matplotlib inline

# step 1: prepare data
midwest = pd.read_csv('https://raw.githubusercontent.com/selva86/datasets/master/midwest_filter.csv')

# as many colors as there are unique midwest['category'],
# sampled at evenly spaced positions of the tab10 colormap
categories = np.unique(midwest['category'])
# Guard the denominator: with a single category, len(categories) - 1 is 0 and
# the unguarded division would raise ZeroDivisionError.
color_span = float(max(len(categories) - 1, 1))
colors = [plt.cm.tab10(i / color_span) for i in range(len(categories))]

# step 2: draw scatterplot with unique color for each category
fig = plt.figure(figsize=(16, 10), dpi=80, facecolor='w', edgecolor='k')

for i, category in enumerate(categories):
    # 'area', 'poptotal' and 'dot_size' are column names resolved through `data`.
    # The RGBA tuple is wrapped in a list (a single-row color array) so scatter
    # reads it as one color for every point instead of four values to colormap.
    plt.scatter('area', 'poptotal',
                data=midwest.loc[midwest.category == category, :],
                s='dot_size', c=[colors[i]], label=str(category),
                edgecolors='black', linewidths=.5)
    
# step 3:encircling
# https://stackoverflow.com/questions/44575681/how-do-i-encircle-different-data-sets-in-scatter-plot

def encircle(x, y, ax=None, **kw):
    """Draw the convex hull of the points (x, y) as a polygon patch.

    Parameters
    ----------
    x, y : array-like
        Point coordinates; they are paired column-wise with ``np.c_``.
    ax : matplotlib Axes, optional
        Axes to draw on. When omitted, the current axes (``plt.gca()``)
        are used.
    **kw
        Forwarded unchanged to the ``Polygon`` constructor
        (e.g. ``ec``, ``fc``, ``alpha``, ``linewidth``).

    Returns
    -------
    The ``Polygon`` patch that was added to ``ax``, so callers can restyle
    or remove it later. Existing callers that ignore the result are
    unaffected.

    Notes
    -----
    ``ConvexHull`` is called without extra options, so any error it raises
    (presumably for too few or degenerate points -- confirm against the
    scipy documentation) propagates to the caller.
    """
    # Identity check rather than truthiness: only a missing argument
    # should trigger the fallback to the current axes.
    if ax is None:
        ax = plt.gca()
    points = np.c_[x, y]
    hull = ConvexHull(points)
    # hull.vertices indexes the input points that lie on the hull boundary.
    poly = plt.Polygon(points[hull.vertices, :], **kw)
    ax.add_patch(poly)
    return poly
    
# Rows to be encircled: the counties whose state is 'IN'.
in_state = midwest.state == 'IN'
midwest_encircle_data = midwest.loc[in_state, :]

# Draw the hull twice over the same points: first a faint gold fill with a
# black edge, then an unfilled firebrick outline on top.
for patch_style in (
    dict(ec='k', fc='gold', alpha=0.1),
    dict(ec='firebrick', fc='none', linewidth=1.5),
):
    encircle(midwest_encircle_data.area, midwest_encircle_data.poptotal, **patch_style)

# step 4: decorations
axes_settings = dict(xlim=(0.0, 0.1), ylim=(0, 90000), xlabel='Area', ylabel='Population')
plt.gca().set(**axes_settings)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.title("Bubble Plot with Encircling", fontsize=22)
plt.legend(fontsize=12)
plt.show()

数据可视化图表-带边界的气泡图(Bubble plot with Encircling)_第1张图片

你可能感兴趣的:(python)