一直以来对matplotlib以及seaborn的学习都停留在复制、粘贴与调参的层面,因此下定决心整理一套适合自己、并与特定应用场景相匹配的绘图模板,便于自己查找与更新。
目的:抛弃繁杂的参数设置学习,直接看齐优秀的模板
解释plt.plot()、plt.scatter()、plt.legend()的参数
seaborn.set()
在kaggle看到一个用plotly这个库做的饼图,可以实现在jupyter notebook中进行交互,鼠标浮动即可显示饼图信息,也可下载png。数据集用的是一个二分类的蘑菇数据集
import plotly.figure_factory as ff
import plotly.express as px
import plotly.graph_objects as go
# Interactive pie chart of the class balance in the (binary) mushroom dataset.
labels = ['Edible', 'Poisonous']  # the two classes
# Summarise the 'class' column once instead of calling describe() three times.
# In this summary 'count' is the total number of entries and 'freq' is the
# number of entries holding the most frequent value.
class_summary = mashroom_dataset.describe()['class']
# NOTE(review): the first slice is the most frequent class and is labelled
# 'Edible' -- this assumes edible is the majority class; confirm against the data.
values = [class_summary['freq'], class_summary['count'] - class_summary['freq']]
colors = ['green', 'red']  # slice colours, in the same order as `labels`
fig = go.Figure(data=[go.Pie(labels=labels, values=values, opacity=0.8)])
fig.update_traces(textinfo='percent+label', marker=dict(line=dict(color='#000000', width=2), colors=colors))
fig.update_layout(title_text='Distribution of the Mushrooms by their Classes', title_x=0.5, title_font=dict(size=28))
fig.show()
# Scatter plot: feature column `x_index` on the x axis, `y_index` on the y axis.
# iris.target_names is expected to hold the class names
# ('setosa', 'versicolor', 'virginica').
x_index = 0
y_index = 1
colors = ['blue', 'red', 'green']  # one colour per class; the order can be swapped
# `label` is the integer class id (0, 1, 2) used to select that class's rows.
for label, (name, color) in enumerate(zip(iris.target_names, colors)):
    plt.scatter(
        iris.data[iris.target == label, x_index],  # x values for this class
        iris.data[iris.target == label, y_index],  # y values for this class
        label=name,  # legend entry for this class
        c=color,  # marker colour
    )
plt.xlabel(iris.feature_names[x_index])  # x-axis label
plt.ylabel(iris.feature_names[y_index])  # y-axis label
plt.legend(loc='upper left')  # legend position
plt.show()
seaborn版本
# seaborn version of the scatter plot: sepal length vs. sepal width,
# coloured by species, with a legend.
sns.set(style="darkgrid")  # grid background
chart = (
    sns.FacetGrid(iris_df, hue="species")
    .map(plt.scatter, "sepal length (cm)", "sepal width (cm)")
    .add_legend()
)
chart.fig.set_size_inches(12, 6)
# Joint plot of sepal length vs. sepal width with marginal distributions.
sns.set(style="white", color_codes=True)
# `height` is the current name of the figure-size argument; the former `size`
# spelling was renamed by seaborn.
sns.jointplot(x="sepal length (cm)", y="sepal width (cm)", data=iris_df, height=5)
# KDE joint plot without filled (shaded) contours.
sns.set(style="white", color_codes=True)
sns.jointplot(
    data=iris_df,
    x='petal length (cm)',
    y='sepal width (cm)',
    hue='species',  # colour the densities by iris species
    kind="kde",
)
plt.show()
# KDE joint plot with filled contours, coloured by species.
# `fill` replaces the deprecated `shade` keyword of seaborn's KDE plots.
sns.jointplot(
    x='petal length (cm)',
    y='sepal width (cm)',
    data=iris_df,
    kind="kde",
    hue='species',
    joint_kws=dict(alpha=0.6, fill=True),  # semi-transparent filled joint density
    marginal_kws=dict(fill=True),  # filled marginal densities
)
参考文章
# Pairwise feature plots coloured by species; `height` (formerly `size`) sets
# the size of each facet.
sns.pairplot(iris_df, hue="species", height=3)
# Area plot of the four measurement columns.
# The last column name must stay on a single line: a line break inside the
# string literal is a syntax error and would not match the column label.
iris_df.plot.area(
    y=['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)'],
    alpha=0.4,
    figsize=(12, 6),
);  # trailing semicolon kept from the original (presumably to silence notebook output)
其实就是热力图:
# Predict on X_test with each of the three models.
y_pred_grid_knn = knn_grid_search.predict(X_test)
y_pred_grid_logi = logistic_grid_search.predict(X_test)
y_pred_grid_nb = naive_bayes.predict(X_test)

# One confusion matrix per model, in the order KNN, logistic regression, naive Bayes.
matrix_1, matrix_2, matrix_3 = [
    confusion_matrix(y_test, predictions)
    for predictions in (y_pred_grid_knn, y_pred_grid_logi, y_pred_grid_nb)
]

# Wrap each matrix in a DataFrame so rows and columns carry the species names.
species_names = ['setosa', 'versicolor', 'virginica']
df_1, df_2, df_3 = [
    pd.DataFrame(matrix, index=species_names, columns=species_names)
    for matrix in (matrix_1, matrix_2, matrix_3)
]
# Draw the three confusion matrices side by side, each with its own colour map.
plt.figure(figsize=(20, 5))
plt.subplots_adjust(hspace=.25)
heatmap_panels = [
    ('confusion_matrix(KNN)', df_1, 'Blues'),
    ('confusion_matrix(logistic regression)', df_2, 'Greens'),
    ('confusion_matrix(naive_bayes)', df_3, 'Reds'),
]
for panel_no, (panel_title, panel_df, panel_cmap) in enumerate(heatmap_panels, start=1):
    plt.subplot(1, 3, panel_no)
    plt.title(panel_title)
    sns.heatmap(panel_df, annot=True, cmap=panel_cmap)
plt.show()
在kaggle 偶然看到的,这也太好看了,用到了yellowbrick这个库
from yellowbrick.classifier import PrecisionRecallCurve, ROCAUC, ConfusionMatrix
from yellowbrick.style import set_palette
from yellowbrick.cluster import KElbowVisualizer
from yellowbrick.model_selection import LearningCurve, FeatureImportances
from yellowbrick.contrib.wrapper import wrap
# The '\033[...m' sequences below are ANSI escape codes used to style the
# console output (35 = magenta, 1 = bold, 0 = reset).
# --- LR Accuracy ---
# accuracy_score is documented as (y_true, y_pred); pass the ground truth first,
# consistent with the classification_report call below.
LRAcc = accuracy_score(y_test, y_pred_grid_logi)
print('.:. Logistic Regression Accuracy:'+'\033[35m\033[1m {:.2f}%'.format(LRAcc*100)+' \033[0m.:.')
# --- LR Classification Report ---
print('\033[35m\033[1m\n.: Classification Report'+'\033[0m')
print('*' * 25)
print(classification_report(y_test, y_pred_grid_logi))
# --- Performance Evaluation ---
print('\033[35m\n\033[1m'+'.: Performance Evaluation'+'\033[0m')
print('*' * 26)
# 2x2 grid of yellowbrick diagnostics for the logistic-regression grid search.
fig, axes = plt.subplots(2, 2, figsize=(14, 12))
(ax1, ax2), (ax3, ax4) = axes

# --- Top-left: confusion matrix ---
logmatrix = ConfusionMatrix(
    logistic_grid_search,
    ax=ax1,
    cmap='RdPu',
    title='Logistic Regression Confusion Matrix',
)
logmatrix.fit(X_train, y_train)
logmatrix.score(X_test, y_test)
logmatrix.finalize()

# --- Top-right: ROC AUC ---
logrocauc = ROCAUC(
    logistic_grid_search,
    ax=ax2,
    title='Logistic Regression ROC AUC Plot',
)
logrocauc.fit(X_train, y_train)
logrocauc.score(X_test, y_test)
logrocauc.finalize()

# --- Bottom-left: learning curve (fit only, no score step) ---
loglc = LearningCurve(
    logistic_grid_search,
    ax=ax3,
    title='Logistic Regression Learning Curve',
)
loglc.fit(X_train, y_train)
loglc.finalize()

# --- Bottom-right: precision-recall curve ---
logcurve = PrecisionRecallCurve(
    logistic_grid_search,
    ax=ax4,
    ap_score=True,
    iso_f1_curves=True,
    title='Logistic Regression Precision-Recall Curve',
)
logcurve.fit(X_train, y_train)
logcurve.score(X_test, y_test)
logcurve.finalize()

plt.tight_layout()
# Correlation heatmap of `df`, with the upper triangle (diagonal included) masked out.
plt.figure(figsize=(11, 11))
corr = df.corr()
# Boolean mask that is True on and above the diagonal.
mask = np.triu(np.ones_like(corr, dtype=bool))
sns.heatmap(
    corr,
    mask=mask,
    cmap="viridis",
    center=0,
    robust=True,
    square=True,
    linewidths=.6,
)
plt.title('Correlation Table')
plt.show()