matplotlib官网:https://matplotlib.org/
基于matplotlib的seaborn官网:https://seaborn.pydata.org/
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline # notebook使用
import seaborn as sns
sns.set_style("whitegrid")
# 设置主题,可选项有darkgrid , whitegrid , dark , white ,和 ticks
sns.set(style="dark", palette="muted", color_codes=True)
# 通过参数设置主题
# col / row / hue choose which subset of the data each facet draws.
# Here every distinct value of 'Survived' gets its own column
# (the original note expects two panels, one per value).
g = sns.FacetGrid(data=train_data, col='Survived')
# plt.hist is called once per facet: 'Age' on the x axis, split into 20 bins.
g.map(plt.hist, 'Age', bins=20)
# Each panel shows the age distribution inside one 'Survived' subset.
plt.show()

# With hue= instead of col= the subsets are overlaid in a single axes.
# Facet size is set with height and aspect (width = height * aspect);
# the height keyword was called `size` before seaborn 0.9.
grid = sns.FacetGrid(data=train_data, hue='Survived')
# add_legend() labels the hue levels on the figure.
grid.map(plt.hist, 'Age', alpha=0.5, bins=20).add_legend()
plt.show()
# Histogram of a numeric array (inter1_num holds numbers).
plt.hist(inter1_num)

# Bar chart of value frequencies for an array of strings.
import pandas
from collections import Counter
# Sample data: 4 x 'a', 2 x 'b', 3 x 'c', 1 x 'd', 5 x 'e'.
a = list('aaaa' 'bb' 'ccc' 'd' 'eeeee')
letter_counts = Counter(a)
# orient='index' turns each distinct letter into a row label, with its count as the value.
df = pandas.DataFrame.from_dict(letter_counts, orient='index')
df.plot(kind='bar')
显示数据的均值和偏差
# One row of point plots per 'Embarked' value.
# `height` is the seaborn >= 0.9 name of the former `size` keyword, which
# newer releases no longer accept.
grid = sns.FacetGrid(train_data, row='Embarked', height=2.2, aspect=1.6)
# map() applies pointplot to each facet's subset; the column names after the
# function are forwarded as x, y and hue. palette picks the line colours.
grid.map(sns.pointplot, 'Pclass', 'Survived', 'Sex', palette='deep')
grid.add_legend()
plt.show()

# The same relationship without facets, with explicit colours, markers and
# line styles per hue level.
# NOTE(review): this call reads `data_train` while the grid above reads
# `train_data` — confirm both names exist in the notebook.
sns.pointplot(x="Pclass", y="Survived", hue="Sex", data=data_train,
              palette={"male": "blue", "female": "pink"},
              markers=["*", "o"], linestyles=["-", "--"]);
import pandas as pd
import seaborn as sns
import matplotlib.ticker as ticker

name = '5.csv'
size = 15          # font size used for axis labels, tick labels and legend text
linewidth = 1
marker_size = 40
plt.rcParams["axes.labelsize"] = size        # axis label size
plt.rcParams["lines.linewidth"] = linewidth  # line width

fig, a = plt.subplots(figsize=(10, 4))
data = pd.read_csv(name)
data = data[data['epoch'] < 71]  # keep epochs up to 70
ax = sns.pointplot(x="epoch", y="test-accuracy", hue="method", data=data,
                   palette="deep", dodge=True, scale=0.5,
                   markers=["o", "x", "^", "<"])
ax.xaxis.set_major_locator(ticker.MultipleLocator(10))  # spacing between major x ticks
ax.xaxis.set_major_formatter(ticker.ScalarFormatter())
ax.set(ylim=(70, 95))

# Build the legend once, with the wanted font size and an empty title.
legend = plt.legend(fontsize=size)
legend.set_title('')
# plt.setp(ax.get_legend().get_texts(), fontsize=size)  # alternative way to size legend text
# Legend.legendHandles was renamed to legend_handles in newer matplotlib;
# read whichever attribute this installation provides.
if hasattr(legend, 'legend_handles'):
    handles = legend.legend_handles
else:
    handles = legend.legendHandles
for lh in handles:  # marker size inside the legend
    lh.set_sizes([marker_size + 20])
plt.setp(ax.collections, sizes=[marker_size])  # marker size of the plotted points
ax.tick_params(labelsize=size)                 # tick label size
# plt.ylabel('train-accuracy (%)')
ax.set(xlabel='迭代次数', ylabel='正确率/%')
plt.show()
# The output file name is the CSV name with '.svg' appended.
fig.savefig(name+'.svg', format='svg', dpi=1200, bbox_inches='tight')
import matplotlib
matplotlib.use('Agg')
# mpl.rcParams['figure.dpi'] = 600
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('seaborn-bright')
plt.rcParams['font.family'] = 'Times New Roman'
plt.rcParams['figure.figsize'] = 8, 8
# plt.rcParams['xtick.labelsize'] = 24
# plt.rcParams['ytick.labelsize'] = 24
tick_label_size = 14
label_size = 14
data_name = datatv
# X positions of the six measurements, evenly spaced along the axis.
x_axis = [10, 72, 134, 196, 258, 320]

# Four measured series, one value per x position. The trailing comments keep
# the alternative readings recorded in the original notes.
line1 = [
    0.8239,
    0.8315,
    0.8394,
    0.8350,  # -0.8374-0.8372
    0.8414,
    0.8418,
]
line2 = [
    0.8211 - 0.006,
    0.8168,
    0.8218,
    0.8143,  # -0.8178
    0.8177,
    0.8171,
]
line3 = [
    0.8416,
    0.8513,
    0.8534,
    0.8495,  # -0.8520-0.8548
    0.8536,
    0.8567,
]
line4 = [
    0.8343,
    0.8359,
    0.8384,
    0.8374,  # -0.8392
    0.8384,
    0.8452,
]

# X axis: ticks sit on the evenly spaced positions but are labelled with
# doubling values, so equal steps on the axis read as a doubling.
xlim = (4, 326)
xticks = [10, 72, 134, 196, 258, 320]
xtickslabel = [10, 20, 40, 80, 160, 320]
xlabel = 'Number of hidden units'  # fixed typo: was 'hiddeng'

# Y axis: ticks are labelled with their own values.
ylim = (0.813, 0.858)
yticks = [0.82, 0.83, 0.84, 0.85]
ytickslabel = [0.82, 0.83, 0.84, 0.85]
ylabel = 'AUC(test)'
# Single axes with a light dashed grid on a white background.
fig, ax = plt.subplots(1, 1)
ax.grid(color='gray', linestyle='--', linewidth=1, alpha=0.3)
ax.set_facecolor('white')
ax.set_title(data_name)
# fig.suptitle(datael, fontsize=label_size)

# X axis: limits, tick positions, tick labels and axis label.
ax.set_xlim(*xlim)
ax.set_xticks(xticks)
ax.set_xticklabels(xtickslabel, fontsize=tick_label_size)
ax.set_xlabel(xlabel, fontsize=label_size)

# Y axis: same four settings.
ax.set_ylim(ylim)
ax.set_yticks(yticks)
ax.set_yticklabels(ytickslabel, fontsize=tick_label_size)
ax.set_ylabel(ylabel, fontsize=label_size)

# One dashed line per series: (values, format string, marker size, legend label).
# NOTE(review): the dcin_* label names are not defined in this snippet —
# presumably strings defined elsewhere; confirm.
for series, fmt, marker_px, legend_label in (
    (line1, 'bs--', 9, dcin_col),
    (line2, 'g^--', 10, dcin_smi),
    (line3, 'ro--', 9, dcin_col_app),
    # (line4, 'cv--', 10, dcin_smi_app),
):
    ax.plot(x_axis, series, fmt, markersize=marker_px, linewidth=2, label=legend_label)

# Place the legend at a fixed anchor inside the axes, with square corners.
plt.legend(bbox_to_anchor=(0.89, 0.40), borderaxespad=0., fontsize=label_size, fancybox=False)
# plt.tight_layout(w_pad=-8)
# plt.savefig('test.jpg', bbox_inches='tight', dpi=200)
用柱状图(barplot)显示数据的均值和偏差
# Bar charts of 'Fare' per 'Sex', one column per 'Embarked' value, with the
# 'Survived' levels 0 and 1 drawn in black ('k') and white ('w').
survival_colors = {0: 'k', 1: 'w'}
grid = sns.FacetGrid(train_data, col='Embarked', hue='Survived', palette=survival_colors)
# barplot's ci argument controls the error bars: ci='sd' draws the standard
# deviation as a vertical line, ci=None hides it.
grid.map(sns.barplot, 'Sex', 'Fare', alpha=.5).add_legend()
plt.show()
查看数据相关性。
# Pairwise plots for the first 50 rows. vars lists the columns to draw and
# hue colours the points by the 'class' column (a binary label, according to
# the original note). `height` is the seaborn >= 0.9 name of the former
# `size` keyword.
# Passing diag_kind="kde" would switch the diagonal panels to KDE curves.
_ = sns.pairplot(df[:50], vars=[8, 11, 12, 14, 19], hue="class", height=1.5)
图中可以看出,特征11和特征14几乎可以把分类目标class的两类分布分开。而特征12和特征19负相关,特征14与特征19强相关;注意这对某些模型(例如假设特征相互独立的朴素贝叶斯分类器)会有影响。
主要用于分析特征的两两相关。
# Pairwise relationship plots across the columns of `iris`, with default settings.
sns.pairplot(iris)
热力图(heatmap):用于查看特征两两之间的Pearson相关系数。
colormap = plt.cm.viridis
plt.figure(figsize=(12, 12))
plt.title('Pearson Correlation of Features', y=1.05, size=15)
# Correlation matrix of every column after casting the frame to float.
feature_corr = train.astype(float).corr()
sns.heatmap(
    feature_corr,
    cmap=colormap,
    vmax=1.0,           # upper end of the colour scale (vmin would set the lower end)
    square=True,
    linewidths=0.1,     # width of the lines separating the cells
    linecolor='white',  # colour of those separating lines
    annot=True,         # write each value inside its cell
)
查看数据分布和离群点
# Scatter plot of two features with their marginal distributions.
# `height` is the seaborn >= 0.9 name of the former `size` keyword.
sns.jointplot(x="SepalLengthCm", y="SepalWidthCm", data=iris, height=5)
# The same two features as a plain scatter, coloured per species.
sns.FacetGrid(iris, hue="Species", height=5) \
   .map(plt.scatter, "SepalLengthCm", "SepalWidthCm") \
   .add_legend()
# Sorted values plotted against their rank: useful for viewing the
# distribution and spotting outliers.
plt.scatter(range(mergepred.shape[0]), np.sort(mergepred), color="red")
分析单一特征。
# Arguments shared by the plots below: petal length grouped by species.
petal_by_species = dict(x="Species", y="PetalLengthCm", data=iris)
# Box plot on its own.
sns.boxplot(**petal_by_species)
# Box plot again, this time with the individual observations drawn on top.
ax = sns.boxplot(**petal_by_species)
# jitter=True spreads the points sideways so they do not sit on one vertical line.
ax = sns.stripplot(jitter=True, edgecolor="gray", **petal_by_species)
类似于boxplot,分析单一变量。
# Violin plot of petal length per species.
# NOTE(review): `size` is not a documented violinplot parameter — confirm the
# installed seaborn accepts it, or drop it.
sns.violinplot(x="Species", y="PetalLengthCm", data=iris, size=6)
分析单一变量。
# Kernel density estimate of petal length, one curve per species.
# `height` is the seaborn >= 0.9 name of the former `size` keyword.
sns.FacetGrid(iris, hue="Species", height=6) \
   .map(sns.kdeplot, "PetalLengthCm") \
   .add_legend()
# Histogram with a kernel density estimate, one call per array.
# NOTE(review): distplot is deprecated in recent seaborn in favour of
# histplot/displot — confirm against the installed version.
sns.distplot(inter1_num)
sns.distplot(Einter1_num)
plt.show()
# Two-dimensional kernel density estimate of the two arrays.
# NOTE(review): recent seaborn expects x=/y= keywords and fill= instead of
# shade= — confirm against the installed version.
sns.kdeplot(inter1_num,Einter1_num,shade=True)
plt.show()
# Andrews curves use each sample's attributes as the coefficients of a
# Fourier series and plot the resulting curves.
# The plotting helpers live in pandas.plotting; the old pandas.tools.plotting
# module no longer exists in current pandas.
from pandas.plotting import andrews_curves
andrews_curves(iris.drop("Id", axis=1), "Species")
# Parallel coordinates: features along the x axis, one line per sample
# showing its value for every feature.
from pandas.plotting import parallel_coordinates
parallel_coordinates(iris.drop("Id", axis=1), "Species")
将特征作为一个点绘制,通过相对权值将样例绘制成点。
# radviz lives in pandas.plotting; the old pandas.tools.plotting module no
# longer exists in current pandas.
from pandas.plotting import radviz
radviz(iris.drop("Id", axis=1), "Species")
用于分析文本中word出现的频率,越高,显示越大。
# Load the mask image and keep a single channel (index 1) as the cloud shape.
mask_image = Image.open("../input/imagesforkernal/safe-zone.png")
clean_mask = np.array(mask_image)[:, :, 1]

# Word cloud for the clean comments.
subset = train[train.clean == True]
text = subset.comment_text.values
wc = WordCloud(
    background_color="black",
    max_words=2000,
    mask=clean_mask,
    stopwords=stopword,
)
wc.generate(" ".join(text))

plt.figure(figsize=(20, 10))
plt.axis("off")
plt.title("Words frequented in Clean Comments", fontsize=20)
# Recolour with the viridis colormap; the fixed random_state keeps the colours reproducible.
recolored = wc.recolor(colormap='viridis', random_state=17)
plt.imshow(recolored, alpha=0.98)
plt.show()
# Line plot of the values in inter1_num against their index.
plt.plot(inter1_num)
def showimg(im1, im2, match_rate):
    """Show two images side by side with their match rate written between them.

    Args:
        im1: Path of the image drawn in the left cell.
        im2: Path of the image drawn in the right cell.
        match_rate: Text drawn in the middle cell.
    """
    plt.figure(figsize=(5, 1))  # overall figure size
    plt.subplot(1, 3, 1)  # cell 1 of a 1x3 grid: first image
    # cv2.imread yields BGR channel order; reversing the last axis gives the
    # RGB order that imshow expects.
    plt.imshow(cv2.imread(im1)[:, :, ::-1])
    plt.subplot(1, 3, 2)  # cell 2: the match-rate text, centred
    plt.text(0.5, 0.5, match_rate, ha='center', va='center',)
    plt.setp(plt.gca(), frame_on=False, xticks=(), yticks=())  # hide frame and ticks
    plt.subplot(1, 3, 3)  # cell 3: second image
    plt.imshow(cv2.imread(im2)[:, :, ::-1])
    plt.show()
# For each of the first 500 queries, look up its neighbours and show them in
# one row of images.
for qunum in range(500):
    # if random.random()>0.5:
    #     continue
    scores, indexes = tree.kneighbors([query_fea[qunum]])
    # Single query, so take row 0; both arrays are reversed so the gallery
    # runs in the opposite order to the one kneighbors returned.
    scores, indexes = scores[0][::-1], indexes[0][::-1]
    print("query", query_cut[qunum])
    plt.figure(figsize=(20, 20))
    # The 1x5 grid has room for five neighbours per query.
    for slot, (neighbor, score) in enumerate(zip(indexes, scores), start=1):
        im = Image.open(image_dir+image_files[neighbor]).resize((240,240))
        plt.subplot(1, 5, slot)
        plt.title(str(neighbor) +"——"+ str(score))
        plt.imshow(im)
    plt.show()
    print()
from matplotlib_venn import venn2,venn2_circles,venn3,venn3_circles
from matplotlib_venn import venn3_unweighted
# Three-set Venn diagram of the label, image and text sets.
venn3([label_set, image_set, text_set], ("label", "cnn", "bert"))
# Unweighted variant for the merge, image and text sets.
venn3_unweighted([merge_set, image_set, text_set], ("merge", "cnn", "bert"))
# Draw the three circle outlines and give each its own edge colour.
c = venn3_circles([merge_set, image_set, text_set], linewidth=1)
c[0].set_edgecolor('red')
c[1].set_edgecolor('blue')
# Fixed: the original coloured c[1] twice, leaving the third circle unstyled.
c[2].set_edgecolor('green')