下载地址:Hyperspectral Remote Sensing Scenes
包含:Indian Pines,Pavia University,KSC,Salinas的mat文件
以Indian Pines为例:Indian_pines.mat、Indian_pines_gt.mat
Band:220
pixel:145*145
class:16
模型:AdaBoost(基学习器:决策树)
评价指标:OA(总体精度)、Kappa系数、混淆矩阵
import numpy as np
import scipy.io as scio
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from matplotlib import pyplot as plt
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import cohen_kappa_score,confusion_matrix,accuracy_score
作用:
1.读取mat文件
2.统计各类像元数
3.由于数据集中0代表未知类型,因此去掉0这个类别
4.重构数据(特征、标签)
5.切分数据(分层抽样,测试集占30%)
def pre_process(f1_name, f2_name, f1_str, f2_str, test_size=0.3, random_state=8):
    """Load a hyperspectral scene and split its labelled pixels into train/test sets.

    Pixels whose ground-truth label is 0 are dropped; every remaining pixel
    becomes one sample whose feature vector is its band values.

    Assumes the image cube is laid out as (rows, cols, bands) and that its
    first two axes match the shape of the ground-truth map.

    Args:
        f1_name: Path of the .mat file holding the image cube.
        f2_name: Path of the .mat file holding the ground-truth map.
        f1_str: Key of the image cube inside the first .mat file.
        f2_str: Key of the ground-truth map inside the second .mat file.
        test_size: Fraction of the samples held out for testing.
        random_state: Seed forwarded to ``train_test_split``.

    Returns:
        Tuple ``(x_train, x_test, y_train, y_test)`` from a split stratified
        on the labels.
    """
    x_data = scio.loadmat(f1_name)[f1_str]
    y_data = scio.loadmat(f2_name)[f2_str]

    # Boolean-mask indexing visits pixels in row-major order, which is the
    # same sample order a nested row/column loop produces, so the seeded
    # split stays reproducible.
    labelled = y_data != 0
    x = x_data[labelled]
    y = y_data[labelled]

    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=test_size, random_state=random_state, stratify=y
    )
    return x_train, x_test, y_train, y_test
这里选择RandomizedSearchCV(随机搜索),比网格搜索速度快(默认n_iter=10,即随机采样10组参数)
评价指标:精度'accuracy'
作用:
输出最佳参数、最佳分数
def tuning_params(param_grid, model, x=None, y=None):
    """Run a randomized hyper-parameter search and report the best result.

    Args:
        param_grid: Mapping of parameter name to the candidate values to
            sample from.
        model: Estimator to tune.
        x: Training features; defaults to the module-level ``x_train``.
        y: Training labels; defaults to the module-level ``y_train``.

    Returns:
        Tuple ``(best_params, best_score)`` of the search, scored by accuracy.
    """
    # Fall back to the script's global training split so the existing
    # two-argument calls keep working unchanged.
    if x is None:
        x = x_train
    if y is None:
        y = y_train

    search = RandomizedSearchCV(model, param_grid, scoring='accuracy')
    search.fit(x, y)
    print('best_params:', search.best_params_)
    print('best_score:', search.best_score_)
    return search.best_params_, search.best_score_
def plot_confusion_matrix(cm, labels_name, title):
    """Draw a row-normalised confusion matrix with pyplot.

    The function only draws; it does not call ``plt.show()``.

    Args:
        cm: Confusion matrix of raw counts, one row per true label.
        labels_name: Class names used as tick labels on both axes.
        title: Title of the plot.
    """
    counts = np.asarray(cm, dtype=float)
    row_totals = counts.sum(axis=1, keepdims=True)
    # A class with no true samples has an all-zero row; keep it at 0 rather
    # than dividing by zero, which would yield NaN cells and a runtime warning.
    normalised = np.divide(
        counts, row_totals, out=np.zeros_like(counts), where=row_totals != 0
    )

    plt.imshow(normalised, interpolation='nearest')
    plt.title(title)
    plt.colorbar()

    tick_positions = np.arange(len(labels_name))
    plt.xticks(tick_positions, labels_name, rotation=90)  # class names along the x axis
    plt.yticks(tick_positions, labels_name)  # class names along the y axis
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
# Load the Indian Pines cube and its ground truth, then build the train/test split.
x_train, x_test, y_train, y_test = pre_process(
    r'C:\Indian_pines.mat',
    r'C:\Indian_pines_gt.mat',
    'indian_pines',
    'indian_pines_gt',
)
# Recorded shapes: x_train (7174, 220), x_test (3075, 220), y_train 7174, y_test 3075
默认参数训练、预测
# Baseline: AdaBoost over a decision tree with default settings.
# The tree is passed positionally because the keyword was renamed from
# ``base_estimator`` to ``estimator`` in scikit-learn 1.2 (old name removed
# in 1.4); the positional form works on both.
model = AdaBoostClassifier(DecisionTreeClassifier())
model.fit(x_train, y_train)
y_pre = model.predict(x_test)
print('kappa:%f' % (cohen_kappa_score(y_pre, y_test)))
print('OA:%f' % (accuracy_score(y_test, y_pre)))
# Recorded output:
# kappa:0.714406
# OA:0.749268
调参
可调参数:n_estimators,max_depth,min_samples_split,min_samples_leaf,learning_rate
应先调基学习器(决策树)的参数,再调AdaBoost的参数(n_estimators)
# Stage 1: tune the decision tree on its own.
params = {
    'max_depth': range(5, 10),
    'min_samples_split': range(10, 50),
    'min_samples_leaf': range(5, 20),
}
tuning_params(params, DecisionTreeClassifier())
# Recorded best params: {'min_samples_split': 37, 'min_samples_leaf': 6, 'max_depth': 9}
# Recorded best score: 0.879678015307469

# Stage 2: keep the tuned tree fixed and tune the number of boosting rounds.
# The tree is passed positionally because the keyword was renamed from
# ``base_estimator`` to ``estimator`` in scikit-learn 1.2 (old name removed
# in 1.4); the positional form works on both.
model = AdaBoostClassifier(
    DecisionTreeClassifier(min_samples_split=37, min_samples_leaf=6, max_depth=9)
)
params = {'n_estimators': range(40, 120, 5)}
tuning_params(params, model)
# Recorded best params: {'n_estimators': 60}
# Recorded best score: 0.8808920559514384
最终模型、评价
# Final model: the tuned tree boosted for the tuned number of rounds.
# The tree is passed positionally because the keyword was renamed from
# ``base_estimator`` to ``estimator`` in scikit-learn 1.2 (old name removed
# in 1.4); the positional form works on both.
model = AdaBoostClassifier(
    DecisionTreeClassifier(min_samples_split=37, min_samples_leaf=6, max_depth=9),
    n_estimators=60,
)
model.fit(x_train, y_train)
y_pre = model.predict(x_test)
print('kappa:%f' % (cohen_kappa_score(y_pre, y_test)))
print('OA:%f' % (accuracy_score(y_test, y_pre)))

confusion_mat = confusion_matrix(y_test, y_pre)
print(confusion_mat)

# One name per class, in label order, used as tick labels of the plot.
labels_name = [
    'Alfalfa',
    'Corn-notill',
    'Corn-mintill',
    'Corn',
    'Grass-pasture',
    'Grass-trees',
    'Grass-pasture-mowed',
    'Hay-windrowed',
    'Oats',
    'Soybean-notill',
    'Soybean-mintill',
    'Soybean-clean',
    'Wheat',
    'Woods',
    'Buildings-Grass-Trees-Drives',
    'Stone-Steel-Towers',
]
plot_confusion_matrix(confusion_mat, labels_name, " Confusion Matrix")
plt.show()
# Recorded output:
# NOTE(review): the matrix below is 9x9 while 16 class names are listed above,
# so it looks like it was recorded from a 9-class dataset run - confirm.
# kappa:0.891912
# OA:0.919504
# [[1909 1 9 0 0 8 15 47 0]
# [ 1 5486 0 44 0 63 0 1 0]
# [ 38 4 465 0 0 1 0 122 0]
# [ 0 83 0 835 0 1 0 0 0]
# [ 3 0 0 0 400 0 0 0 0]
# [ 8 354 0 2 0 1144 0 1 0]
# [ 118 0 2 0 0 0 277 2 0]
# [ 42 3 55 0 0 4 1 1000 0]
# [ 0 0 0 0 0 0 0 0 284]]
1.基于高光谱遥感影像的分类python