目录
1. 模拟退火算法之特征选择的python实现(类封装)
2. 实验结果
按照模拟退火算法基本流程的 Python 实现,可以参考《模拟退火算法之特征选择的 Python 实现(一)》。
特此声明:代码是作者辛辛苦苦码的,转载请注明出处。
import numpy as np
from sklearn.metrics import mean_squared_error
import numpy as np
from sklearn.neural_network import MLPRegressor
import scipy.io as sio
from sklearn.model_selection import train_test_split
class SimulatedAnnealing(object):
    """Feature selection with the simulated annealing algorithm.

    Parameters
    ----------
    features : int
        The number of attributes (columns) in the original data. Candidate
        feature indices are ``0 .. features - 1``.
    init_features_sel : int
        The number of features drawn for the initial random solution.
    estimator : object
        A supervised learning estimator exposing ``fit`` and ``predict``.
    initT : int or float, default: 100
        The maximum (starting) temperature.
    minT : int or float, default: 1
        The minimum temperature; the search stops once the temperature is
        no longer above it.
    alpha : float, default: 0.98
        Decay coefficient applied to the temperature after each level.
    iteration : int, default: 50
        Number of candidate moves evaluated at each temperature.

    Attributes
    ----------
    temp_history_ : list
        Temperatures visited (set by ``fit``); starts with ``initT``.
    best_cost_history_ : list
        Best MSE known at the end of each temperature level (set by ``fit``).
    best_solution_history_ : list
        Best solution known at the end of each temperature level (set by
        ``fit``). Also available as ``best_solution_history`` for backward
        compatibility.
    """

    def __init__(self, features, init_features_sel, estimator, initT=100, minT=1, alpha=0.98, iteration=50):
        self.initT = initT
        self.minT = minT
        self.alpha = alpha
        self.iteration = iteration
        self.feature_size = features
        self.init_feature_sel = init_features_sel
        self.estimator = estimator

    def get_initial_solution(self):
        """Return a random subset of ``init_features_sel`` distinct feature indices."""
        # Every column 0 .. feature_size - 1 must be a candidate; the previous
        # ``feature_size - 1`` bound silently excluded the last feature.
        sol = np.arange(self.feature_size)
        np.random.shuffle(sol)
        return sol[:self.init_feature_sel]

    def get_cost(self, solution, x_train, x_test, y_train, y_test):
        """Compute the evaluation score (MSE) of a candidate solution.

        The estimator is fitted on the selected columns of ``x_train`` and
        scored on the same columns of ``x_test``.

        :param solution: array of shape (selected, ), indices of the columns to use
        :param x_train: array of shape (n_samples, n_features)
        :param x_test: array of shape (n_samples, n_features)
        :param y_train: array of shape (n_samples, )
        :param y_test: array of shape (n_samples, )
        :return: mean squared error on the test data, rounded to 4 decimals
        """
        limited_train_data = self.get_data_subset(x_train, solution)
        limited_test_data = self.get_data_subset(x_test, solution)
        estimator = self.estimator.fit(limited_train_data, y_train)
        y_test_pred = estimator.predict(limited_test_data)
        return round(mean_squared_error(y_test, y_test_pred), 4)

    @staticmethod
    def get_data_subset(x_data, soln):
        """Return the columns of ``x_data`` listed in ``soln``."""
        return x_data[:, soln]

    def get_neighbor(self, current_solution, temperature):
        """Produce a neighbouring solution by swapping features in and out.

        The number of swaps is random, scaled by the temperature, and capped
        at 10% of the current solution size (rounded up). The returned
        solution always has the same length as ``current_solution`` and
        contains no repeated index.

        :param current_solution: array of shape (selected, )
        :param temperature: int or float.
        :return: selected: the index of selected features, array of shape (selected, ).
        """
        selected = np.asarray(current_solution)
        # Include the last column as a candidate (was ``feature_size - 1``).
        not_selected = np.setdiff1d(np.arange(self.feature_size), selected)

        num_swaps = int(min(
            np.ceil(np.abs(np.random.normal(0, 0.1 * len(selected) * temperature))),
            np.ceil(0.1 * len(selected)),
        ))
        # Cannot swap more features than exist on either side.
        num_swaps = min(num_swaps, len(selected), len(not_selected))
        if num_swaps <= 0:
            # Nothing to exchange (e.g. every feature is already selected).
            return selected.copy()

        # Sample WITHOUT replacement: repeated positions would make np.delete
        # remove fewer entries than are appended, and repeated incoming
        # indices would duplicate a feature column in the solution.
        feature_out = np.random.choice(len(selected), size=num_swaps, replace=False)
        feature_in = np.random.choice(len(not_selected), size=num_swaps, replace=False)
        remaining = np.delete(selected, feature_out)
        return np.append(remaining, not_selected[feature_in])

    @staticmethod
    def get_probability(temperature, delta_cost):
        """Return the acceptance probability ``exp(delta_cost / temperature)``."""
        return np.exp(delta_cost/temperature)

    def fit(self, x_train, x_test, y_train, y_test):
        """Run the annealing search and record its history on the instance.

        :param x_train: array of shape (n_samples, n_features)
        :param x_test: array of shape (n_samples, n_features)
        :param y_train: array of shape (n_samples, )
        :param y_test: array of shape (n_samples, )
        :return:
        best_solution: the index of final selected attributes, array of shape (selected, )
        best_cost : minimum mse
        """
        temperature = self.initT  # current temperature
        solution = self.get_initial_solution()
        cost = self.get_cost(solution, x_train, x_test, y_train, y_test)

        temp_history = [temperature]
        best_cost_history = []
        best_solution_history = []

        best_cost = cost
        best_solution = solution

        while temperature > self.minT:
            for _ in range(self.iteration):
                next_solution = self.get_neighbor(solution, temperature)
                next_cost = self.get_cost(next_solution, x_train, x_test, y_train, y_test)

                probability = 0
                if next_cost > cost:
                    # Probability of accepting a move to a worse solution.
                    probability = self.get_probability(temperature, cost - next_cost)

                # Always accept an improvement; accept a worse move with the
                # probability computed above.
                if next_cost < cost or np.random.random() < probability:
                    cost = next_cost
                    solution = next_solution
                    # best_cost never exceeds cost, so a new global best is
                    # always an accepted move and can be tracked here.
                    if cost < best_cost:
                        best_cost = cost
                        best_solution = solution

            print("当前温度:", round(temperature, 2))
            print("当前温度下最好的得分:", best_cost)
            print("当前温度下波长数量:", len(solution))

            temperature *= self.alpha
            temp_history.append(temperature)
            best_cost_history.append(best_cost)
            best_solution_history.append(best_solution)

        self.temp_history_ = temp_history
        self.best_cost_history_ = best_cost_history
        self.best_solution_history_ = best_solution_history
        # Original attribute name, kept so existing callers keep working.
        self.best_solution_history = best_solution_history
        return best_solution, best_cost
# 1. Load the data set from the .mat file.
mat = sio.loadmat('NDFNDF_smote.mat')
data = mat['NDFNDF_smote']
# Columns 0..1049 are used as the inputs, column 1050 as the target.
x = data[:, :1050]
y = data[:, 1050]
print('原始数据大小:', x.shape, y.shape)

# 2. Split the samples into a training part and a held-out part (80/20).
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=1,
)
print("训练集;", x_train.shape, y_train.shape)
print("测试集:", x_test.shape, y_test.shape)

# 3. Search for a feature subset with simulated annealing.
feature_size = x.shape[1]
sel_feature = 10
estimator = MLPRegressor(hidden_layer_sizes=43)
sa = SimulatedAnnealing(
    feature_size,
    sel_feature,
    estimator,
    initT=100,
    minT=1,
    alpha=0.95,
    iteration=50,
)
sa.fit(x_train, x_test, y_train, y_test)