模拟退火算法之特征选择的python实现(二)

目录

1. 模拟退火算法之特征选择的python实现(类封装)

2. 实验结果

按照模拟退火算法基本流程编写的 Python 实现,可以参考《模拟退火算法之特征选择的python实现(一)》。

特此声明:代码是作者辛辛苦苦码的,转载请注明出处。

1. 模拟退火算法之特征选择的python实现(类封装)

import numpy as np
from sklearn.metrics import mean_squared_error
import numpy as np
from sklearn.neural_network import MLPRegressor
import scipy.io as sio
from sklearn.model_selection import train_test_split


class SimulatedAnnealing(object):
    """Feature selection with the simulated annealing algorithm.

    A solution is an array of column indices. The cost of a solution is the
    mean squared error obtained by fitting ``estimator`` on those columns of
    the training data and predicting the test data.

    Parameters
    ----------
    features: int
        The number of attributes (columns) in the original data.
    init_features_sel: int
        The number of features in the initial random solution.
    estimator: object
        A supervised learning estimator with ``fit`` and ``predict`` methods.
    initT: int or float, default: 100
        The starting (maximum) temperature.
    minT: int or float, default: 1
        The temperature at which the search stops.
    alpha: float, default: 0.98
        Decay coefficient applied to the temperature after each sweep.
    iteration: int, default: 50
        Number of candidate moves evaluated at each temperature.

    Attributes
    ----------
    temp_history_: list
        The temperatures visited, starting with ``initT``.
    best_cost_history_: list
        The best MSE found so far, recorded after every temperature step.
    best_solution_history_: list
        The best solution found so far, recorded after every temperature step.
        Also available as ``best_solution_history`` for backward compatibility.
    """

    def __init__(self, features, init_features_sel, estimator, initT=100, minT=1, alpha=0.98, iteration=50):

        self.initT = initT
        self.minT = minT
        self.alpha = alpha
        self.iteration = iteration
        self.feature_size = features
        self.init_feature_sel = init_features_sel
        self.estimator = estimator

    def get_initial_solution(self):
        """Draw a random initial solution.

        :return: array of ``init_features_sel`` distinct indices taken from
            ``range(features)``.
        """
        # Permute every column index (including the last one) and keep a prefix.
        return np.random.permutation(self.feature_size)[:self.init_feature_sel]

    def get_cost(self, solution, x_train, x_test, y_train, y_test):
        """Compute the cost (test MSE) of a solution.

        The stored estimator is re-fitted in place on the selected columns of
        the training data and then evaluated on the same columns of the test
        data.

        :param solution: array of shape (selected, )
        :param x_train: array of shape (n_samples, n_features)
        :param x_test: array of shape (n_samples, n_features)
        :param y_train: array of shape (n_samples, )
        :param y_test: array of shape (n_samples, )
        :return: mean squared error, rounded to 4 decimal places
        """
        limited_train_data = self.get_data_subset(x_train, solution)
        limited_test_data = self.get_data_subset(x_test, solution)
        estimator = self.estimator.fit(limited_train_data, y_train)
        y_test_pred = estimator.predict(limited_test_data)
        return round(mean_squared_error(y_test, y_test_pred), 4)

    @staticmethod
    def get_data_subset(x_data, soln):
        """Return the columns of ``x_data`` listed in ``soln``."""
        return x_data[:, soln]

    def get_neighbor(self, current_solution, temperature):
        """Build a neighboring solution by swapping features in and out.

        The number of swaps is drawn from a half-normal distribution whose
        scale grows with the temperature, and is capped at 10% of the current
        subset size (rounded up) and at the number of unselected features.
        The returned solution always has the same size as the input and never
        contains a feature twice.

        :param current_solution: array of shape (selected, )
        :param temperature: int or float.
        :return: the indices of the selected features, array of shape (selected, ).
        """
        selected = np.asarray(current_solution)
        not_selected = np.setdiff1d(np.arange(self.feature_size), selected)

        proposed = np.ceil(np.abs(np.random.normal(0, 0.1 * len(selected) * temperature)))
        cap = np.ceil(0.1 * len(selected))
        # Cannot bring in more features than are currently left out.
        num_swaps = int(min(proposed, cap, len(not_selected)))
        if num_swaps == 0:
            return selected.copy()

        # Sample positions without replacement so that exactly num_swaps
        # features leave and num_swaps distinct features enter.
        feature_out = np.random.choice(len(selected), num_swaps, replace=False)
        feature_in = np.random.choice(len(not_selected), num_swaps, replace=False)
        kept = np.delete(selected, feature_out)
        return np.append(kept, not_selected[feature_in])

    @staticmethod
    def get_probability(temperature, delta_cost):
        """Return the acceptance probability ``exp(delta_cost / temperature)``."""
        return np.exp(delta_cost/temperature)

    def fit(self, x_train, x_test, y_train, y_test):
        """Run the annealing search.

        :param x_train: array of shape (n_samples, n_features)
        :param x_test: array of shape (n_samples, n_features)
        :param y_train: array of shape (n_samples, )
        :param y_test: array of shape (n_samples, )
        :return:
        best_solution: the index of final selected attributes, array of shape (selected, )
        best_cost : minimum mse
        :raises ValueError: if ``alpha >= 1`` while ``initT > minT``, because
            the temperature would never drop to ``minT``.
        """
        if self.alpha >= 1 and self.initT > self.minT:
            raise ValueError("alpha must be smaller than 1 so that the temperature can reach minT")

        temperature = self.initT  # current temperature
        solution = self.get_initial_solution()
        cost = self.get_cost(solution, x_train, x_test, y_train, y_test)

        temp_history = [temperature]
        best_cost_history = []
        best_solution_history = []

        best_cost = cost
        best_solution = solution

        while temperature > self.minT:
            for _ in range(self.iteration):
                next_solution = self.get_neighbor(solution, temperature)
                next_cost = self.get_cost(next_solution, x_train, x_test, y_train, y_test)

                if next_cost < cost:
                    # Always move to a strictly better solution.
                    accept = True
                elif next_cost > cost:
                    # Move to a worse solution with a temperature-dependent probability.
                    accept = np.random.random() < self.get_probability(temperature, cost - next_cost)
                else:
                    accept = False

                if accept:
                    cost = next_cost
                    solution = next_solution
                if next_cost < best_cost:
                    # Record the candidate itself rather than relying on the
                    # acceptance step above having updated cost/solution.
                    best_cost = next_cost
                    best_solution = next_solution

            print("当前温度:", round(temperature, 2))
            print("当前温度下最好的得分:", best_cost)
            print("当前温度下波长数量:", len(solution))

            temperature *= self.alpha
            temp_history.append(temperature)
            best_cost_history.append(best_cost)
            best_solution_history.append(best_solution)

        self.temp_history_ = temp_history
        self.best_cost_history_ = best_cost_history
        self.best_solution_history_ = best_solution_history
        # Kept under the old name as well so existing callers continue to work.
        self.best_solution_history = best_solution_history
        return best_solution, best_cost


# 1. Load the data set from the MATLAB file.
mat = sio.loadmat('NDFNDF_smote.mat')
data = mat['NDFNDF_smote']
# The first 1050 columns are the inputs; column 1050 is the regression target.
x = data[:, :1050]
y = data[:, 1050]
print('原始数据大小:', x.shape, y.shape)

# 2. Hold out 20% of the samples as the test set (fixed seed for repeatability).
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=1
)
print("训练集;", x_train.shape, y_train.shape)
print("测试集:", x_test.shape, y_test.shape)

# 3. Search for a feature subset with simulated annealing, scoring each
#    candidate subset with an MLP regressor.
feature_size = x.shape[1]
sel_feature = 10
estimator = MLPRegressor(hidden_layer_sizes=43)
sa = SimulatedAnnealing(
    features=feature_size,
    init_features_sel=sel_feature,
    estimator=estimator,
    initT=100,
    minT=1,
    alpha=0.95,
    iteration=50,
)
sa.fit(x_train, x_test, y_train, y_test)

2. 实验结果

模拟退火算法之特征选择的python实现(二)_第1张图片

 

你可能感兴趣的:(近红外光谱,数据挖掘,机器学习)