《统计学习方法》第8章习题答案

8.1

class Adaboost_tree:
    """AdaBoost binary classifier built from single-feature threshold stumps.

    Each weak classifier is a rule ``x[feature] <= value`` that predicts
    ``positive`` when the rule holds and ``-positive`` otherwise.  Labels in
    ``Y`` are expected to be +1 / -1, because the weight update multiplies
    the labels with the stump predictions.

    Attributes:
        X: Training samples as a 2-D array, one row per sample.
        Y: Training labels as a 1-D array.
        N: Number of training samples.
        feature_num: Number of feature columns.
        w: Current sample weights (sum to 1).
        g_x: Fitted stumps as ``(feature_index, split_value, positive, alpha)``.
        f_dict: Maps each feature index to its sorted distinct values.
    """

    # Clamp applied to the weighted error before taking the logarithm so a
    # perfect (or perfectly wrong) stump yields a finite coefficient.
    _EPS = 1e-10

    def __init__(self, X, Y):
        """Store the training set and initialise uniform sample weights.

        Raises:
            ValueError: If ``X`` is not a non-empty 2-D array or its row
                count differs from the length of ``Y``.
        """
        self.X = np.array(X)
        self.Y = np.array(Y)
        if self.X.ndim != 2 or self.X.shape[0] == 0:
            raise ValueError("X must be a non-empty 2-D array of samples")
        if self.X.shape[0] != self.Y.shape[0]:
            raise ValueError("X and Y must contain the same number of samples")
        self.N = self.X.shape[0]
        self.feature_num = self.X.shape[1]
        self.w = np.full(self.N, 1 / self.N)
        self.g_x = []
        self.get_feature_dict()

    def compute_error(self, y):
        """Return the total weight of the samples where ``y`` differs from ``Y``."""
        y = np.asarray(y)
        return np.sum(self.w[y != self.Y])

    def compute_am(self, em):
        """Return the stump coefficient ``0.5 * ln((1 - em) / em)``.

        ``em`` is clamped to ``[_EPS, 1 - _EPS]`` first, so ``em == 0`` and
        ``em == 1`` give a large finite value instead of raising.
        """
        em = min(max(em, self._EPS), 1 - self._EPS)
        return 0.5 * math.log((1 - em) / em)

    def get_feature_dict(self):
        """Collect the distinct values of every feature as candidate thresholds.

        For example feature 0 may only take the values 0 and 1 while feature 1
        takes 1, 2 and 3.  Values are sorted so that tie-breaking between
        equally good stumps is deterministic.
        """
        self.f_dict = {
            i: np.unique(self.X[:, i]).tolist() for i in range(self.feature_num)
        }

    def fit(self, max_iter=20):
        """Run up to ``max_iter`` boosting rounds, appending stumps to ``g_x``.

        Training stops early when the best stump is no better than chance
        (weighted error >= 0.5) or when it classifies every sample correctly;
        in the latter case the perfect stump is kept before stopping.
        """
        stumps = [
            (i, j) for i in range(self.feature_num) for j in self.f_dict[i]
        ]
        if not stumps:
            return

        # Stump outputs do not depend on the sample weights, so compute them
        # once: row k holds the "+1 when x[i] <= j" predictions of stump k.
        base = np.array([np.where(self.X[:, i] <= j, 1, -1) for i, j in stumps])

        for _ in range(max_iter):
            # Try every stump in both orientations (+1 side / -1 side).
            errs_pos = [self.compute_error(row) for row in base]
            errs_neg = [self.compute_error(-row) for row in base]
            best_pos = int(np.argmin(errs_pos))
            best_neg = int(np.argmin(errs_neg))

            if errs_pos[best_pos] <= errs_neg[best_neg]:
                best, positive, em = best_pos, 1, errs_pos[best_pos]
            else:
                best, positive, em = best_neg, -1, errs_neg[best_neg]

            if em >= 0.5:
                # No stump beats random guessing; its coefficient would be <= 0.
                break

            split_f_index, split_value = stumps[best]
            pred_y = positive * base[best]
            am = self.compute_am(em)
            self.g_x.append((split_f_index, split_value, positive, am))

            if em <= 0:
                # A perfect stump was found and stored; nothing left to reweight.
                print('em is zero')
                break

            w_list = self.w * np.exp(-am * self.Y * pred_y)
            self.w = w_list / np.sum(w_list)  # normalise so the weights sum to 1

    def predict_single(self, x):
        """Return the sign of the alpha-weighted vote of all stumps for ``x``.

        The result is 0 when no stump has been fitted or the votes cancel.
        """
        result = 0
        for split_f_index, split_value, positive, am in self.g_x:
            if x[split_f_index] <= split_value:
                result += positive * am
            else:
                result -= positive * am

        return np.sign(result)

    def predict(self, X):
        """Predict every row of ``X``, print the predictions and return them."""
        result = [self.predict_single(x) for x in X]
        print("predict:", result)
        return result
            
def main():
    """Fit the classifier on the ten-sample toy data set and print its predictions."""
    # Each row is (feature 0, feature 1, feature 2, label).
    table = np.array([
        [0, 1, 3, -1],
        [0, 3, 1, -1],
        [1, 2, 2, -1],
        [1, 1, 3, -1],
        [1, 2, 3, -1],
        [0, 1, 2, -1],
        [1, 1, 2, 1],
        [1, 1, 1, 1],
        [1, 3, 1, -1],
        [0, 2, 1, -1],
    ])
    samples = table[:, :3]
    labels = table[:, 3]

    model = Adaboost_tree(samples, labels)
    model.fit(max_iter=20)
    predictions = model.predict(samples)
    print(predictions)


if __name__ == '__main__':
    main()

参考:统计学习方法第八章作业:分类问题的AdaBoost算法和回归问题的提升树算法,代码,实现

8.2

题目:比较支持向量机、 AdaBoost、逻辑斯谛回归模型的学习策略与算法。
解答:学习策略通常来讲只有结构风险最小化和经验风险最小化两类;各模型对应的算法查阅书本即可。

| 模型 | 学习策略 | 算法 |
| --- | --- | --- |
| 支持向量机 | 结构风险最小化 | 序列最小最优化(SMO)算法 |
| AdaBoost | 经验风险最小化 | 前向分步算法的二分类学习算法 |
| 逻辑斯谛回归 | 经验风险最小化 | 改进的迭代尺度法,梯度下降法,牛顿法,拟牛顿法 |

你可能感兴趣的:(统计学习方法,机器学习)