捕捉一只Diu

机器学习：分类决策树（Python）

一、各种熵的计算

entropy_utils.py

import numpy as np  # 数值计算
import math  # 标量数据的计算


class EntropyUtils:
    """
    Impurity measures for decision trees: information entropy, information
    gain, information gain ratio, Gini index and Gini gain.

    All split criteria are oriented so that a larger value is a better split.
    """

    @staticmethod
    def _set_sample_weight(sample_weight, n_samples):
        """
        Return the per-sample weights as a float ndarray.

        :param sample_weight: weights of the samples, or None for unit weights
        :param n_samples: number of samples (used only when sample_weight is None)
        :return: ndarray of weights
        """
        if sample_weight is None:
            return np.ones(n_samples, dtype=float)
        # Lists / Series are accepted: the boolean-mask indexing used by the
        # measures below only works on an ndarray.
        return np.asarray(sample_weight, dtype=float)

    @staticmethod
    def _weighted_proportions(y, sample_weight):
        """
        Yield, for every distinct value in y, count * mean(weight) / len(y).

        :param y: ndarray of labels or feature values
        :param sample_weight: ndarray of per-sample weights, same length as y
        """
        n_total = len(y)
        for val in np.unique(y):
            mask = y == val
            yield np.count_nonzero(mask) * np.mean(sample_weight[mask]) / n_total

    def _conditional(self, measure, feature_x, y_labels, sample_weight):
        """
        Average `measure` over the groups induced by each distinct feature value,
        each group weighted by its share of the samples.

        :param measure: self.cal_info_entropy or self.cal_gini
        :param feature_x: values of one feature
        :param y_labels: class labels of the same samples
        :param sample_weight: per-sample weights or None
        :return: the conditional impurity
        """
        x, y = np.asarray(feature_x), np.asarray(y_labels)
        sample_weight = self._set_sample_weight(sample_weight, len(y))
        total = 0.0
        for x_val in np.unique(x):
            mask = x == x_val  # samples taking this feature value
            sub_y = y[mask]
            total += len(sub_y) / len(y) * measure(sub_y, sample_weight[mask])
        return total

    def cal_info_entropy(self, y_labels, sample_weight=None):
        """
        Compute the (weighted) information entropy, in bits.

        :param y_labels: class labels, or feature values, of the current sample subset
        :param sample_weight: per-sample weights; None means unit weights
        :return: the entropy (0.0 for empty input)
        """
        y = np.asarray(y_labels)
        sample_weight = self._set_sample_weight(sample_weight, len(y))
        ent_y = 0.0
        for p_i in self._weighted_proportions(y, sample_weight):
            # A class whose weights are all zero contributes 0 * log2(0) := 0;
            # math.log2(0) would raise a math domain error instead.
            if p_i > 0:
                ent_y += -p_i * math.log2(p_i)
        return ent_y

    def conditional_entropy(self, feature_x, y_labels, sample_weight=None):
        """
        Compute the entropy of the labels conditioned on a feature.

        :param feature_x: values of one feature
        :param y_labels: class labels of the current sample subset
        :param sample_weight: per-sample weights; None means unit weights
        :return: the conditional entropy
        """
        return self._conditional(self.cal_info_entropy, feature_x, y_labels, sample_weight)

    def info_gain(self, feature_x, y_labels, sample_weight=None):
        """
        Compute the information gain: entropy minus conditional entropy.

        :param feature_x: values of one feature
        :param y_labels: class labels of the current sample subset
        :param sample_weight: per-sample weights; None means unit weights
        :return: the information gain
        """
        return self.cal_info_entropy(y_labels, sample_weight) - \
            self.conditional_entropy(feature_x, y_labels, sample_weight)

    def info_gain_rate(self, feature_x, y_labels, sample_weight=None):
        """
        Compute the information gain ratio: information gain divided by the
        entropy of the feature itself.

        :param feature_x: values of one feature
        :param y_labels: class labels of the current sample subset
        :param sample_weight: per-sample weights; None means unit weights
        :return: the gain ratio; 0.0 when the feature entropy is zero
        """
        gain = self.info_gain(feature_x, y_labels, sample_weight)
        split_info = self.cal_info_entropy(feature_x, sample_weight)
        if split_info == 0:
            # A feature with a single value (or empty input) cannot split the
            # samples; report "no gain" instead of dividing by zero.
            return 0.0
        return gain / split_info

    def cal_gini(self, y_label, sample_weight=None):
        """
        Compute the (weighted) Gini index.

        :param y_label: class labels, or feature values, of the current sample subset
        :param sample_weight: per-sample weights; None means unit weights
        :return: the Gini index (1.0 for empty input)
        """
        y = np.asarray(y_label)
        sample_weight = self._set_sample_weight(sample_weight, len(y))
        gini_val = 1.0
        for p_k in self._weighted_proportions(y, sample_weight):
            gini_val -= p_k ** 2
        return gini_val

    def conditional_gini(self, feature_x, y_labels, sample_weight=None):
        """
        Compute the Gini index of the labels conditioned on a feature.

        :param feature_x: values of one feature
        :param y_labels: class labels of the current sample subset
        :param sample_weight: per-sample weights; None means unit weights
        :return: the conditional Gini index
        """
        return self._conditional(self.cal_gini, feature_x, y_labels, sample_weight)

    def gini_gain(self, feature_x, y_labels, sample_weight=None):
        """
        Compute the Gini gain: Gini index minus conditional Gini index.

        :param feature_x: values of one feature
        :param y_labels: class labels of the current sample subset
        :param sample_weight: per-sample weights; None means unit weights
        :return: the Gini gain
        """
        return self.cal_gini(y_labels, sample_weight) - \
            self.conditional_gini(feature_x, y_labels, sample_weight)


# if __name__ == '__main__':
#     y = np.random.randint(0, 2, 50)
#     entropy = EntropyUtils()
#     ent = entropy.cal_info_entropy(y)
#     print(ent)

二、连续特征数据的离散分箱

data_bin_wrapper.py

import numpy as np


class DataBinsWrapper:
    """
    Discretise continuous features by quantile binning.

    1. fit(x) computes, per feature, the percentile cut points implied by max_bins.
    2. transform(x) replaces every value by the index of the bin it falls into.
    """
    def __init__(self, max_bins=10):
        self.max_bins = max_bins  # number of bins, e.g. 10 -> cuts at 10%, 20%, ..., 90%
        self.XrangeMap = None  # per-feature sorted cut points; None until fit() is called

    def fit(self, x_samples):
        """
        Compute the cut points of every feature.

        :param x_samples: samples, a 2-D array (n * k), or the values of a single feature (1-D)
        :return: None; the cut points are stored in self.XrangeMap
        """
        x_samples = np.asarray(x_samples)  # accept lists as well as ndarrays
        if x_samples.ndim == 1:  # a single feature: promote to an (n, 1) array
            x_samples = x_samples[:, np.newaxis]

        # Percentiles (floored to whole percents) shared by all features.
        percents = [(k / self.max_bins) * 100 // 1 for k in range(1, self.max_bins)]
        self.XrangeMap = []
        for idx in range(x_samples.shape[1]):
            column = np.asarray(x_samples[:, idx], dtype=float)
            # The set drops duplicated cut points (features with many repeated values).
            edges = {np.percentile(column, p) for p in percents}
            self.XrangeMap.append(sorted(edges))

    def transform(self, x_samples, XrangeMap=None):
        """
        Map every value to the index of its bin.

        :param x_samples: samples, a 2-D array (n * k), or the values of a single feature (1-D)
        :param XrangeMap: optional per-feature cut points to use instead of the fitted ones
        :return: integer bin indices, 1-D for 1-D input, otherwise (n * k)
        :raises ValueError: if no cut points are available (neither fitted nor passed)
        """
        x_samples = np.asarray(x_samples)
        # Explicitly passed cut points take precedence for 1-D and 2-D input alike.
        bins = self.XrangeMap if XrangeMap is None else XrangeMap
        if bins is None:
            raise ValueError("DataBinsWrapper is not fitted: call fit() first or pass XrangeMap.")
        if x_samples.ndim == 1:
            return np.asarray(np.digitize(x_samples, bins[0])).reshape(-1)
        return np.asarray([np.digitize(x_samples[:, i], bins[i])
                           for i in range(x_samples.shape[1])]).T



# if __name__ == '__main__':
#     x = np.random.randn(10, 5)
#     print(x)
#     dbw = DataBinsWrapper(max_bins=5)
#     dbw.fit(x)
#     print(dbw.XrangeMap)
#     print(dbw.transform(x))

三、可视化分类边界函数

plt_decision_function.py

import matplotlib.pylab as plt
import numpy as np


def plot_decision_function(X, y, clf, acc=None, title_info=None, is_show=True, support_vectors=None):
    """
    Plot the decision regions of a classifier over its first two features.

    :param X, y: samples (columns 0 and 1 are plotted) and their classes
    :param clf: classification model exposing predict()
    :param acc: classification accuracy shown in the title; None hides it
    :param title_info: extra text appended to the title
    :param is_show: create a new figure and show it; pass False when the caller draws subplots
    :param support_vectors: indices into X of support vectors to highlight (SVM extension)
    :return: None
    """
    if is_show:
        plt.figure(figsize=(7, 5))
    # Build a dense 2-D grid spanning the data (with a margin of 1) for the filled contour.
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xi, yi = np.meshgrid(np.arange(x_min, x_max, 0.02),
                         np.arange(y_min, y_max, 0.02))
    y_pred = clf.predict(np.c_[xi.ravel(), yi.ravel()])  # model prediction at every grid point
    y_pred = y_pred.reshape(xi.shape)
    plt.contourf(xi, yi, y_pred, alpha=0.4)
    plt.scatter(X[:, 0], X[:, 1], alpha=0.8, c=y, edgecolors="k")
    plt.xlabel("Feature 1", fontdict={"fontsize": 12})
    plt.ylabel("Feature 2", fontdict={"fontsize": 12})

    title = "Model Classification Boundary"
    if title_info:
        title += " %s" % title_info
    # Compare against None: an accuracy of exactly 0.0 is falsy but must still be shown.
    if acc is not None:
        separator = " \n" if title_info else " "
        title += separator + "(accuracy = %.5f)" % acc
    plt.title(title, fontdict={"fontsize": 14})

    if support_vectors is not None:  # highlight the support vectors (SVM only)
        plt.scatter(X[support_vectors, 0], X[support_vectors, 1],
                    s=50, c="None", alpha=0.7, edgecolors="red")
    if is_show:
        plt.show()

四、熵计算的测试

test_entropy.py

import numpy as np
import pandas as pd
from utils.entropy_utils import EntropyUtils
from utils.data_bin_wrapper import DataBinsWrapper


data = pd.read_csv("data/watermelon.csv").iloc[:, 1:]
feat_names = data.columns[:6]
y = data.iloc[:, -1]
ent_obj = EntropyUtils()

# Report each split criterion for the six candidate features; every report
# is followed by a separator line.
criteria = (
    ("各特征的信息增益如下：", ent_obj.info_gain),
    ("各特征的信息增益率如下：", ent_obj.info_gain_rate),
    ("各特征的基尼指数增益如下：", ent_obj.gini_gain),
)
for heading, criterion in criteria:
    print(heading)
    for feat in feat_names:
        print(feat, ":", criterion(data.loc[:, feat], y))
    print("=" * 60)

# Quantile-bin the two continuous features and show the raw and binned values.
continuous_cols = ["密度", "含糖率"]
x1 = np.asarray(data.loc[:, continuous_cols])
print(x1)
dbw = DataBinsWrapper(max_bins=8)
dbw.fit(x1)
x1_binned = dbw.transform(x1)
print(x1_binned)

五、树的结点信息封装

tree_node.py


class TreeNode_C:
    """
    Node of a classification decision tree: a plain record of the split stored
    at the node, the statistics of the samples that reached it, and its children.
    """
    def __init__(self, feature_idx: int = None, feature_val=None, criterion_val: float = None,
                 n_samples: int = None, target_dist: dict = None, weight_dist: dict = None,
                 left_child_Node=None, right_child_Node=None):
        """
        Store the node information.

        :param feature_idx: index of the feature the node splits on
        :param feature_val: feature value used by the split
        :param criterion_val: value of the split criterion (information gain (ratio) or Gini gain)
        :param n_samples: number of samples that reached the node
        :param target_dist: class distribution at the node, e.g. {0: 0.25, 1: 0.5, 2: 0.25}
        :param weight_dist: per-class sample-weight distribution at the node
        :param left_child_Node: left subtree
        :param right_child_Node: right subtree
        """
        self.feature_idx = feature_idx
        self.feature_val = feature_val
        self.criterion_val = criterion_val
        self.n_samples = n_samples
        self.target_dist = target_dist
        self.weight_dist = weight_dist
        self.left_child_Node = left_child_Node  # recursive structure
        self.right_child_Node = right_child_Node  # recursive structure

    def level_order(self):
        """
        Traverse the subtree rooted at this node level by level (breadth-first).

        :return: list of nodes, top level first and left before right within a level
        """
        from collections import deque  # local import: the module has no import block

        ordered = []
        pending = deque([self])
        while pending:
            node = pending.popleft()
            ordered.append(node)
            for child in (node.left_child_Node, node.right_child_Node):
                if child is not None:
                    pending.append(child)
        return ordered

六、分类决策树算法的实现

decision_tree_C.py

import numpy as np
from utils.entropy_utils import EntropyUtils
from utils.tree_node import TreeNode_C
from utils.data_bin_wrapper import DataBinsWrapper


class DecisionTreeClassifier:
    """
    Classification decision tree; ID3, C4.5 and CART are all built as binary trees.

    1. Split criterion: information gain (ratio) or Gini gain; the feature/value
       pair with the largest criterion value is chosen.
    2. fit() builds the tree recursively.
    3. predict_proba() / predict() search the tree from the root.
    4. Continuous features can be discretised by quantile binning beforehand.
    5. prune() performs cost-complexity post-pruning.
    """
    def __init__(self, criterion="CART", is_feature_all_R=False, dbw_feature_idx=None,
                 max_depth=None, min_sample_split=2, min_sample_leaf=1,
                 min_impurity_decrease=0, max_bins=10):
        """
        :param criterion: "cart", "c45" or "id3" (case-insensitive)
        :param is_feature_all_R: True when every feature is continuous
        :param dbw_feature_idx: for mixed data, indices of the continuous features
        :param max_depth: maximum depth of the tree; None keeps splitting
        :param min_sample_split: minimum number of samples a node needs to be split
        :param min_sample_leaf: minimum number of samples a child node must contain
        :param min_impurity_decrease: a split is kept only if its criterion value exceeds this
        :param max_bins: number of bins used to discretise continuous features
        :raises ValueError: if criterion is not one of the supported names
        """
        self.utils = EntropyUtils()  # split-criterion calculator
        self.criterion = criterion  # name of the split criterion
        criterion_funcs = {
            "cart": self.utils.gini_gain,  # Gini gain
            "c45": self.utils.info_gain_rate,  # information gain ratio
            "id3": self.utils.info_gain,  # information gain
        }
        try:
            self.criterion_func = criterion_funcs[criterion.lower()]
        except KeyError:
            raise ValueError("参数criterion仅限cart、c45或id3...") from None
        self.is_feature_all_R = is_feature_all_R
        self.dbw_feature_idx = dbw_feature_idx
        self.max_depth = max_depth
        self.min_sample_split = min_sample_split
        self.min_sample_leaf = min_sample_leaf
        self.min_impurity_decrease = min_impurity_decrease
        self.max_bins = max_bins
        self.root_node = None  # root TreeNode_C of the tree; None until fit() is called
        self.dbw = DataBinsWrapper(max_bins=max_bins)  # discretiser for continuous features
        self.dbw_XrangeMap = {}  # cut points of the binned training features, keyed by feature index
        self.class_values = None  # distinct class labels seen by fit()

    def _has_bin_features(self):
        """Return True when specific continuous feature indices were configured."""
        return self.dbw_feature_idx is not None and np.size(self.dbw_feature_idx) > 0

    def _data_bin_wrapper(self, x_samples):
        """
        Bin the features listed in dbw_feature_idx and pass the others through.

        While dbw_XrangeMap is empty the samples are treated as training data
        (cut points are fitted and stored); afterwards the stored cut points
        are reused so that test samples share the training bins.

        :param x_samples: training or test samples, 2-D ndarray
        :return: ndarray of the same shape with the continuous features binned
        """
        # A local set keeps self.dbw_feature_idx exactly as the caller passed it;
        # overwriting it with an ndarray made a second fit() fail on its truth value.
        bin_idx = set(np.asarray(self.dbw_feature_idx).ravel().tolist())
        is_training = not self.dbw_XrangeMap
        columns = []  # columns after binning
        for i in range(x_samples.shape[1]):
            column = x_samples[:, i]
            if i not in bin_idx:  # discrete feature: keep as is
                columns.append(column)
            elif is_training:
                self.dbw.fit(column)
                self.dbw_XrangeMap[i] = self.dbw.XrangeMap
                columns.append(self.dbw.transform(column))
            else:
                columns.append(self.dbw.transform(column, self.dbw_XrangeMap[i]))
        return np.asarray(columns).T

    def fit(self, x_train, y_train, sample_weight=None):
        """
        Build the decision tree; prepares the data, then recurses.

        :param x_train: training samples, n * k
        :param y_train: targets, (n, )
        :param sample_weight: per-sample weights, (n, ); None means unit weights
        :return: None
        """
        x_train, y_train = np.asarray(x_train), np.asarray(y_train)
        self.class_values = np.unique(y_train)  # distinct class labels
        n_samples, _ = x_train.shape
        if sample_weight is None:
            sample_weight = np.ones(n_samples, dtype=float)
        else:
            sample_weight = np.asarray(sample_weight)  # boolean/index masks need an ndarray
        self.root_node = TreeNode_C()  # start from an empty tree
        if self.is_feature_all_R:  # every feature is continuous
            self.dbw.fit(x_train)
            x_train = self.dbw.transform(x_train)
        elif self._has_bin_features():
            # Forget cut points of a previous fit so these samples are binned as training data.
            self.dbw_XrangeMap = {}
            x_train = self._data_bin_wrapper(x_train)
        self._build_tree(1, self.root_node, x_train, y_train, sample_weight)

    def _build_tree(self, cur_depth, cur_node: "TreeNode_C", x_train, y_train, sample_weight):
        """
        Recursively grow the tree below cur_node (the core algorithm).

        :param cur_depth: depth of cur_node, the root being 1
        :param cur_node: node to fill in and, if possible, split
        :param x_train: samples that reached the node
        :param y_train: targets of those samples
        :param sample_weight: weights of those samples
        :return: None
        """
        n_samples, n_features = x_train.shape
        target_dist, weight_dist = {}, {}  # class distribution and mean weight per class
        for label in np.unique(y_train):
            mask = y_train == label
            target_dist[label] = np.count_nonzero(mask) / n_samples
            weight_dist[label] = np.mean(sample_weight[mask])
        cur_node.target_dist = target_dist
        cur_node.weight_dist = weight_dist
        cur_node.n_samples = n_samples

        # Stop conditions checked before searching for a split.
        if len(target_dist) <= 1:  # pure node, or no samples at all
            return
        if n_samples < self.min_sample_split:  # too few samples to split
            return
        if self.max_depth is not None and cur_depth > self.max_depth:  # depth limit reached
            return

        # Search every (feature, value) pair for the best binary split "== value" / "!= value".
        best_idx, best_val, best_criterion_val = None, None, 0.0
        for k in range(n_features):
            feature_values = np.unique(x_train[:, k])
            if len(feature_values) < 2:
                continue  # a constant feature has zero gain and can never be selected
            for f_val in feature_values:
                feat_k_values = (x_train[:, k] == f_val).astype(int)  # 1 where the value matches
                criterion_val = self.criterion_func(feat_k_values, y_train, sample_weight)
                if criterion_val > best_criterion_val:
                    best_criterion_val = criterion_val
                    best_idx, best_val = k, f_val

        # Stop conditions that depend on the best split found.
        if best_idx is None:  # no feature separates the samples
            return
        if best_criterion_val <= self.min_impurity_decrease:  # gain too small to be worth a split
            return
        cur_node.criterion_val = best_criterion_val
        cur_node.feature_idx = best_idx
        cur_node.feature_val = best_val

        # np.where returns a tuple; take its first element so len() counts samples
        # (len of the tuple itself is always 1, which disabled min_sample_leaf).
        left_idx = np.where(x_train[:, best_idx] == best_val)[0]
        right_idx = np.where(x_train[:, best_idx] != best_val)[0]
        for side, idx in (("left_child_Node", left_idx), ("right_child_Node", right_idx)):
            if len(idx) >= self.min_sample_leaf:  # otherwise this side gets no child
                child_node = TreeNode_C()
                setattr(cur_node, side, child_node)
                self._build_tree(cur_depth + 1, child_node, x_train[idx],
                                 y_train[idx], sample_weight[idx])

    def _search_tree_predict(self, cur_node: "TreeNode_C", x_test):
        """
        Follow one test sample from cur_node down the tree and return the
        class probabilities of the node where the search stops.

        :param cur_node: node the search starts from
        :param x_test: a single test sample
        :return: ndarray of probabilities ordered like self.class_values
        """
        left, right = cur_node.left_child_Node, cur_node.right_child_Node
        if left is not None or right is not None:
            child = left if x_test[cur_node.feature_idx] == cur_node.feature_val else right
            if child is not None:
                return self._search_tree_predict(child, x_test)
        # Leaf, or the side the sample belongs to has no child: use this node's distribution.
        class_p = np.zeros(len(self.class_values))
        for i, c in enumerate(self.class_values):
            class_p[i] = cur_node.target_dist.get(c, 0) * cur_node.weight_dist.get(c, 1.0)
        total = np.sum(class_p)
        if total > 0:
            class_p = class_p / total  # normalise (the result used to be discarded)
        return class_p

    def predict_proba(self, x_test):
        """
        Predict the class probabilities of the test samples.

        :param x_test: test samples, m * k
        :return: ndarray (m * n_classes), columns ordered like self.class_values
        :raises ValueError: if the tree has not been built yet
        """
        x_test = np.asarray(x_test)  # accept DataFrame, list, ...
        if self.root_node is None:
            raise ValueError("请先创建决策树...")
        if self.is_feature_all_R:
            if self.dbw.XrangeMap is None:
                raise ValueError("请先创建决策树...")
            x_test = self.dbw.transform(x_test)
        elif self._has_bin_features():
            x_test = self._data_bin_wrapper(x_test)
        prob_dist = [self._search_tree_predict(self.root_node, x_test[i])
                     for i in range(x_test.shape[0])]
        # reshape keeps a 2-D result even when there are no test samples
        return np.asarray(prob_dist).reshape(-1, len(self.class_values))

    def predict(self, x_test):
        """
        Predict the class of the test samples.

        :param x_test: test samples, m * k
        :return: ndarray (m, ) of class labels taken from self.class_values
        """
        x_test = np.asarray(x_test)  # accept DataFrame, list, ...
        # Map the argmax column back to the label; identical to the column index
        # when the labels are 0..n_classes-1, correct for any other labelling.
        return self.class_values[np.argmax(self.predict_proba(x_test), axis=1)]

    @staticmethod
    def _add_node_loss(loss, node):
        """Add to loss the weighted empirical entropy term n * sum(-p * ln p * w) of node."""
        for key, value in node.target_dist.items():
            loss += -1 * node.n_samples * value * np.log(value) * \
                node.weight_dist.get(key, 1.0)
        return loss

    def _prune_node(self, cur_node: "TreeNode_C", alpha):
        """
        Recursively prune bottom-up (post-order): an internal node whose
        children are all leaves is collapsed when that does not increase the loss.

        :param cur_node: node currently examined
        :param alpha: pruning threshold (cost per leaf)
        :return: None
        """
        children = [child for child in (cur_node.left_child_Node, cur_node.right_child_Node)
                    if child is not None]  # one side may be missing (min_sample_leaf)
        for child in children:
            self._prune_node(child, alpha)

        if not children:  # a leaf cannot be pruned
            return
        if any(child.left_child_Node is not None or child.right_child_Node is not None
               for child in children):
            return  # a child is still an internal node: keep this subtree

        # Loss before pruning; 2 accounts for the two leaves below this node.
        pre_prune_value = 2 * alpha
        for child in children:
            pre_prune_value = self._add_node_loss(pre_prune_value, child)
        # Loss after pruning: the current node becomes a single leaf.
        after_prune_value = self._add_node_loss(alpha, cur_node)
        if after_prune_value <= pre_prune_value:  # prune
            cur_node.left_child_Node = None
            cur_node.right_child_Node = None
            cur_node.feature_idx, cur_node.feature_val = None, None

    def prune(self, alpha=0.01):
        """
        Post-prune the tree with the loss C(T) + alpha * |T| (Li Hang).

        :param alpha: pruning threshold balancing training fit against tree complexity
        :return: the root node of the pruned tree
        """
        self._prune_node(self.root_node, alpha)
        return self.root_node

七、分类决策树算法的测试

test_decision_tree_C.py

import pandas as pd
from decision_tree_C import DecisionTreeClassifier
from sklearn.datasets import load_iris, load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder


# data = pd.read_csv("data/watermelon.csv").iloc[:, 1:]
# X = data.iloc[:, :-1]
# y = data.iloc[:, -1]

# iris = load_iris()
# X, y = iris.data, iris.target

# bc_data = load_breast_cancer()
# X, y = bc_data.data, bc_data.target

# Load the nursery data set: every column but the last is a feature.
nursery = pd.read_csv("data/nursery.csv").dropna()
X = np.asarray(nursery.iloc[:, :-1])
y = LabelEncoder().fit_transform(np.asarray(nursery.iloc[:, -1]))  # labels -> 0..n_classes-1

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0, stratify=y)

# Test accuracy as a function of the maximum tree depth (2, 3, ..., 12).
depth = np.arange(2, 13, dtype=np.int64)
accuracy = []
for d in depth:
    dtc = DecisionTreeClassifier(is_feature_all_R=False, max_depth=d)
    dtc.fit(X_train, y_train)
    y_pred_labels = dtc.predict(X_test)
    accuracy.append(accuracy_score(y_test, y_pred_labels))
# dtc = DecisionTreeClassifier(dbw_feature_idx=[6, 7], max_bins=8, max_depth=2)
# dtc.fit(X, y)
# y_pred_prob = dtc.predict_proba(X)
# print(y_pred_prob)

# print(classification_report(y_test, y_pred_labels))

plt.figure(figsize=(7, 5))
plt.plot(depth, accuracy, "ko-", lw=1)
plt.show()

test_decision_tree_C_2.py

import numpy as np
import matplotlib.pyplot as plt
from decision_tree_C import DecisionTreeClassifier
from sklearn.datasets import make_classification
from sklearn.metrics import classification_report, accuracy_score
from utils.plt_decision_function import plot_decision_function


# Generate a two-feature, two-class toy data set.
data, target = make_classification(
    n_samples=100, n_features=2, n_classes=2, n_informative=1, n_redundant=0,
    n_clusters_per_class=1, class_sep=0.8, random_state=21)

# Unpruned CART tree, evaluated on its own training data.
cart_tree = DecisionTreeClassifier(is_feature_all_R=True)
cart_tree.fit(data, target)
y_test_pred = cart_tree.predict(data)
print(classification_report(target, y_test_pred))
acc = accuracy_score(target, y_test_pred)
plt.figure(figsize=(14, 10))
plt.subplot(221)
plot_decision_function(data, target, cart_tree, acc=acc, is_show=False, title_info="By CART UnPrune")

# Prune the same tree with increasing thresholds, one subplot per threshold.
alpha = [1, 3, 5]
for i, threshold in enumerate(alpha):
    cart_tree.prune(alpha=threshold)
    y_test_pred = cart_tree.predict(data)
    acc = accuracy_score(target, y_test_pred)
    plt.subplot(222 + i)
    plot_decision_function(data, target, cart_tree, acc=acc, is_show=False,
                           title_info="By CART Prune α = %.1f" % threshold)
plt.tight_layout()
plt.show()

test_decision_tree_C_3.py

import copy

import numpy as np
import matplotlib.pyplot as plt
from decision_tree_C import DecisionTreeClassifier
from sklearn.datasets import load_breast_cancer, load_iris
from sklearn.metrics import classification_report, accuracy_score
from utils.plt_decision_function import plot_decision_function
from sklearn.model_selection import StratifiedKFold


bc_data = load_breast_cancer()
X, y = bc_data.data, bc_data.target
alphas = np.linspace(0, 10, 30)
accuracy_scores = []  # mean cross-validation accuracy for every pruning threshold
cart = DecisionTreeClassifier(criterion="cart", is_feature_all_R=True, max_bins=10)
for alpha in alphas:
    # 10-fold stratified cross validation of a tree pruned with this threshold.
    scores = []
    for train_idx, test_idx in StratifiedKFold(n_splits=10).split(X, y):
        tree = copy.deepcopy(cart)  # fresh, unfitted copy for every fold
        tree.fit(X[train_idx], y[train_idx])
        tree.prune(alpha=alpha)
        fold_pred = tree.predict(X[test_idx])
        scores.append(accuracy_score(y[test_idx], fold_pred))
        del tree
    mean_score = np.mean(scores)
    print(alpha, ":", mean_score)
    accuracy_scores.append(mean_score)

label_font = {"fontsize": 12}
plt.figure(figsize=(7, 5))
plt.plot(alphas, accuracy_scores, "ko-", lw=1)
plt.grid(ls=":")
plt.xlabel("Alpha", fontdict=label_font)
plt.ylabel("Accuracy Scores", fontdict=label_font)
plt.title("Cross Validation Scores under different Prune Alpha", fontdict={"fontsize": 14})
plt.show()

你可能感兴趣的:(python,机器学习,决策树,笔记)

Python, Rust 开发机关事业单位公文写作助手APP
#机关事业单位公文写作助手应用设计基于Python和Rust开发机关事业单位公文写作APP的方案如下：##系统架构设计```桌面端/Web应用(Tauri/Vue.js)|RESTAPI/WebSocket|Rust核心服务(Actix-web/Axum)←───PythonNLP服务(FastAPI)|公文要素提取/模板生成/智能校对PostgreSQL数据库|Redis缓存(模板缓存/会话管理
[Pytest][Part 3]检测python package状态思则变 pytest pytest
目录实现需求1：检查pythonpackage状态——pkg_resourcehook实现自动检测包状态conftest.pyhook钩子函数Part1:https://blog.csdn.net/x1987200567/article/details/144915315?spm=1001.2014.3001.5501从这里开始逐个实现Part1中的需求实现需求1：测试开始前检查测试需要的pyth
PYTHON从入门到实践11-控制台实现商品管理系统
**********************************************************************输入序号代表你要做的操作：1.查询所有商品2.增加商品3.修改商品4.删除商品5.退出系统2请输入增加商品信息：XXXXXX(示例：香蕉5元)鸡蛋1元恭喜，添加新商品成功*********************************************
基于python+django+vue.js开发的停车管理系统源码+运行
功能介绍平台采用B/S结构，后端采用主流的Python语言进行开发，前端采用主流的Vue.js进行开发。技术学习共同进步功能包括：车位管理、会员管理、停车场管理、违规管理、用户管理、日志管理、系统信息模块。源码地址https://github.com/geeeeeeeek/python_parking演示地址http://parking.gitapp.cn演示帐号：用户名：admin123密码：a
基于python+django+vue.js开发的学生成绩管理系统西门吹雪1998 毕业设计合集 python django vue.js
功能介绍平台采用B/S结构，后端采用主流的Python语言进行开发，前端采用主流的Vue.js进行开发。功能包括：成绩管理、学生管理、课程管理、班级管理、用户管理、日志管理、系统信息模块。源码地址https://github.com/geeeeeeeek/python_score演示地址http://score.gitapp.cn演示帐号：用户名：admin123密码：admin123主要使用技术
【课程设计】基于python+django+vue.js开发的健身房管理系统
功能介绍平台采用B/S结构，后端采用主流的Python语言进行开发，前端采用主流的Vue.js进行开发。给师妹的课程作业。功能包括：教练管理、会员管理、场地管理、设备管理、用户管理、日志管理、系统信息模块。源码地址https://github.com/geeeeeeeek/python_fitness演示地址http://fitness.gitapp.cn演示帐号：用户名：admin123密码：a
基于python+django+vue.js开发的健身房管理系统源码+运行
功能介绍平台采用B/S结构，后端采用主流的Python语言进行开发，前端采用主流的Vue.js进行开发。技术学习共同进步功能包括：教练管理、会员管理、场地管理、设备管理、用户管理、日志管理、系统信息模块。源码地址https://github.com/geeeeeeeek/python_fitness演示地址http://fitness.gitapp.cn演示帐号：用户名：admin123密码：ad
【Python小工具】使用 OpenCV 获取视频时长的详细指南
【Python小工具】使用OpenCV获取视频时长的详细指南在处理视频数据时，获取视频的时长是一项常见且基础的需求。无论是进行视频分析、编辑，还是在视频处理项目中进行预处理，了解视频的时长都是不可或缺的一步。在Python中，借助强大的OpenCV库，我们可以轻松实现这一功能。本文将详细介绍如何使用Python和OpenCV获取视频时长，并对每一行代码进行深入解析。一、代码实现importcv2d
22. Java JUC源码分析系列笔记-JDK1.8的ConcurrentHashMap Thinker QAQ Java JUC源码分析 java 笔记开发语言
文章目录1.是什么2.如何使用3.原理分析3.1.构造方法3.1.1.Node3.2.put方法【有加锁】3.2.1.计算key的hash3.2.2.死循环3.2.3.第一次进来table为空，所以需要初始化table3.2.3.1.使用CAS加锁防止多线程同时初始化table3.2.3.2.其他线程让出CPU直到扩容完毕3.2.4.第二次进来table不为空，链表肯定为空【头节点为空】，那么CA
Finnhub Python API 客户端项目常见问题解决方案柏滢凝Wayne
FinnhubPythonAPI客户端项目常见问题解决方案finnhub-pythonFinnhubPythonAPIClient.FinnhubAPIprovidesinstitutional-gradefinancialdatatoinvestors,fintechstartupsandinvestmentfirms.Wesupportreal-timestockprice,globalfun
【大模型面试】大模型Prompt Engineer面试题及参考答案大模型知识 prompt 人工智能开发语言 python chatgpt 深度学习大模型
一、基础概念类1.什么是大模型？大模型通常指具有庞大参数规模的机器学习模型，尤其是在自然语言处理（NLP）和计算机视觉等领域。这些模型能够学习到大量数据中的复杂模式和特征，具备强大的泛化能力，可在多种任务上表现出色，如GPT系列、BERT等。2.大模型与传统机器学习模型的区别是什么？传统机器学习模型参数规模相对较小，往往针对特定任务进行设计和训练，需要较多人工特征工程。而大模型参数数量庞大，通过在
C#串口通信上位机笔记（modbus协议）指针刺客 c#笔记开发语言
C#串口通信上位机笔记（modbus协议）提示：这里可以添加系列文章的所有文章的目录，目录需要自己手动添加例如：第一章Python机器学习入门之pandas的使用提示：写完文章后，目录可以自动生成，如何生成可参考右边的帮助文档文章目录C#串口通信上位机笔记（modbus协议）前言一、新建工程二、使用步骤1.引入库2.串口初始化总结前言提示：这里可以添加本文要记录的大概内容：记录自己工作的上位机经验
项目管理自动化：如何用技术优化资源日历？项目管理实战手册自动化运维 ai
项目管理自动化：如何用技术优化资源日历？关键词：项目管理自动化、资源日历、智能调度、冲突检测、资源分配优化摘要：本文从项目管理中“资源日历”的核心痛点出发，结合自动化技术原理与实战案例，详细讲解如何通过技术手段（如AI算法、工具集成、低代码开发）优化资源日历管理。文章用“餐厅订座”“快递分拣”等生活案例类比技术概念，帮助读者理解复杂流程，最后通过Python代码实战演示自动化实现过程，为项目管理者
RAG实战指南 Day 4：LlamaIndex框架实战指南在未来等你 RAG实战指南 RAG LlamaIndex 检索增强生成大语言模型 AI开发
【RAG实战指南Day4】LlamaIndex框架实战指南文章标签RAG,LlamaIndex,检索增强生成,大语言模型,AI开发文章简述本文是"RAG实战指南"系列的第4天，聚焦LlamaIndex框架的核心功能与实战应用。我们将深入解析LlamaIndex在RAG系统中的定位，详细讲解其数据连接器、索引构建和查询引擎三大核心组件的工作原理。文章包含完整的Python代码实现，展示如何从零构建一
从零用java实现小红书 springboot vue uniapp （7）im 在线聊天功能关注功能顽疲 java仿写小红书 java spring boot vue.js uni-app
前言移动端演示http://8.146.211.120:8081/#/前面的文章我们主要完成了笔记的点赞和收藏及留言功能今天我们讲解点赞关注im聊天功能关注我们需要有一个关注的操作这里我们复用个人中心页面按钮会有三种形式关注取消关注互相关注三种样式取消关注回关关注关注和点赞功能实现原理大致相同只不过有一个互相关注后台先创建一个关注表CREATETABLE`business_follow`(`ID`
破译AI黑箱：如何用20行Python理解ChatGPT？ Ven% 简单入门pytorch 人工智能 python chatgpt
文章目录一、核心概念：大模型本质二、代码逐行解析（以线性回归为例）三、关键概念详解四、与大模型的本质联系五、大模型训练核心思想六、如何扩展成真实大模型七、总结：AI训练的本质一、核心概念：大模型本质大模型=复杂数学函数+数据驱动训练现实任务（如图像识别、语言翻译）过于复杂，人类无法直接编写数学函数解决。解决方案：构建参数化的数学模型（如神经网络）用大量数据训练，自动寻找最优参数得到能解决特定任务的
python for android api,python-for-android
python-for-androidpython-for-androidisapackagingtoolforPythonappsonAndroid.YoucancreateyourownPythondistributionincludingthemodulesanddependenciesyouwant,andbundleitinanAPKalongwithyourowncode.Feature
Python国内镜像地址及配置使用方式源图客工具应用 Python python 开发语言
便捷设置pip全局镜像源pipconfigsetglobal.index-urlhttps://pypi.tuna.tsinghua.edu.cn/simple一、国内镜像地址清华源：https://pypi.tuna.tsinghua.edu.cn/simple/阿里云：http://mirrors.aliyun.com/pypi/simple/清华大学：https://pypi.tuna.ts
python学习之路 - python的文件操作
目录一、python文件操作1、文件的编码a、概念b、编码分类2、文件的读取a、打开文件a、读取文件字节c、读取文件行d、for循环的读取3、文件的关闭4、文件的写入5、文件的追加6、文件操作综合a、案例一：读取字符个数b、案例二：复制文件一、python文件操作1、文件的编码a、概念计算机只能识别0和1，所以需要用编码技术将内容翻译成0和1b、编码分类UTF-8除非有特殊要求，否则一般都用此编码
Pytorch Lightning使用：【LightningModule、LightningDataModule、Trainer、ModelCheckpoint】 LeapMay pytorch lightning pytorch python 人工智能
pytorchlightning官方手册pytorchlightning官方手册Welcometo⚡PyTorchLightning—PyTorchLightning2.1.0devdocumentationhttps://lightning.ai/docs/pytorch/latest/PytorchLightning简介PyTorchLightning是面向专业AI研究人员和机器学习工程师的深
【华为OD机试真题 2025B卷】770、周末爬山 | 机试真题+思路参考+代码解析（C++、Java、Py、C语言、JS） KJ.JK OJ+最新华为OD机试 (C++Java Py C JS)华为od c++java 华为OD机试真题 2025B卷 javascript 周末爬山
文章目录一、题目题目描述输入输出样例1样例2二、代码与思路参考C++语言思路C++代码Java语言思路Java代码Python语言思路Python代码C语言思路C代码JS语言思路JS代码作者：KJ.JK订阅本专栏后即可解锁在线OJ刷题权限专栏介绍：最新的华为OD机试题目总结，使用C++、Java、Python、C语言、JS五种语言进行解答，每个题目的思路分析都非常详细，支持在线OJ评测刷题！！！！
匹配一切学习笔记2025 AI算法网奇 python宝典计算机视觉人工智能
目录匹配一切MASAdemo:图像匹配roma匹配一切MASAMatchingAnythingBySegmentingAnything[CVPR24Highlight]git地址：https://github.com/siyuanliii/masaMethodBaseNovelmodelTETAAssocATETAAssocAOVTrack(CVPR23)35.536.927.833.6-<
orb-slam run rgbd data hetongqiyue 计算机视觉 slam
TUM数据集准备+RGB-D运行从这个网址下载tum数据集[http://vision.in.tum.de/data/datasets/rgbd-dataset/download]并且解压缩。使用python脚本关联RGB图像和深度图像[associate.py],[http://vision.in.tum.de/data/datasets/rgbd-dataset/tools].我们已经提供了一
Blender glTF 2.0 导入导出插件技术文档花锨潜Praised
BlenderglTF2.0导入导出插件技术文档glTF-Blender-IOBlenderglTF2.0importerandexporter项目地址:https://gitcode.com/gh_mirrors/gl/glTF-Blender-IO1.安装指南1.1系统要求Blender2.8及以上版本Python3.x1.2安装步骤Blender内置插件：Blender2.8及以上版本已经内
Python 开发安卓Android及IOS应用库Kivy安装尝试 dingcb168 WIFI 开关 KIVY
yeayee------>更多技巧------>更多源码------>www.yeayee.com转载：https://www.cnblogs.com/yeayee/p/5420858.htmlPython开发安卓Android及IOS应用库Kivy安装尝试Python开发安卓Android及IOS应用库Kivy安装尝试：先来看看这货可以用来制作什么应用：CreateapackageforWind
Python数据解析与图片下载工具：从JSON到本地文件的自动化流程龙潜月七 python json 自动化
Python数据解析与图片下载工具：从JSON到本地文件的自动化流程在日常开发和数据处理中，我们经常需要从JSON数据中提取信息，并根据其中的URL下载相关资源。本文将介绍一个实用的Python工具，它可以解析JSON数据，提取关键信息，并批量下载图片资源，适用于数据采集、内容管理和自动化测试等场景。一、工具功能概述这个多功能工具包含三个核心模块：JSON数据解析：从输入的JSON字符串中提取指定
Python时间管理工具：实现定时任务的时间点循环更新龙潜月七 python java 服务器
Python时间管理工具：实现定时任务的时间点循环更新在自动化任务、定时推送等场景中，我们常常需要按照预设的时间点循环执行操作。本文将介绍一个实用的Python工具，它可以根据预设的时间点列表自动计算下一个执行时间，并将其保存到文件中，适用于定时任务调度、内容发布等场景。一、工具功能与应用场景这个时间管理工具的核心功能是：维护一个预设时间点列表（如每天6:00、12:00、18:00、22:00）
Termux备份与恢复龙潜月七 python
转载自：https://bbs.zsxwz.com/thread-3473.htm好不容易才安装了各种软件，环境等等，因此做一个备份有时候还是必要的，以免一时手贱。备份方法很多，比如dd，rsync，restic，比较简单的就是使用tar打包。（主要是因为学生党，没流量每次下载400m的python了）爬虫在手，天下我有。https://wiki.termux.com/wiki/Backing_u
Python Selenium搭建UI自动化测试框架测试界霄霄软件测试 python selenium ui 功能测试软件测试自动化测试程序人生
自动化测试是软件测试中非常重要的一部分，可以提高测试效率和测试覆盖率。在UI自动化测试中，Selenium是非常流行的工具。本文将介绍如何使用Python和Selenium搭建UI自动化测试框架。一、环境准备在开始搭建UI自动化测试框架之前，需要先安装Python和Selenium。可以从Python官网下载Python安装包，并使用pip命令安装Selenium。二、框架目录结构在搭建UI自动化
python-文件操作 qq_512720272 python python 开发语言
1文件操作1.1文件打开与关闭1.1.1打开文件-open #打开文件（默认为只读模式） file_path='example.txt' with open(file_path,'r') as file: #执行文件操作，例如读取文件内容 file_content=file.read() print(file_content) #文件在with块结束后会自动关闭，无需显式关闭文件 在上述示例中：'example.txt
Js函数返回值 _wy_ js return
一、返回控制与函数结果，语法为：return 表达式;作用: 结束函数执行，返回调用函数，而且把表达式的值作为函数的结果二、返回控制语法为：return;作用: 结束函数执行，返回调用函数，而且把undefined作为函数的结果在大多数情况下,为事件处理函数返回false,可以防止默认的事件行为.例如,默认情况下点击一个<a>元素,页面会跳转到该元素href属性
MySQL 的 char 与 varchar bylijinnan mysql
今天发现，create table 时，MySQL 4.1有时会把 char 自动转换成 varchar 测试举例： CREATE TABLE `varcharLessThan4` ( `lastName` varchar(3) ) ; mysql> desc varcharLessThan4; +----------+---------+------+-
Quartz——TriggerListener和JobListener eksliang TriggerListener JobListener quartz
转载请出自出处：http://eksliang.iteye.com/blog/2208624 一.概述 listener是一个监听器对象，用于监听scheduler中发生的事件，然后执行相应的操作；你可能已经猜到了，TriggerListeners接受与trigger相关的事件，JobListeners接受与jobs相关的事件。二.JobListener监听器 j
oracle层次查询 18289753290 oracle；层次查询；树查询
.oracle层次查询(connect by) oracle的emp表中包含了一列mgr指出谁是雇员的经理，由于经理也是雇员，所以经理的信息也存储在emp表中。这样emp表就是一个自引用表，表中的mgr列是一个自引用列，它指向emp表中的empno列，mgr表示一个员工的管理者， select empno,mgr,ename,sal from e
通过反射把map中的属性赋值到实体类bean对象中酷的飞上天空 javaee 泛型类型转换
使用过struts2后感觉最方便的就是这个框架能自动把表单的参数赋值到action里面的对象中但现在主要使用Spring框架的MVC，虽然也有@ModelAttribute可以使用但是明显感觉不方便。好吧，那就自己再造一个轮子吧。原理都知道，就是利用反射进行字段的赋值，下面贴代码主要类如下： import java.lang.reflect.Field; imp
SAP HANA数据存储：传统硬盘的瓶颈问题蓝儿唯美 HANA
SAPHANA平台有各种各样的应用场景，这也意味着客户的实施方法有许多种选择，关键是如何挑选最适合他们需求的实施方案。在《Implementing SAP HANA》这本书中，介绍了SAP平台在现实场景中的运作原理，并给出了实施建议和成功案例供参考。本系列文章节选自《Implementing SAP HANA》，介绍了行存储和列存储的各自特点，以及SAP HANA的数据存储方式如何提升空间压
Java Socket 多线程实现文件传输随便小屋 java socket
高级操作系统作业，让用Socket实现文件传输，有些代码也是在网上找的，写的不好，如果大家能用就用上。客户端类： package edu.logic.client; import java.io.BufferedInputStream; import java.io.Buffered
java初学者路径 aijuans java
学习Java有没有什么捷径?要想学好Java，首先要知道Java的大致分类。自从Sun推出Java以来，就力图使之无所不包，所以Java发展到现在，按应用来分主要分为三大块：J2SE,J2ME和J2EE,这也就是Sun ONE(Open Net Environment)体系。J2SE就是Java2的标准版，主要用于桌面应用软件的编程；J2ME主要应用于嵌入式系统开发，如手机和PDA的编程；J2EE
APP推广 aoyouzi APP 推广
一，免费篇 1，APP推荐类网站自主推荐最美应用、酷安网、DEMO8、木蚂蚁发现频道等,如果产品独特新颖，还能获取最美应用的评测推荐。PS：推荐简单。只要产品有趣好玩，用户会自主分享传播。例如足迹APP在最美应用推荐一次，几天用户暴增将服务器击垮。 2，各大应用商店首发合作老实盯着排期，多给应用市场官方负责人献殷勤。 3，论坛贴吧推广百度知道，百度贴吧，猫扑论坛，天涯社区，豆瓣（
JSP转发与重定向百合不是茶 jsp servlet Java Web jsp转发
在servlet和jsp中我们经常需要请求,这时就需要用到转发和重定向; 转发包括;forward和include 例子;forward转发; 将请求转发给reg.html页面关键代码; req.getRequestDispatcher("reg.html
web.xml之jsp-config bijian1013 java web.xml servlet jsp-config
1.作用：主要用于设定JSP页面的相关配置。 2.常见定义： <jsp-config> <taglib> <taglib-uri>URI(定义TLD文件的URI,JSP页面的taglib命令可以经由此URI获取到TLD文件)</taglib-uri> <taglib-location> TLD文件所在的位置
JSF2.2 ViewScoped Using CDI sunjing CDI JSF 2.2 ViewScoped
JSF 2.0 introduced annotation @ViewScoped; A bean annotated with this scope maintained its state as long as the user stays on the same view(reloads or navigation - no intervening views). One problem w
【分布式数据一致性二】Zookeeper数据读写一致性 bit1129 zookeeper
很多文档说Zookeeper是强一致性保证，事实不然。关于一致性模型请参考http://bit1129.iteye.com/blog/2155336 Zookeeper的数据同步协议 Zookeeper采用称为Quorum Based Protocol的数据同步协议。假如Zookeeper集群有N台Zookeeper服务器(N通常取奇数，3台能够满足数据可靠性同时
Java开发笔记白糖_ java开发
1、Map<key,value>的remove方法只能识别相同类型的key值 Map<Integer,String> map = new HashMap<Integer,String>(); map.put(1,"a"); map.put(2,"b"); map.put(3,"c"
图片黑色阴影 bozch 图片
.event{ padding:0; width:460px; min-width: 460px; border:0px solid #e4e4e4; height: 350px; min-heig
编程之美-饮料供货-动态规划 bylijinnan 动态规划
import java.util.Arrays; import java.util.Random; public class BeverageSupply { /** * 编程之美饮料供货 * 设Opt（V’，i）表示从i到n-1种饮料中，总容量为V’的方案中，满意度之和的最大值。 * 那么递归式就应该是：Opt（V’，i）=max{ k * Hi+Op
ajax大参数（大数据）提交性能分析 chenbowen00 Web Ajax 框架浏览器 prototype
近期在项目中发现如下一个问题项目中有个提交现场事件的功能，该功能主要是在web客户端保存现场数据（主要有截屏，终端日志等信息）然后提交到服务器上方便我们分析定位问题。客户在使用该功能的过程中反映点击提交后反应很慢，大概要等10到20秒的时间浏览器才能操作，期间页面不响应事件。根据客户描述分析了下的代码流程，很简单，主要通过OCX控件截屏，再将前端的日志等文件使用OCX控件打包，再将之转换为
[宇宙与天文]在太空采矿,在太空建造 comsci
我们在太空进行工业活动...但是不太可能把太空工业产品又运回到地面上进行加工,而一般是在哪里开采,就在哪里加工,太空的微重力环境,可能会使我们的工业产品的制造尺度非常巨大.... 地球上制造的最大工业机器是超级油轮和航空母舰,再大些就会遇到困难了,但是在空间船坞中,制造的最大工业机器,可能就没
ORACLE中CONSTRAINT的四对属性 daizj oracle CONSTRAINT
ORACLE中CONSTRAINT的四对属性 summary:在data migrate时,某些表的约束总是困扰着我们,让我们的migrate举步维艰,如何利用约束本身的属性来处理这些问题呢?本文详细介绍了约束的四对属性: Deferrable/not deferrable, Deferred/immediate, enable/disable, validate/novalidate,以及如
Gradle入门教程 dengkane gradle
一、寻找gradle的历程一开始的时候，我们只有一个工程，所有要用到的jar包都放到工程目录下面，时间长了，工程越来越大，使用到的jar包也越来越多，难以理解jar之间的依赖关系。再后来我们把旧的工程拆分到不同的工程里，靠ide来管理工程之间的依赖关系，各工程下的jar包依赖是杂乱的。一段时间后，我们发现用ide来管理项程很不方便，比如不方便脱离ide自动构建，于是我们写自己的ant脚本。再后
C语言简单循环示例 dcj3sjt126com c
# include <stdio.h> int main(void) { int i; int count = 0; int sum = 0; float avg; for (i=1; i<=100; i++) { if (i%2==0) { count++; sum += i; } } avg
presentModalViewController 的动画效果 dcj3sjt126com controller
系统自带(四种效果)： presentModalViewController模态的动画效果设置： [cpp] view plain copy UIViewController *detailViewController = [[UIViewController al
java 二分查找 shuizhaosi888 二分查找 java二分查找
需求：在排好顺序的一串数字中，找到数字T 一般解法：从左到右扫描数据，其运行花费线性时间O(N)。然而这个算法并没有用到该表已经排序的事实。 /** * * @param array * 顺序数组 * @param t * 要查找对象 * @return */ public stati
Spring Security（07）——缓存UserDetails 234390216 ehcache 缓存 Spring Security
Spring Security提供了一个实现了可以缓存UserDetails的UserDetailsService实现类，CachingUserDetailsService。该类的构造接收一个用于真正加载UserDetails的UserDetailsService实现类。当需要加载UserDetails时，其首先会从缓存中获取，如果缓存中没
Dozer 深层次复制 jayluns VO maven po
最近在做项目上遇到了一些小问题，因为架构在做设计的时候web前端展示用到了vo层，而在后台进行与数据库层操作的时候用到的是Po层。这样在业务层返回vo到控制层，每一次都需要从po-->转化到vo层，用到BeanUtils.copyProperties(source, target)只能复制简单的属性，因为实体类都配置了hibernate那些关联关系，所以它满足不了现在的需求，但后发现还有个很
CSS规范整理（摘自懒人图库） a409435341 html UI css 浏览器
刚没事闲着在网上瞎逛，找了一篇CSS规范整理，粗略看了一下后还蛮有一定的道理，并自问是否有这样的规范，这也是初入前端开发的人一个很好的规范吧。一、文件规范 1、文件均归档至约定的目录中。具体要求通过豆瓣的CSS规范进行讲解：所有的CSS分为两大类：通用类和业务类。通用的CSS文件，放在如下目录中：基本样式库 /css/core
C++动态链接库创建与使用你不认识的休道人 C++dll
一、创建动态链接库 1.新建工程test中选择"MFC [dll]"dll类型选择第二项"Regular DLL With MFC shared linked"，完成 2.在test.h中添加 extern "C" 返回类型 _declspec(dllexport)函数名(参数列表); 3.在test.cpp中最后写 extern "C" 返回类型 _decls
Android代码混淆之ProGuard rensanning ProGuard
Android应用的Java代码，通过反编译apk文件（dex2jar、apktool）很容易得到源代码，所以在release版本的apk中一定要混淆一下一些关键的Java源码。 ProGuard是一个开源的Java代码混淆器（obfuscation）。ADT r8开始它被默认集成到了Android SDK中。官网： http://proguard.sourceforge.net/
程序员在编程中遇到的奇葩弱智问题 tomcat_oracle jquery 编程 ide
　　现在收集一下：　　排名不分先后，按照发言顺序来的。 1、Jquery插件一个通用函数一直报错，尤其是很明显是存在的函数，很有可能就是你没有引入jquery。。。或者版本不对 2、调试半天没变化：不在同一个文件中调试。这个很可怕，我们很多时候会备份好几个项目，改完发现改错了。有个群友说的好：在汤匙
解决maven-dependency-plugin (goals "copy-dependencies","unpack") is not supported xp9802 dependency
解决办法：在plugins之前添加如下pluginManagement，二者前后顺序如下： [html] view plain copy <build> <pluginManagement