留出法(简单交叉验证)

from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import ShuffleSplit
from collections import Counter
from sklearn.datasets import load_iris


def test01():

    # 1. 加载数据集
    x, y = load_iris(return_X_y=True)
    print('原始类别比例:', Counter(y))

    # 2. 留出法(随机分割)
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
    print('随机类别分割:', Counter(y_train), Counter(y_test))

    # 3. 留出法(分层分割)
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, stratify=y)
    print('分层类别分割:', Counter(y_train), Counter(y_test))


def test02():

    # 1. 加载数据集
    x, y = load_iris(return_X_y=True)
    print('原始类别比例:', Counter(y))
    print('*' * 40)

    # 2. 多次划分(随机分割)
    spliter = ShuffleSplit(n_splits=5, test_size=0.2, random_state=0)
    for train, test in spliter.split(x, y):
        print('随机多次分割:', Counter(y[test]))

    print('*' * 40)

    # 3. 多次划分(分层分割)
    spliter = StratifiedShuffleSplit(n_splits=5, test_size=0.2, random_state=0)
    for train, test in spliter.split(x, y):
        print('分层多次分割:', Counter(y[test]))


if __name__ == '__main__':
    test01()
    test02()

你可能感兴趣的:(机器学习,算法,人工智能,sklearn,近邻算法)