# -*- coding: utf-8 -*-
"""
Created on Mon Dec 10 09:32:55 2018
@author: muli
"""
from sklearn.model_selection import train_test_split,KFold,StratifiedKFold,\
LeaveOneOut,cross_val_score
import numpy as np
def test_train_test_split():
    """
    Demonstrate train_test_split, with and without stratified sampling.

    Splits a small 8-sample data set twice and prints the four resulting
    parts (X_train, X_test, y_train, y_test) for each split.

    :return: None
    """
    X = [
        [1, 2, 3, 4],
        [11, 12, 13, 14],
        [21, 22, 23, 24],
        [31, 32, 33, 34],
        [41, 42, 43, 44],
        [51, 52, 53, 54],
        [61, 62, 63, 64],
        [71, 72, 73, 74],
    ]
    y = [1, 1, 0, 0, 1, 1, 0, 0]

    # Plain split: the test set takes 40% of the original data set.
    plain_parts = train_test_split(X, y, test_size=0.4, random_state=0)
    plain_labels = ("X_train=", "X_test=", "y_train=", "y_test=")
    for label, part in zip(plain_labels, plain_parts):
        print(label, part)

    print("----------------")

    # Stratified split: same 40% test size, but class proportions of y are
    # preserved in both the training and the test part.
    stratified_parts = train_test_split(
        X, y, test_size=0.4, random_state=0, stratify=y
    )
    stratified_labels = (
        "Stratify:X_train=",
        "Stratify:X_test=",
        "Stratify:y_train=",
        "Stratify:y_test=",
    )
    for label, part in zip(stratified_labels, stratified_parts):
        print(label, part)
def test_KFold():
    """
    Demonstrate KFold, first without and then with shuffling.

    Splits a 9-sample data set into 3 folds twice and, for every fold,
    prints the train/test indices together with the matching rows of X.

    :return: None
    """
    X = np.array([
        [1, 2, 3, 4],
        [11, 12, 13, 14],
        [21, 22, 23, 24],
        [31, 32, 33, 34],
        [41, 42, 43, 44],
        [51, 52, 53, 54],
        [61, 62, 63, 64],
        [71, 72, 73, 74],
        [81, 82, 83, 84],
    ])
    y = np.array([1, 1, 0, 0, 1, 1, 0, 0, 1])

    # Do not shuffle the data set before splitting.
    # random_state is deliberately left at its default here: it only has an
    # effect when shuffle=True, and scikit-learn >= 0.24 raises ValueError
    # when it is combined with shuffle=False.
    folder = KFold(n_splits=3, shuffle=False)
    for train_index, test_index in folder.split(X, y):
        print("Train Index:", train_index)
        print("Test Index:", test_index)
        print("X_train:", X[train_index])
        print("X_test:", X[test_index])
        print("")
        print("-----------------------")

    print("$$$$$$$$$$$$$$$$$$$$$$$$$")

    # Shuffle the data set before splitting; the fixed seed keeps the
    # shuffled folds reproducible between runs.
    shuffle_folder = KFold(n_splits=3, random_state=0, shuffle=True)
    for train_index, test_index in shuffle_folder.split(X, y):
        print("Shuffled Train Index:", train_index)
        print("Shuffled Test Index:", test_index)
        print("Shuffled X_train:", X[train_index])
        print("Shuffled X_test:", X[test_index])
        print("")
        print("***************************")
def test_StratifiedKFold():
    """
    Compare KFold with StratifiedKFold on the same labelled data set.

    Splits an 8-sample data set into 4 folds with each splitter and, for
    every fold, prints the train/test indices and the matching labels so
    the class balance of the two strategies can be compared.

    :return: None
    """
    X = np.array([
        [1, 2, 3, 4],
        [11, 12, 13, 14],
        [21, 22, 23, 24],
        [31, 32, 33, 34],
        [41, 42, 43, 44],
        [51, 52, 53, 54],
        [61, 62, 63, 64],
        [71, 72, 73, 74],
    ])
    y = np.array([1, 1, 0, 0, 1, 1, 0, 0])

    # Neither splitter shuffles, so random_state is left at its default:
    # it has no effect without shuffling, and scikit-learn >= 0.24 raises
    # ValueError when it is set together with shuffle=False.
    folder = KFold(n_splits=4, shuffle=False)
    stratified_folder = StratifiedKFold(n_splits=4, shuffle=False)

    for train_index, test_index in folder.split(X, y):
        print("Train Index:", train_index)
        print("Test Index:", test_index)
        print("y_train:", y[train_index])
        print("y_test:", y[test_index])
        print("")
        print("***************************")

    print("$$$$$$$$$$$$$$$$$$$$$$$$$")

    # StratifiedKFold needs y to keep the class proportions in every fold.
    for train_index, test_index in stratified_folder.split(X, y):
        print("Stratified Train Index:", train_index)
        print("Stratified Test Index:", test_index)
        print("Stratified y_train:", y[train_index])
        print("Stratified y_test:", y[test_index])
        print("")
        print("-----------------------")
def test_LeaveOneOut():
    """
    Demonstrate LeaveOneOut on a 4-sample data set.

    Prints the shape and length of y, then, for every split, the
    train/test indices and the matching rows of X.

    :return: None
    """
    X = np.array([
        [1, 2, 3, 4],
        [11, 12, 13, 14],
        [21, 22, 23, 24],
        [31, 32, 33, 34],
    ])
    y = np.array([1, 1, 0, 0])
    print(np.shape(y))
    print(len(y))

    # The old form LeaveOneOut(len(y)) is deprecated and now fails with
    # "TypeError: __init__() takes 1 positional argument but 2 were given";
    # the splitter takes no arguments and the data is passed to split().
    splitter = LeaveOneOut()
    for train_index, test_index in splitter.split(X):
        print("Train Index:", train_index)
        print("Test Index:", test_index)
        print("X_train:", X[train_index])
        print("X_test:", X[test_index])
        print("")
def test_cross_val_score():
    """
    Demonstrate cross_val_score with a LinearSVC on the digits data set.

    Runs 10-fold cross-validation and prints the array of per-fold scores.

    :return: None
    """
    from sklearn.datasets import load_digits
    from sklearn.svm import LinearSVC

    # Load a data set intended for classification problems.
    dataset = load_digits()
    X, y = dataset.data, dataset.target

    # Use LinearSVC as the classifier being scored.
    classifier = LinearSVC()
    scores = cross_val_score(classifier, X, y, cv=10)
    print("Cross Val Score is:", scores)
if __name__ == "__main__":
    # Uncomment the demo you want to run; only the last one is active.
    # test_train_test_split()  # run the train_test_split demo
    # test_KFold()  # run the KFold demo
    # test_StratifiedKFold()  # run the StratifiedKFold demo
    # test_LeaveOneOut()  # run the LeaveOneOut demo
    test_cross_val_score()  # run the cross_val_score demo