KFold 用于构造交叉验证数据集:它不直接返回数据,而是为每一折(fold)提供训练集和测试集各自的样本下标(index)。
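参数:KFold(n, n_folds=3, shuffle=False, random_state=None)
(注:这是旧版 sklearn.cross_validation 的接口,该模块自 scikit-learn 0.20 起已移除;新版请使用 sklearn.model_selection.KFold(n_splits=...),并通过 kf.split(X) 迭代。)
n 为样本总数
n_folds 为划分的折数(即生成多少组训练集/测试集)
shuffle 为划分前是否先随机打乱样本顺序
random_state 为随机种子;在 shuffle=True 时指定它,可使每次划分的结果保持一致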
from sklearn.cross_validation import KFold
import numpy as np
X = np.arange(24).reshape(12,2)
X
array([[ 0, 1],
[ 2, 3],
[ 4, 5],
[ 6, 7],
[ 8, 9],
[10, 11],
[12, 13],
[14, 15],
[16, 17],
[18, 19],
[20, 21],
[22, 23]])
1. shuffle=False:按原始顺序划分,两次结果相同
kf = KFold(12,n_folds=5,shuffle=False)
for i,(train_index,test_index) in enumerate(kf):
print(i,train_index,test_index)
0 [ 3 4 5 6 7 8 9 10 11] [0 1 2]
1 [ 0 1 2 6 7 8 9 10 11] [3 4 5]
2 [ 0 1 2 3 4 5 8 9 10 11] [6 7]
3 [ 0 1 2 3 4 5 6 7 10 11] [8 9]
4 [0 1 2 3 4 5 6 7 8 9] [10 11]
kf = KFold(12,n_folds=5,shuffle=False)
for i,(train_index,test_index) in enumerate(kf):
print(i,train_index,test_index)
0 [ 3 4 5 6 7 8 9 10 11] [0 1 2]
1 [ 0 1 2 6 7 8 9 10 11] [3 4 5]
2 [ 0 1 2 3 4 5 8 9 10 11] [6 7]
3 [ 0 1 2 3 4 5 6 7 10 11] [8 9]
4 [0 1 2 3 4 5 6 7 8 9] [10 11]
2. shuffle=True:未指定 random_state,两次结果不同
kf = KFold(12,n_folds=5,shuffle=True)
for i,(train_index,test_index) in enumerate(kf):
print(i,train_index,test_index)
0 [ 0 1 2 3 4 6 7 8 11] [ 5 9 10]
1 [ 0 2 3 4 5 8 9 10 11] [1 6 7]
2 [ 0 1 2 3 4 5 6 7 9 10] [ 8 11]
3 [ 0 1 2 5 6 7 8 9 10 11] [3 4]
4 [ 1 3 4 5 6 7 8 9 10 11] [0 2]
kf = KFold(12,n_folds=5,shuffle=True)
for i,(train_index,test_index) in enumerate(kf):
print(i,train_index,test_index)
0 [ 0 3 4 6 7 8 9 10 11] [1 2 5]
1 [ 1 2 5 6 7 8 9 10 11] [0 3 4]
2 [ 0 1 2 3 4 5 8 9 10 11] [6 7]
3 [ 0 1 2 3 4 5 6 7 8 10] [ 9 11]
4 [ 0 1 2 3 4 5 6 7 9 11] [ 8 10]
3. shuffle=True 且指定 random_state:两次结果又相同了
kf = KFold(12, n_folds=5, shuffle=True, random_state=5)
for i,(train_index,test_index) in enumerate(kf):
print(i,train_index,test_index)
0 [ 0 1 3 4 6 8 9 10 11] [2 5 7]
1 [ 0 1 2 3 5 6 7 8 10] [ 4 9 11]
2 [ 0 2 3 4 5 6 7 9 10 11] [1 8]
3 [ 1 2 3 4 5 6 7 8 9 11] [ 0 10]
4 [ 0 1 2 4 5 7 8 9 10 11] [3 6]
kf = KFold(12, n_folds=5, shuffle=True, random_state=5)
for i,(train_index,test_index) in enumerate(kf):
print(i,train_index,test_index)
0 [ 0 1 3 4 6 8 9 10 11] [2 5 7]
1 [ 0 1 2 3 5 6 7 8 10] [ 4 9 11]
2 [ 0 2 3 4 5 6 7 9 10 11] [1 8]
3 [ 1 2 3 4 5 6 7 8 9 11] [ 0 10]
4 [ 0 1 2 4 5 7 8 9 10 11] [3 6]