将数据集划分为训练子集和验证子集

# train_test_split lives in sklearn.model_selection since scikit-learn 0.18;
# the old sklearn.cross_validation module was removed in 0.20, so try the
# current location first and fall back only for legacy installations.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

# Hold out 20% of the samples as a validation subset. Note that X_train and
# y_train are rebound here to the remaining 80% of the data.
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=2275,  # random seed for the shuffle, fixed for repeatability
)

print("train dataset", X_train.shape, y_train.shape)
print("Validation dataset", X_valid.shape, y_valid.shape)

你可能感兴趣的:(机器学习实战技巧,机器学习)