import pandas as pd
from sklearn.model_selection import StratifiedShuffleSplit
# Hold out 10% of the samples as an independent test set.
# StratifiedShuffleSplit stratifies on the labels passed as `y`, and the fixed
# random_state makes the shuffle (and therefore the split) reproducible.
ss = StratifiedShuffleSplit(n_splits=1, test_size=0.1, random_state=42)
data = pd.read_csv("F:\\PaperCode\\Mypaper_python_code\\data\\label\\delete_WSInopatch_sample.tsv", sep="\t")
X = data.iloc[:, 0]  # first column of the TSV
y = data.iloc[:, 2]  # third column of the TSV, used as the stratification labels

# n_splits=1, so this loop body runs exactly once.
for train_index, test_index in ss.split(X, y):
    # split() yields positional indices, so select rows with .iloc; plain []
    # indexing is label-based and only matches by coincidence when the Series
    # carries a default RangeIndex.
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]
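# Because my experiment is a multi-class classification task, the independent
# test set has to be drawn per class: 10% of the samples in each class.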