数据预处理—打乱训练数据顺序

打乱二维数组第一维（行）的顺序，每一行内部的元素顺序保持不变

import numpy as np
# Toy feature matrix: one sample per row.
a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
# One label per row of `a`. The name is kept as-is for compatibility
# ("lable" is a misspelling of "label").
lable = np.arange(3)
# Size the permutation from the array that is actually indexed below, so the
# index range is always valid for `a` regardless of how the labels were built.
permutation = np.random.permutation(a.shape[0])
# Reorder along the first axis only; the contents of each row are untouched.
data = a[permutation, :]
# Apply the same permutation to the labels so every row keeps its own label.
lable_shuffled = lable[permutation]
或者在读取文件时定义一个函数，在加载数据的同时完成打乱和训练集/验证集的划分：
def open_data(fname, ratio_train):
    """Load a TRAIN/TEST file pair, pool and shuffle the rows, then re-split.

    Reads the comma-separated files ``<fname>/<fname>_TRAIN`` and
    ``<fname>/<fname>_TEST``, stacks them row-wise, shuffles the rows with
    ``np.random.permutation`` and cuts the shuffled rows into two parts.

    Args:
        fname: Dataset name; used both as the directory name and as the
            file-name prefix (the original note refers to the UCR archive
            layout).
        ratio_train: Fraction of the pooled rows placed in the first
            (training) part; the cut index is ``int(ratio_train * N)``.

    Returns:
        A 4-tuple ``(X_train, X_val, y_train, y_val)`` where the ``X_*``
        arrays hold columns 1 onward and the ``y_*`` arrays hold column 0
        (presumably the class label -- confirm against the data files).
    """
    # ndmin=2 keeps a single-row or single-column file two-dimensional, so
    # the row-wise concatenation and the column slicing below stay valid.
    # The original example reported a shape of (390, 177) here.
    data_train = np.loadtxt(fname + '/' + fname + '_TRAIN', delimiter=',', ndmin=2)
    # The original example reported a shape of (391, 177) here.
    data_test_val = np.loadtxt(fname + '/' + fname + '_TEST', delimiter=',', ndmin=2)
    # Pool both files; the original example reported a shape of (781, 177).
    data = np.concatenate((data_train, data_test_val), axis=0)
    N = data.shape[0]
    ind_cut = int(ratio_train * N)
    # ind is a random ordering of the row indices 0 .. N-1.
    ind = np.random.permutation(N)
    # Rows before ind_cut form the training part, the rest the validation part.
    train_idx, val_idx = ind[:ind_cut], ind[ind_cut:]
    return data[train_idx, 1:], data[val_idx, 1:], data[train_idx, 0], data[val_idx, 0]
# Example call. NOTE(review): `direc` and `ratio_train` are not defined in the
# visible snippet and must be set beforehand; `direc` is passed as the `fname`
# argument (dataset directory / file prefix) -- confirm against the caller.
X_train, X_val, y_train, y_val = open_data(direc,ratio_train)



你可能感兴趣的:(data,preprocessing)