python数据样本不平衡处理

# 将数据拆分为训练集和测试集

X_train,X_test,y_train,y_test = model_selection.train_test_split(X,y,test_size = 0.3, random_state = 1234)

# 导入第三方包

from imblearn.over_sampling import SMOTE

# 运用SMOTE算法实现训练数据集的平衡

over_samples = SMOTE(random_state=0)

over_samples_X,over_samples_y = over_samples.fit_sample(X_train, y_train)

#over_samples_X, over_samples_y = over_samples.fit_sample(X_train.values,y_train.values.ravel())

# 重抽样前的类别比例

print(y_train.value_counts()/len(y_train))

# 重抽样后的类别比例

print(pd.Series(over_samples_y).value_counts()/len(over_samples_y))

你可能感兴趣的:(python数据样本不平衡处理)