Contents
- 1. Under-sampling
- 2. Over-sampling
- 3. Combination
- 4. Ensemble Learning
1. Under-sampling
Under-sampling balances the classes by removing samples from the majority class. imbalanced-learn (imblearn) implements the following samplers:
from sklearn.datasets import make_classification
from imblearn.under_sampling import RandomUnderSampler
from imblearn.under_sampling import TomekLinks
from imblearn.under_sampling import OneSidedSelection
from imblearn.under_sampling import InstanceHardnessThreshold
from imblearn.under_sampling import CondensedNearestNeighbour
from imblearn.under_sampling import EditedNearestNeighbours
from imblearn.under_sampling import RepeatedEditedNearestNeighbours
from imblearn.under_sampling import AllKNN
from imblearn.under_sampling import NearMiss
from imblearn.under_sampling import NeighbourhoodCleaningRule
# Toy imbalanced dataset: class 0 is the ~10% minority class
X, y = make_classification(n_samples=500, n_features=20, n_classes=2, weights=[0.1, 0.9])
# Randomly drop majority-class samples until the classes are balanced
us = RandomUnderSampler()
X_resampled, y_resampled = us.fit_resample(X, y)
# Remove the majority sample of every Tomek link (cross-class nearest-neighbour pair)
us = TomekLinks()
X_resampled, y_resampled = us.fit_resample(X, y)
# One-sided selection: 1-NN condensation followed by Tomek-link cleaning
us = OneSidedSelection()
X_resampled, y_resampled = us.fit_resample(X, y)
# Keep only the samples an internal cross-validated classifier finds easy
us = InstanceHardnessThreshold(cv=5)
X_resampled, y_resampled = us.fit_resample(X, y)
# Condense the majority class to a subset that is consistent under 1-NN
us = CondensedNearestNeighbour()
X_resampled, y_resampled = us.fit_resample(X, y)
# Drop majority samples that disagree with their k nearest neighbours
us = EditedNearestNeighbours(n_neighbors=3)
X_resampled, y_resampled = us.fit_resample(X, y)
# Repeat the ENN pass until no more samples are removed
us = RepeatedEditedNearestNeighbours(n_neighbors=3)
X_resampled, y_resampled = us.fit_resample(X, y)
# Apply ENN for every neighbourhood size from 1 up to n_neighbors
us = AllKNN(n_neighbors=3)
X_resampled, y_resampled = us.fit_resample(X, y)
# Keep the majority samples closest on average to the minority class
us = NearMiss(n_neighbors=3)
X_resampled, y_resampled = us.fit_resample(X, y)
# ENN-based cleaning focused on the neighbourhood of the minority class
us = NeighbourhoodCleaningRule(n_neighbors=3)
X_resampled, y_resampled = us.fit_resample(X, y)
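A quick sanity check for any sampler is to compare the class counts before and after resampling; a minimal sketch with collections.Counter (the exact counts will vary with the random draw):
from collections import Counter
print(Counter(y))            # e.g. Counter({1: 450, 0: 50}) before resampling
us = RandomUnderSampler()
X_resampled, y_resampled = us.fit_resample(X, y)
print(Counter(y_resampled))  # e.g. Counter({0: 50, 1: 50}) after balancing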
2. Over-sampling
Over-sampling balances the classes by duplicating or synthesising minority-class samples:
from sklearn.datasets import make_classification
from imblearn.over_sampling import RandomOverSampler
from imblearn.over_sampling import SMOTE
from imblearn.over_sampling import BorderlineSMOTE
from imblearn.over_sampling import KMeansSMOTE
from imblearn.over_sampling import SVMSMOTE
from imblearn.over_sampling import ADASYN
X, y = make_classification(n_samples=500, n_features=20, n_classes=2, weights=[0.1, 0.9])
# Randomly duplicate minority-class samples until the classes are balanced
ros = RandomOverSampler()
X_resampled, y_resampled = ros.fit_resample(X, y)
# SMOTE: synthesise new minority samples by interpolating between nearest neighbours
ros = SMOTE()
X_resampled, y_resampled = ros.fit_resample(X, y)
# Synthesise samples only near the class border, where they matter most
ros = BorderlineSMOTE()
X_resampled, y_resampled = ros.fit_resample(X, y)
# Cluster with k-means first, then apply SMOTE within clusters; on small
# datasets this may need tuning (e.g. cluster_balance_threshold) to find valid clusters
ros = KMeansSMOTE(k_neighbors=3)
X_resampled, y_resampled = ros.fit_resample(X, y)
# Use an SVM's support vectors to decide where to synthesise samples
ros = SVMSMOTE()
X_resampled, y_resampled = ros.fit_resample(X, y)
# ADASYN: generate more samples where the minority class is harder to learn
ros = ADASYN()
X_resampled, y_resampled = ros.fit_resample(X, y)
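By default every over-sampler balances the classes completely; the sampling_strategy parameter sets a different target ratio instead. A minimal sketch (the 0.5 ratio is an arbitrary choice for illustration):
from collections import Counter
ros = SMOTE(sampling_strategy=0.5)  # grow the minority class to half the majority size
X_resampled, y_resampled = ros.fit_resample(X, y)
print(Counter(y_resampled))         # e.g. Counter({1: 450, 0: 225})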
3. Combination
Combined methods first over-sample the minority class, then clean the result with an under-sampling step:
from sklearn.datasets import make_classification
from imblearn.combine import SMOTEENN
from imblearn.combine import SMOTETomek
X, y = make_classification(n_samples=500, n_features=20, n_classes=2, weights=[0.1, 0.9])
# SMOTE over-sampling followed by ENN cleaning of noisy samples
cs = SMOTEENN()
X_resampled, y_resampled = cs.fit_resample(X, y)
# SMOTE over-sampling followed by Tomek-link removal
cs = SMOTETomek()
X_resampled, y_resampled = cs.fit_resample(X, y)
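Because the cleaning step removes samples after SMOTE has balanced the classes, combined methods generally do not return exactly equal class counts:
from collections import Counter
cs = SMOTEENN()
X_resampled, y_resampled = cs.fit_resample(X, y)
print(Counter(y_resampled))  # near-balanced, but usually not exactly 1:1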
4. Ensemble Learning
Ensemble methods build resampling into the classifier itself, so each base learner is trained on balanced data:
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from imblearn.ensemble import BalancedBaggingClassifier
from imblearn.ensemble import BalancedRandomForestClassifier
from imblearn.ensemble import EasyEnsembleClassifier
from imblearn.ensemble import RUSBoostClassifier
X, y = make_classification(n_samples=500, n_features=20, n_classes=2, weights=[0.1, 0.9])
# Bagging in which each bootstrap sample is balanced by random under-sampling
model = BalancedBaggingClassifier(LogisticRegression(max_iter=5000))
model.fit(X, y)
print(model.score(X, y))  # accuracy on the training data
# Random forest that under-samples each bootstrap sample
model = BalancedRandomForestClassifier()
model.fit(X, y)
print(model.score(X, y))
# Ensemble of AdaBoost learners, each trained on a balanced bootstrap sample
model = EasyEnsembleClassifier()
model.fit(X, y)
print(model.score(X, y))
# AdaBoost that randomly under-samples the data at each boosting iteration
model = RUSBoostClassifier(LogisticRegression(max_iter=5000))
model.fit(X, y)
print(model.score(X, y))
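Training-set accuracy is a weak signal here: with a roughly 1:9 split, always predicting the majority class already scores about 0.9. A held-out split scored with balanced accuracy is more informative; a minimal sketch:
from sklearn.model_selection import train_test_split
from sklearn.metrics import balanced_accuracy_score
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=0)
model = BalancedRandomForestClassifier()
model.fit(X_train, y_train)
print(balanced_accuracy_score(y_test, model.predict(X_test)))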