imblearn

A quick reference for the resampling and ensemble APIs in imbalanced-learn (imblearn).

Table of Contents

  • 1. Under-sampling
  • 2. Over-sampling
  • 3. Combination
  • 4. Ensemble Learning




1. Under-sampling

from sklearn.datasets import make_classification

from imblearn.under_sampling import RandomUnderSampler
from imblearn.under_sampling import TomekLinks
from imblearn.under_sampling import OneSidedSelection
from imblearn.under_sampling import InstanceHardnessThreshold
from imblearn.under_sampling import CondensedNearestNeighbour
from imblearn.under_sampling import EditedNearestNeighbours
from imblearn.under_sampling import RepeatedEditedNearestNeighbours
from imblearn.under_sampling import AllKNN
from imblearn.under_sampling import NearMiss
from imblearn.under_sampling import NeighbourhoodCleaningRule

X, y = make_classification(n_samples=500, n_features=20, n_classes=2, weights=[0.1, 0.9])
# weights=[0.1, 0.9] makes class 0 the minority (roughly 10% of samples)

# RandomUnderSampler: randomly drop majority-class samples
us = RandomUnderSampler()
X_resampled, y_resampled = us.fit_resample(X, y)

# TomekLinks: remove majority samples that form Tomek links with minority samples
us = TomekLinks()
X_resampled, y_resampled = us.fit_resample(X, y)

# OneSidedSelection: 1-NN condensation followed by Tomek-link removal
us = OneSidedSelection()
X_resampled, y_resampled = us.fit_resample(X, y)

# InstanceHardnessThreshold: drop samples that a cross-validated classifier
# classifies with low confidence
us = InstanceHardnessThreshold(cv=5)
X_resampled, y_resampled = us.fit_resample(X, y)

# CondensedNearestNeighbour: keep only the subset needed for 1-NN to classify correctly
us = CondensedNearestNeighbour()
X_resampled, y_resampled = us.fit_resample(X, y)

# EditedNearestNeighbours: remove samples misclassified by their k nearest neighbours
us = EditedNearestNeighbours(n_neighbors=3)
X_resampled, y_resampled = us.fit_resample(X, y)

# RepeatedEditedNearestNeighbours: apply ENN repeatedly until no more samples are removed
us = RepeatedEditedNearestNeighbours(n_neighbors=3)
X_resampled, y_resampled = us.fit_resample(X, y)

# AllKNN: repeated ENN with the neighbourhood size increased at each iteration
us = AllKNN(n_neighbors=3)
X_resampled, y_resampled = us.fit_resample(X, y)

# NearMiss: keep the majority samples closest to the minority class
us = NearMiss(n_neighbors=3)
X_resampled, y_resampled = us.fit_resample(X, y)

# NeighbourhoodCleaningRule: ENN-style cleaning plus removal of neighbours
# that misclassify minority samples
us = NeighbourhoodCleaningRule(n_neighbors=3)
X_resampled, y_resampled = us.fit_resample(X, y)
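After any of these samplers it is worth checking what the class distribution actually became; a minimal sketch with collections.Counter, reusing the same X, y recipe as above:

from collections import Counter

from sklearn.datasets import make_classification
from imblearn.under_sampling import RandomUnderSampler

X, y = make_classification(n_samples=500, n_features=20, n_classes=2, weights=[0.1, 0.9])

us = RandomUnderSampler()
X_resampled, y_resampled = us.fit_resample(X, y)

# The majority class is cut down to the minority count,
# e.g. roughly {0: 50, 1: 450} before and {0: 50, 1: 50} after
print("before:", Counter(y))
print("after: ", Counter(y_resampled))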




2. Over-sampling

from sklearn.datasets import make_classification

from imblearn.over_sampling import RandomOverSampler
from imblearn.over_sampling import SMOTE
from imblearn.over_sampling import BorderlineSMOTE
from imblearn.over_sampling import KMeansSMOTE
from imblearn.over_sampling import SVMSMOTE
from imblearn.over_sampling import ADASYN

X, y = make_classification(n_samples=500, n_features=20, n_classes=2, weights=[0.1, 0.9])

# RandomOverSampler: duplicate minority-class samples at random
# (named `sampler` rather than `os` to avoid shadowing the standard-library module)
sampler = RandomOverSampler()
X_resampled, y_resampled = sampler.fit_resample(X, y)

# SMOTE: synthesize minority samples by interpolating between nearest neighbours
sampler = SMOTE()
X_resampled, y_resampled = sampler.fit_resample(X, y)

# BorderlineSMOTE: apply SMOTE only to minority samples near the class boundary
sampler = BorderlineSMOTE()
X_resampled, y_resampled = sampler.fit_resample(X, y)

# KMeansSMOTE: cluster with k-means first, then apply SMOTE within clusters;
# on small datasets it can fail to find usable clusters (tune cluster_balance_threshold if so)
sampler = KMeansSMOTE(k_neighbors=3)
X_resampled, y_resampled = sampler.fit_resample(X, y)

# SVMSMOTE: use SVM support vectors to choose the samples to interpolate from
sampler = SVMSMOTE()
X_resampled, y_resampled = sampler.fit_resample(X, y)

# ADASYN: like SMOTE, but generates more samples where the minority class is harder to learn
sampler = ADASYN()
X_resampled, y_resampled = sampler.fit_resample(X, y)
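Every sampler also accepts a sampling_strategy argument controlling how far to resample; a minimal sketch (the 0.5 ratio is illustrative):

from collections import Counter

from sklearn.datasets import make_classification
from imblearn.over_sampling import SMOTE

X, y = make_classification(n_samples=500, n_features=20, n_classes=2, weights=[0.1, 0.9])

# For a binary problem, a float sampling_strategy is the desired minority/majority
# ratio after resampling (the default fully balances the two classes)
sampler = SMOTE(sampling_strategy=0.5)
X_resampled, y_resampled = sampler.fit_resample(X, y)
print(Counter(y_resampled))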




3. Combination

from sklearn.datasets import make_classification

from imblearn.combine import SMOTEENN
from imblearn.combine import SMOTETomek

X, y = make_classification(n_samples=500, n_features=20, n_classes=2, weights=[0.1, 0.9])

# SMOTE + ENN: over-sample with SMOTE, then clean noisy samples with edited nearest neighbours
cs = SMOTEENN()
X_resampled, y_resampled = cs.fit_resample(X, y)

# SMOTE + Tomek links: over-sample with SMOTE, then remove Tomek links
cs = SMOTETomek()
X_resampled, y_resampled = cs.fit_resample(X, y)
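Resampling should happen inside the training folds only: resampling before a train/test split leaks synthetic points derived from test samples into training. imblearn's pipeline applies samplers during fit but not during predict, so it composes safely with scikit-learn model selection; a minimal sketch (cv=5 and the classifier choice are illustrative):

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

from imblearn.combine import SMOTEENN
from imblearn.pipeline import make_pipeline

X, y = make_classification(n_samples=500, n_features=20, n_classes=2, weights=[0.1, 0.9])

# SMOTEENN is applied to each training fold only; validation folds stay untouched
pipeline = make_pipeline(SMOTEENN(), LogisticRegression(max_iter=5000))
scores = cross_val_score(pipeline, X, y, cv=5, scoring='balanced_accuracy')
print(scores.mean())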




4. Ensemble Learning

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

from imblearn.ensemble import BalancedBaggingClassifier
from imblearn.ensemble import BalancedRandomForestClassifier
from imblearn.ensemble import EasyEnsembleClassifier
from imblearn.ensemble import RUSBoostClassifier

X, y = make_classification(n_samples=500, n_features=20, n_classes=2, weights=[0.1, 0.9])

# BalancedBaggingClassifier: bagging in which each bootstrap sample is randomly under-sampled
model = BalancedBaggingClassifier(LogisticRegression(max_iter=5000))
model.fit(X, y)
print(model.score(X, y))  # training accuracy; see the held-out evaluation below

# BalancedRandomForestClassifier: random forest in which each tree sees a balanced bootstrap
model = BalancedRandomForestClassifier()
model.fit(X, y)
print(model.score(X, y))

# EasyEnsembleClassifier: ensemble of AdaBoost learners, each trained on a balanced subset
model = EasyEnsembleClassifier()
model.fit(X, y)
print(model.score(X, y))

# RUSBoostClassifier: AdaBoost with random under-sampling at each boosting iteration
model = RUSBoostClassifier(LogisticRegression(max_iter=5000))
model.fit(X, y)
print(model.score(X, y))
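The scores above are computed on the training data, so they mostly measure memorization. A held-out split with an imbalance-aware metric says more; a minimal sketch using one of the classifiers above (test_size=0.25 is illustrative):

from sklearn.datasets import make_classification
from sklearn.metrics import balanced_accuracy_score
from sklearn.model_selection import train_test_split

from imblearn.ensemble import BalancedRandomForestClassifier

X, y = make_classification(n_samples=500, n_features=20, n_classes=2, weights=[0.1, 0.9])

# stratify=y keeps the class ratio identical in the train and test splits
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, stratify=y, random_state=0)

model = BalancedRandomForestClassifier()
model.fit(X_train, y_train)

# Balanced accuracy averages per-class recall, so the majority class cannot dominate the score
print(balanced_accuracy_score(y_test, model.predict(X_test)))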
