30、基于SelectFromModel和LassoCV的特征选择

30、基于SelectFromModel和LassoCV的特征选择

import matplotlib.pyplot as plt

import numpy as np

from sklearn.datasets import load_diabetes

from sklearn.feature_selection import SelectFromModel

from sklearn.linear_model import LassoCV

# Load the diabetes regression dataset.
diabetes = load_diabetes()
X = diabetes.data
y = diabetes.target
feature_names = diabetes.feature_names
print(feature_names)

# Score each feature by the magnitude of its cross-validated Lasso coefficient.
clf = LassoCV().fit(X, y)
importance = np.abs(clf.coef_)
print(importance)

# Put the threshold just above the third-largest importance so that only the
# two highest-scoring features can pass the selector.
idx_third = importance.argsort()[-3]
threshold = importance[idx_third] + 0.01

sfm = SelectFromModel(clf, threshold=threshold)
sfm.fit(X, y)
X_transform = sfm.transform(X)
n_features = X_transform.shape[1]

# The +0.01 margin can exclude the second feature when the second and third
# importances are nearly tied; fail with a clear message instead of an
# IndexError at plotting time.
if n_features < 2:
    raise ValueError(
        "Expected 2 selected features but got {}; the second and third "
        "importances are within 0.01 of each other.".format(n_features))

# transform() keeps the selected columns in their original column order, NOT
# in order of importance. Take the indices/names from the selector's support
# mask so that name_features[i] always describes X_transform[:, i].
idx_features = sfm.get_support(indices=True)
name_features = np.array(feature_names)[idx_features]
print('Selected features: {}'.format(name_features))

# Scatter-plot the two selected features against each other.
plt.title(
    "基于SelectFromModel和LassoCV的特征选择, "
    "threshold %0.3f." % sfm.threshold)
feature1 = X_transform[:, 0]
feature2 = X_transform[:, 1]
plt.plot(feature1, feature2, 'r.')
plt.xlabel("First feature: {}".format(name_features[0]))
plt.ylabel("Second feature: {}".format(name_features[1]))
plt.ylim([np.min(feature2), np.max(feature2)])
plt.show()


你可能感兴趣的:(30、基于SelectFromModel和LassoCV的特征选择)