sklearn logistic regression学习

1. linear regression

linear_model.LinearRegression()

import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score

# Load the diabetes dataset
diabetes = datasets.load_diabetes()
# Use only one feature (column index 2). np.newaxis inserts an extra axis so
# the selection stays 2-D, one column wide, as fit()/predict() expect.
diabetes_X = diabetes.data[:, np.newaxis, 2]
# Split the data into training/testing sets: the last 20 samples are held out
diabetes_X_train = diabetes_X[:-20]
diabetes_X_test = diabetes_X[-20:]
diabetes_y_train = diabetes.target[:-20]
diabetes_y_test = diabetes.target[-20:]
# Create linear regression object
regr = linear_model.LinearRegression()
# Train the model using the training sets
regr.fit(diabetes_X_train, diabetes_y_train)
# Make predictions using the testing set
diabetes_y_pred = regr.predict(diabetes_X_test)

# The coefficients
print('Coefficients: \n', regr.coef_)
# The mean squared error
print("Mean squared error: %.2f"
      % mean_squared_error(diabetes_y_test, diabetes_y_pred))
# The coefficient of determination (R^2): 1 is perfect prediction.
# r2_score computes R^2, which is not the same metric as
# sklearn.metrics.explained_variance_score, so label it accordingly.
print('Coefficient of determination: %.2f'
      % r2_score(diabetes_y_test, diabetes_y_pred))
# Plot outputs: held-out points in black, fitted line in blue
plt.scatter(diabetes_X_test, diabetes_y_test,  color='black')
plt.plot(diabetes_X_test, diabetes_y_pred, color='blue', linewidth=3)
# Hide the axis ticks
plt.xticks(())
plt.yticks(())
plt.show()

sklearn logistic regression学习_第1张图片

1.1.11 logistic regression

logistic regression 虽然名字里带有“回归”,但其实是一种线性分类模型,而不是回归模型。sklearn 中的实现适用于二分类、一对多(one-vs-rest)和多项(multinomial)逻辑回归,并可选择 L1、L2 或 Elastic-Net 正则化。[详见](https://sklearn.apachecn.org/#/docs/2?id=_11-广义线性模型)

e.g.
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
# Load the iris data as a (features, labels) pair.
X, y = load_iris(return_X_y=True)
# random_state accepts three kinds of value: an int is used as the seed of
# the random number generator; a RandomState instance is used as the
# generator itself; None falls back to np.random.
# Many sklearn algorithms contain a random component, so fix random_state to
# make training repeatable. It is not a hyperparameter to tune: pin it first,
# then tune the model's real parameters.
clf = LogisticRegression(random_state=0)
clf.fit(X, y)
# The first two samples, reused for both prediction calls below.
first_two = X[:2, :]
# NOTE: the three results below are not stored or printed; they are only
# displayed when the lines are run interactively (REPL / notebook).
clf.predict(first_two)
clf.predict_proba(first_two)  # per-class probabilities for each sample
clf.score(X, y)  # mean accuracy on the given X and y
e.g. MNIST classification using multinomial logistic regression + L1 penalty
import time
import matplotlib.pyplot as plt
import numpy as np

from sklearn.datasets import fetch_openml
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils import check_random_state
t0 = time.time()
train_samples = 5000
# Request NumPy arrays explicitly. Since scikit-learn 0.24 the default
# as_frame='auto' returns a pandas DataFrame for this dataset, and the
# positional row indexing and reshape below would then fail.
X, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)
# check_random_state(0) returns a numpy.random.RandomState seeded with 0.
# check_random_state also accepts None or an existing RandomState instance.
# Unlike np.random.seed(), the generator is a dedicated object, so the seed
# does not have to be re-applied before every random call.
random_state = check_random_state(0)
permutation = random_state.permutation(X.shape[0])  # shuffled row indices
X = X[permutation]
y = y[permutation]
# Flatten every sample to one row; -1 lets NumPy infer the remaining size.
X = X.reshape((X.shape[0], -1))

# Reuse the seeded generator so the split is reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=train_samples, test_size=10000,
    random_state=random_state)
# Standardize each feature as (x - u) / s. The statistics are fit on the
# training set only and then applied unchanged to the test set.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Turn up tolerance for faster convergence.
# The 'saga' solver is stochastic, so seed it as well for repeatable results.
clf = LogisticRegression(
    C=50. / train_samples, penalty='l1', solver='saga', tol=0.1,
    random_state=random_state,
)
clf.fit(X_train, y_train)
# Percentage of coefficients that are exactly zero.
sparsity = np.mean(clf.coef_ == 0) * 100
score = clf.score(X_test, y_test)
print("Sparsity with L1 penalty: %.2f%%" % sparsity)
print("Test score with L1 penalty: %.4f" % score)

coef = clf.coef_.copy()
plt.figure(figsize=(10, 5))
# Symmetric color limits so that zero maps to the middle of the colormap.
scale = np.abs(coef).max()
# One 28x28 weight image per class (one row of coef), laid out on a 2x5 grid.
for i, class_coef in enumerate(coef):
    l1_plot = plt.subplot(2, 5, i + 1)
    l1_plot.imshow(class_coef.reshape(28, 28), interpolation='nearest',
                   cmap=plt.cm.RdBu, vmin=-scale, vmax=scale)
    l1_plot.set_xticks(())
    l1_plot.set_yticks(())
    l1_plot.set_xlabel('Class %i' % i)
plt.suptitle('Classification vector for...')

# Report the elapsed time before plt.show(), which is the last call.
run_time = time.time() - t0
print('Example run in %.3f s' % run_time)
plt.show()

sklearn logistic regression学习_第2张图片

你可能感兴趣的:(machine,learning)