23. 日月光华 Python数据分析 - 机器学习 - 逻辑回归

分类问题

import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
from IPython.display import Image

# Sigmoid 函数
![image.png](https://upload-images.jianshu.io/upload_images/3968643-0700a1e702a6b8df.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)

![image.png](https://upload-images.jianshu.io/upload_images/3968643-8e1ba03a23492503.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)

德国信用卡欺诈数据集

# Load the credit data set. The CSV has no header row, so pandas labels
# the columns with integers starting at 0.
data = pd.read_csv(
    "credit-a.csv",
    header=None,
)
data.head()  # preview the first rows
image.png

共15个特征(第0~14列),分类结果(第15列)是-1或1

from sklearn.model_selection import train_test_split
# Import from the public package path: the private
# `sklearn.linear_model.logistic` module was deprecated in scikit-learn 0.22
# and removed in 0.24, so the old import fails on current releases.
from sklearn.linear_model import LogisticRegression

x = data[data.columns[:-1]]   # features: every column except the last one
y = data[15].replace(-1, 0)   # labels: map -1 to 0 so the classes are 0 and 1
x_train, x_test, y_train, y_test = train_test_split(x, y)   # random train/test split
model = LogisticRegression()   # logistic regression with default hyper-parameters
model.fit(x_train, y_train)    # train on the training split
# LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
#          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
#          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
#          verbose=0, warm_start=False)

model.predict(x_test)        # predict a class label for every test sample
# array([0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0,
#       0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1,
#       0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1,
#       1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0,
#       0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0,
#       0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0,
#       1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0,
#       0, 0, 0], dtype=int64)

model.predict_proba(x_test)     # per-class probabilities, one row per test sample
# array([[  8.42118755e-01,   1.57881245e-01],
#       [  3.53849684e-01,   6.46150316e-01],
#       [  5.89772834e-01,   4.10227166e-01],
#       [  9.89795079e-01,   1.02049210e-02],
#       [  4.53970427e-02,   9.54602957e-01],
#       [  9.55686055e-01,   4.43139449e-02],
#       [  8.28273345e-01,   1.71726655e-01],
#       [  7.15217461e-01,   2.84782539e-01],
#       [  7.90021133e-02,   9.20997887e-01],
#       [  7.09335586e-02,   9.29066441e-01],
#       [  5.31995686e-01,   4.68004314e-01],
#       [  4.96030463e-02,   9.50396954e-01],
#       [  1.52831284e-02,   9.84716872e-01],
#       [  6.70553619e-02,   9.32944638e-01],
#       [  5.20743922e-02,   9.47925608e-01],
#       [  9.29899388e-01,   7.01006118e-02],
#       [  6.85666682e-03,   9.93143333e-01],
#       [  7.02280851e-01,   2.97719149e-01],
#       [  8.63749191e-01,   1.36250809e-01],
#       [  7.93248379e-01,   2.06751621e-01],
#       [  5.35096686e-01,   4.64903314e-01],
#       [  4.18983124e-01,   5.81016876e-01],
#       [  8.14124654e-01,   1.85875346e-01],
#       [  8.14567995e-01,   1.85432005e-01],
#       [  8.85548358e-01,   1.14451642e-01],
#       [  9.41724862e-01,   5.82751379e-02],
#       [  5.44032687e-02,   9.45596731e-01],
#       [  7.67984562e-01,   2.32015438e-01],
#       [  9.83223111e-01,   1.67768890e-02],
#       [  2.29510597e-01,   7.70489403e-01],
#       [  8.48681806e-01,   1.51318194e-01],
#       [  1.36933701e-01,   8.63066299e-01],
#       [  6.15392279e-02,   9.38460772e-01],
#       [  6.49640738e-02,   9.35035926e-01],
#       [  8.44778708e-01,   1.55221292e-01],
#       [  7.74867033e-01,   2.25132967e-01],
#       [  4.52785960e-02,   9.54721404e-01],
#       [  4.13951765e-02,   9.58604824e-01],
#       [  9.55333450e-01,   4.46665496e-02],
#       [  2.47224241e-02,   9.75277576e-01],
#       [  9.56118896e-01,   4.38811039e-02],
#       [  5.44326290e-01,   4.55673710e-01],
#       [  2.78223741e-01,   7.21776259e-01],
#       [  4.16478099e-01,   5.83521901e-01],
#       [  2.60122300e-01,   7.39877700e-01],
#       [  4.88086307e-02,   9.51191369e-01],
#       [  8.56247014e-01,   1.43752986e-01],
#       [  1.69209088e-02,   9.83079091e-01],
#       [  1.12652610e-01,   8.87347390e-01],
#       [  5.25139421e-02,   9.47486058e-01],
#       [  4.10711457e-02,   9.58928854e-01],
#       [  9.38263223e-01,   6.17367772e-02],
#       [  8.65817876e-01,   1.34182124e-01],
#       [  3.06119732e-01,   6.93880268e-01],
#       [  4.39635202e-02,   9.56036480e-01],
#       [  7.49816979e-02,   9.25018302e-01],
#       [  5.03698332e-01,   4.96301668e-01],
#       [  9.99999189e-01,   8.11457800e-07],
#       [  4.98230390e-02,   9.50176961e-01],
#       [  8.32367949e-01,   1.67632051e-01],
#       [  6.19348622e-02,   9.38065138e-01],
#       [  9.95034142e-01,   4.96585837e-03],
#       [  9.46544362e-01,   5.34556377e-02],
#       [  7.77469556e-02,   9.22253044e-01],
#       [  9.88715740e-01,   1.12842601e-02],
#       [  6.76739343e-02,   9.32326066e-01],
#       [  1.00000000e+00,   6.82777428e-30],
#       [  8.35215475e-01,   1.64784525e-01],
#       [  5.25497099e-02,   9.47450290e-01],
#       [  1.73557257e-01,   8.26442743e-01],
#       [  3.18710961e-02,   9.68128904e-01],
#       [  9.17032904e-01,   8.29670959e-02],
#       [  8.26641810e-01,   1.73358190e-01],
#       [  8.39568438e-01,   1.60431562e-01],
#       [  1.05948015e-01,   8.94051985e-01],
#       [  9.71870481e-01,   2.81295187e-02],
#       [  5.22687388e-01,   4.77312612e-01],
#       [  1.27831708e-01,   8.72168292e-01],
#       [  9.89329211e-01,   1.06707891e-02],
#       [  6.40472635e-02,   9.35952737e-01],
#       [  3.80910832e-02,   9.61908917e-01],
#       [  5.00417631e-02,   9.49958237e-01],
#       [  6.64941351e-01,   3.35058649e-01],
#       [  8.23589417e-01,   1.76410583e-01],
#       [  1.93664551e-02,   9.80633545e-01],
#       [  4.33488113e-01,   5.66511887e-01],
#       [  6.32715168e-01,   3.67284832e-01],
#       [  9.09024784e-01,   9.09752159e-02],
#       [  3.69981197e-02,   9.63001880e-01],
#       [  7.07734271e-01,   2.92265729e-01],
#       [  7.76638545e-02,   9.22336145e-01],
#       [  8.94511331e-01,   1.05488669e-01],
#       [  8.74922994e-01,   1.25077006e-01],
#       [  5.51180934e-02,   9.44881907e-01],
#       [  6.38744632e-02,   9.36125537e-01],
#       [  7.98531627e-01,   2.01468373e-01],
#       [  7.27524774e-01,   2.72475226e-01],
#       [  5.39979763e-02,   9.46002024e-01],
#       [  1.30316551e-01,   8.69683449e-01],
#       [  3.12624769e-01,   6.87375231e-01],
#       [  4.88391903e-02,   9.51160810e-01],
#       [  6.20358561e-01,   3.79641439e-01],
#       [  6.63508563e-02,   9.33649144e-01],
#       [  4.43981303e-02,   9.55601870e-01],
#       [  5.62420824e-01,   4.37579176e-01],
#       [  4.11943131e-02,   9.58805687e-01],
#       [  1.03270048e-01,   8.96729952e-01],
#       [  4.45291889e-02,   9.55470811e-01],
#       [  8.81660460e-02,   9.11833954e-01],
#       [  8.50345298e-01,   1.49654702e-01],
#       [  1.07016472e-01,   8.92983528e-01],
#       [  7.88018509e-01,   2.11981491e-01],
#       [  9.00990633e-01,   9.90093667e-02],
#       [  4.13692960e-02,   9.58630704e-01],
#       [  8.21915845e-01,   1.78084155e-01],
#       [  9.34623120e-01,   6.53768801e-02],
#       [  3.59432105e-02,   9.64056789e-01],
#       [  7.82608186e-01,   2.17391814e-01],
#       [  8.49697956e-01,   1.50302044e-01],
#       [  7.63084146e-01,   2.36915854e-01],
#       [  5.63950588e-02,   9.43604941e-01],
#       [  4.73538772e-02,   9.52646123e-01],
#       [  8.20276467e-01,   1.79723533e-01],
#       [  9.75080203e-01,   2.49197969e-02],
#       [  8.29470660e-02,   9.17052934e-01],
#       [  6.18764280e-02,   9.38123572e-01],
#       [  9.86176598e-01,   1.38234016e-02],
#       [  5.29019825e-01,   4.70980175e-01],
#       [  2.41452586e-01,   7.58547414e-01],
#       [  1.23323595e-01,   8.76676405e-01],
#       [  8.44641202e-02,   9.15535880e-01],
#       [  7.67237932e-01,   2.32762068e-01],
#       [  8.49302888e-02,   9.15069711e-01],
#       [  6.09365875e-02,   9.39063413e-01],
#       [  9.98535191e-01,   1.46480916e-03],
#       [  7.16399329e-02,   9.28360067e-01],
#       [  1.67980974e-02,   9.83201903e-01],
#       [  6.23900269e-01,   3.76099731e-01],
#       [  3.69496543e-01,   6.30503457e-01],
#       [  1.15052013e-01,   8.84947987e-01],
#       [  4.83541470e-01,   5.16458530e-01],
#       [  1.99337480e-02,   9.80066252e-01],
#       [  6.10531969e-02,   9.38946803e-01],
#       [  5.85070930e-01,   4.14929070e-01],
#       [  9.08315174e-01,   9.16848261e-02],
#       [  7.93324145e-01,   2.06675855e-01],
#       [  4.40885467e-02,   9.55911453e-01],
#       [  8.98638245e-01,   1.01361755e-01],
#       [  8.70392235e-01,   1.29607765e-01],
#       [  2.09629984e-02,   9.79037002e-01],
#       [  4.05059781e-01,   5.94940219e-01],
#       [  8.93127483e-01,   1.06872517e-01],
#       [  1.00000000e+00,   1.57899513e-17],
#       [  4.86432721e-02,   9.51356728e-01],
#       [  4.70896654e-02,   9.52910335e-01],
#       [  8.11627287e-01,   1.88372713e-01],
#       [  3.30347902e-02,   9.66965210e-01],
#       [  5.85701087e-01,   4.14298913e-01],
#       [  8.92401116e-02,   9.10759888e-01],
#       [  3.29203842e-02,   9.67079616e-01],
#       [  9.22536703e-01,   7.74632967e-02],
#       [  7.53185488e-01,   2.46814512e-01],
#       [  1.00000000e+00,   1.09387062e-16],
#       [  8.68880956e-01,   1.31119044e-01]])


from sklearn.metrics import accuracy_score
# Accuracy of the default-parameter logistic regression on the test split.
# Arguments follow the documented order (y_true, y_pred); accuracy itself is
# symmetric, but asymmetric metrics such as precision or recall are not.
accuracy_score(y_test, model.predict(x_test))
# 0.84146341463414631

你可能感兴趣的:(23. 日月光华 Python数据分析 - 机器学习 - 逻辑回归)