分类问题
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
from IPython.display import Image
# Sigmoid 函数
![image.png](https://upload-images.jianshu.io/upload_images/3968643-0700a1e702a6b8df.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)
![image.png](https://upload-images.jianshu.io/upload_images/3968643-8e1ba03a23492503.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)
德国信用卡欺诈数据集
# Load the credit dataset. header=None makes pandas treat every row as data
# and label the columns with integers (0, 1, 2, ...) instead of using the
# first row as column names.
data = pd.read_csv("credit-a.csv", header=None)
# Preview the first rows to sanity-check the parse.
data.head()
共15个特征,分类结果是-1或1(下面会把-1替换成0,便于逻辑回归)
from sklearn.model_selection import train_test_split
# Import from the public package path: the private module
# `sklearn.linear_model.logistic` was deprecated in scikit-learn 0.22 and
# later removed, so the old import fails on current releases. The public
# path works on both old and new versions.
from sklearn.linear_model import LogisticRegression

# Features: every column except the last one.
x = data[data.columns[:-1]]
# Labels: column 15, with -1 recoded to 0 so the two classes are 0 and 1.
y = data[15].replace(-1, 0)
# Split into training and test sets using train_test_split's defaults.
x_train, x_test, y_train, y_test = train_test_split(x, y)
# Logistic regression with default hyper-parameters.
model = LogisticRegression()
# Fit on the training split only; the test split stays unseen for evaluation.
model.fit(x_train, y_train)
# LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
# intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
# penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
# verbose=0, warm_start=False)
# Predict a class label for every row of the held-out test features.
model.predict(x_test) # predicted labels (the recorded output below contains only 0 and 1)
# array([0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0,
# 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1,
# 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1,
# 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0,
# 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0,
# 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0,
# 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0,
# 0, 0, 0], dtype=int64)
# Class-membership probabilities for the test set instead of hard labels.
# Each output row has one probability per class (two columns in the recorded
# output below), ordered as in model.classes_.
model.predict_proba(x_test) # returns probability values
# array([[ 8.42118755e-01, 1.57881245e-01],
# [ 3.53849684e-01, 6.46150316e-01],
# [ 5.89772834e-01, 4.10227166e-01],
# [ 9.89795079e-01, 1.02049210e-02],
# [ 4.53970427e-02, 9.54602957e-01],
# [ 9.55686055e-01, 4.43139449e-02],
# [ 8.28273345e-01, 1.71726655e-01],
# [ 7.15217461e-01, 2.84782539e-01],
# [ 7.90021133e-02, 9.20997887e-01],
# [ 7.09335586e-02, 9.29066441e-01],
# [ 5.31995686e-01, 4.68004314e-01],
# [ 4.96030463e-02, 9.50396954e-01],
# [ 1.52831284e-02, 9.84716872e-01],
# [ 6.70553619e-02, 9.32944638e-01],
# [ 5.20743922e-02, 9.47925608e-01],
# [ 9.29899388e-01, 7.01006118e-02],
# [ 6.85666682e-03, 9.93143333e-01],
# [ 7.02280851e-01, 2.97719149e-01],
# [ 8.63749191e-01, 1.36250809e-01],
# [ 7.93248379e-01, 2.06751621e-01],
# [ 5.35096686e-01, 4.64903314e-01],
# [ 4.18983124e-01, 5.81016876e-01],
# [ 8.14124654e-01, 1.85875346e-01],
# [ 8.14567995e-01, 1.85432005e-01],
# [ 8.85548358e-01, 1.14451642e-01],
# [ 9.41724862e-01, 5.82751379e-02],
# [ 5.44032687e-02, 9.45596731e-01],
# [ 7.67984562e-01, 2.32015438e-01],
# [ 9.83223111e-01, 1.67768890e-02],
# [ 2.29510597e-01, 7.70489403e-01],
# [ 8.48681806e-01, 1.51318194e-01],
# [ 1.36933701e-01, 8.63066299e-01],
# [ 6.15392279e-02, 9.38460772e-01],
# [ 6.49640738e-02, 9.35035926e-01],
# [ 8.44778708e-01, 1.55221292e-01],
# [ 7.74867033e-01, 2.25132967e-01],
# [ 4.52785960e-02, 9.54721404e-01],
# [ 4.13951765e-02, 9.58604824e-01],
# [ 9.55333450e-01, 4.46665496e-02],
# [ 2.47224241e-02, 9.75277576e-01],
# [ 9.56118896e-01, 4.38811039e-02],
# [ 5.44326290e-01, 4.55673710e-01],
# [ 2.78223741e-01, 7.21776259e-01],
# [ 4.16478099e-01, 5.83521901e-01],
# [ 2.60122300e-01, 7.39877700e-01],
# [ 4.88086307e-02, 9.51191369e-01],
# [ 8.56247014e-01, 1.43752986e-01],
# [ 1.69209088e-02, 9.83079091e-01],
# [ 1.12652610e-01, 8.87347390e-01],
# [ 5.25139421e-02, 9.47486058e-01],
# [ 4.10711457e-02, 9.58928854e-01],
# [ 9.38263223e-01, 6.17367772e-02],
# [ 8.65817876e-01, 1.34182124e-01],
# [ 3.06119732e-01, 6.93880268e-01],
# [ 4.39635202e-02, 9.56036480e-01],
# [ 7.49816979e-02, 9.25018302e-01],
# [ 5.03698332e-01, 4.96301668e-01],
# [ 9.99999189e-01, 8.11457800e-07],
# [ 4.98230390e-02, 9.50176961e-01],
# [ 8.32367949e-01, 1.67632051e-01],
# [ 6.19348622e-02, 9.38065138e-01],
# [ 9.95034142e-01, 4.96585837e-03],
# [ 9.46544362e-01, 5.34556377e-02],
# [ 7.77469556e-02, 9.22253044e-01],
# [ 9.88715740e-01, 1.12842601e-02],
# [ 6.76739343e-02, 9.32326066e-01],
# [ 1.00000000e+00, 6.82777428e-30],
# [ 8.35215475e-01, 1.64784525e-01],
# [ 5.25497099e-02, 9.47450290e-01],
# [ 1.73557257e-01, 8.26442743e-01],
# [ 3.18710961e-02, 9.68128904e-01],
# [ 9.17032904e-01, 8.29670959e-02],
# [ 8.26641810e-01, 1.73358190e-01],
# [ 8.39568438e-01, 1.60431562e-01],
# [ 1.05948015e-01, 8.94051985e-01],
# [ 9.71870481e-01, 2.81295187e-02],
# [ 5.22687388e-01, 4.77312612e-01],
# [ 1.27831708e-01, 8.72168292e-01],
# [ 9.89329211e-01, 1.06707891e-02],
# [ 6.40472635e-02, 9.35952737e-01],
# [ 3.80910832e-02, 9.61908917e-01],
# [ 5.00417631e-02, 9.49958237e-01],
# [ 6.64941351e-01, 3.35058649e-01],
# [ 8.23589417e-01, 1.76410583e-01],
# [ 1.93664551e-02, 9.80633545e-01],
# [ 4.33488113e-01, 5.66511887e-01],
# [ 6.32715168e-01, 3.67284832e-01],
# [ 9.09024784e-01, 9.09752159e-02],
# [ 3.69981197e-02, 9.63001880e-01],
# [ 7.07734271e-01, 2.92265729e-01],
# [ 7.76638545e-02, 9.22336145e-01],
# [ 8.94511331e-01, 1.05488669e-01],
# [ 8.74922994e-01, 1.25077006e-01],
# [ 5.51180934e-02, 9.44881907e-01],
# [ 6.38744632e-02, 9.36125537e-01],
# [ 7.98531627e-01, 2.01468373e-01],
# [ 7.27524774e-01, 2.72475226e-01],
# [ 5.39979763e-02, 9.46002024e-01],
# [ 1.30316551e-01, 8.69683449e-01],
# [ 3.12624769e-01, 6.87375231e-01],
# [ 4.88391903e-02, 9.51160810e-01],
# [ 6.20358561e-01, 3.79641439e-01],
# [ 6.63508563e-02, 9.33649144e-01],
# [ 4.43981303e-02, 9.55601870e-01],
# [ 5.62420824e-01, 4.37579176e-01],
# [ 4.11943131e-02, 9.58805687e-01],
# [ 1.03270048e-01, 8.96729952e-01],
# [ 4.45291889e-02, 9.55470811e-01],
# [ 8.81660460e-02, 9.11833954e-01],
# [ 8.50345298e-01, 1.49654702e-01],
# [ 1.07016472e-01, 8.92983528e-01],
# [ 7.88018509e-01, 2.11981491e-01],
# [ 9.00990633e-01, 9.90093667e-02],
# [ 4.13692960e-02, 9.58630704e-01],
# [ 8.21915845e-01, 1.78084155e-01],
# [ 9.34623120e-01, 6.53768801e-02],
# [ 3.59432105e-02, 9.64056789e-01],
# [ 7.82608186e-01, 2.17391814e-01],
# [ 8.49697956e-01, 1.50302044e-01],
# [ 7.63084146e-01, 2.36915854e-01],
# [ 5.63950588e-02, 9.43604941e-01],
# [ 4.73538772e-02, 9.52646123e-01],
# [ 8.20276467e-01, 1.79723533e-01],
# [ 9.75080203e-01, 2.49197969e-02],
# [ 8.29470660e-02, 9.17052934e-01],
# [ 6.18764280e-02, 9.38123572e-01],
# [ 9.86176598e-01, 1.38234016e-02],
# [ 5.29019825e-01, 4.70980175e-01],
# [ 2.41452586e-01, 7.58547414e-01],
# [ 1.23323595e-01, 8.76676405e-01],
# [ 8.44641202e-02, 9.15535880e-01],
# [ 7.67237932e-01, 2.32762068e-01],
# [ 8.49302888e-02, 9.15069711e-01],
# [ 6.09365875e-02, 9.39063413e-01],
# [ 9.98535191e-01, 1.46480916e-03],
# [ 7.16399329e-02, 9.28360067e-01],
# [ 1.67980974e-02, 9.83201903e-01],
# [ 6.23900269e-01, 3.76099731e-01],
# [ 3.69496543e-01, 6.30503457e-01],
# [ 1.15052013e-01, 8.84947987e-01],
# [ 4.83541470e-01, 5.16458530e-01],
# [ 1.99337480e-02, 9.80066252e-01],
# [ 6.10531969e-02, 9.38946803e-01],
# [ 5.85070930e-01, 4.14929070e-01],
# [ 9.08315174e-01, 9.16848261e-02],
# [ 7.93324145e-01, 2.06675855e-01],
# [ 4.40885467e-02, 9.55911453e-01],
# [ 8.98638245e-01, 1.01361755e-01],
# [ 8.70392235e-01, 1.29607765e-01],
# [ 2.09629984e-02, 9.79037002e-01],
# [ 4.05059781e-01, 5.94940219e-01],
# [ 8.93127483e-01, 1.06872517e-01],
# [ 1.00000000e+00, 1.57899513e-17],
# [ 4.86432721e-02, 9.51356728e-01],
# [ 4.70896654e-02, 9.52910335e-01],
# [ 8.11627287e-01, 1.88372713e-01],
# [ 3.30347902e-02, 9.66965210e-01],
# [ 5.85701087e-01, 4.14298913e-01],
# [ 8.92401116e-02, 9.10759888e-01],
# [ 3.29203842e-02, 9.67079616e-01],
# [ 9.22536703e-01, 7.74632967e-02],
# [ 7.53185488e-01, 2.46814512e-01],
# [ 1.00000000e+00, 1.09387062e-16],
# [ 8.68880956e-01, 1.31119044e-01]])
from sklearn.metrics import accuracy_score

# Accuracy of the default-parameter logistic regression on the test split.
# accuracy_score's documented signature is (y_true, y_pred), so the ground
# truth goes first. The value is the same either way for accuracy, but the
# correct order keeps the call safe if the metric is later replaced with an
# asymmetric one such as precision or recall.
accuracy_score(y_test, model.predict(x_test))
# 0.84146341463414631