就逻辑回归简单原理及python实现来讲,现在我需要两个特征的列表,那么就需要将原字典中特征遍历出来并且一一对应。
转化逻辑代码如下:
其中取出特征列的那段遍历代码可以替换为下面这一行(注意:这样得到的列顺序是 [连续学习天数, 学习时间],与下面全部代码中实际使用的 [学习时间, 连续学习天数] 顺序相反,预测时传入的特征顺序也要相应调整):
featuretable_after = list(featuretable.values())
全部代码如下:
import pandas as pd
from collections import OrderedDict
# Dataset preprocessing: define the raw feature/label tables, then reshape
# them into the per-sample structures that are later passed to
# train_test_split and the model.
featuretable = OrderedDict({
    '连续学习天数': [3, 3, 3, 3, 4, 3, 4, 4, 5, 3, 5, 6, 6, 5, 5, 6, 7, 7, 9, 8],
    '学习时间': [5.50, 5.75, 6.00, 6.25, 6.50, 6.75, 6.75, 7.00, 7.25,
             7.50, 7.75, 8.00, 8.25, 8.5, 9.00, 9.25, 9.50, 9.75, 10.00, 10.25],
})
labeltable = OrderedDict({
    '通过考试': [0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1],
})

# Labels: wrap the dict in a DataFrame and pull the single column out as a Series.
labeltable_after = pd.DataFrame(labeltable)
labeltable_end = labeltable_after.loc[:, '通过考试']

# Feature columns in the order used for every sample: [学习时间, 连续学习天数].
# (The previous insert(0, ...) loop produced this same order implicitly;
# spelling it out keeps it independent of the dict's key order.)
featuretable_after = [featuretable['学习时间'], featuretable['连续学习天数']]

# Transpose the two columns into one [学习时间, 连续学习天数] row per sample.
featuretable_end = [list(sample) for sample in zip(*featuretable_after)]
from sklearn.model_selection import train_test_split
# Dataset split: hold out 20% of the samples as the test set.
# No random_state is passed, so the partition can differ between runs.
X_train, X_test, y_train, y_test = train_test_split(
    featuretable_end,
    labeltable_end,
    test_size=0.2,
)
from sklearn.linear_model import LogisticRegression
# Create the logistic-regression model (liblinear solver) and fit it on the
# training split; fit() returns the estimator itself, so `model` is the
# fitted classifier.
model = LogisticRegression(solver='liblinear').fit(X_train, y_train)

# Predicted class probabilities for a single sample, given in the same
# column order as the rows of featuretable_end.
sample = [[10.0, 9]]
print(model.predict_proba(sample))

# Show which samples ended up in the training split.
print(X_train)
输出:
[[8.5, 5], [7.5, 3], [9.5, 7], [7.0, 4], [9.0, 5], [8.25, 6], [6.0, 3], [10.0, 9], [9.25, 6], [7.25, 5], [10.25, 8], [6.5, 4], [7.75, 5], [5.75, 3], [6.75, 3], [8.0, 6]]
print(y_train)
输出:
13 0
9 0
16 1
7 0
14 1
12 1
2 0
18 1
15 1
8 1
19 1
4 0
10 1
1 0
5 0
11 0
Name: 通过考试, dtype: int64
print(model.predict_proba([[10.0, 9]]))
输出:
[[0.04834103 0.95165897]]
参考:Python2.7中dict.values()+dict.values(),在Python3.5中解决办法