import matplotlib.pyplot as plt
'''
第一步:建立数据集
'''
from collections import OrderedDict
import pandas as pd
# Raw observations: study time per student and whether that student passed
# the exam (0 = failed, 1 = passed). Both lists have 20 entries.
_study_time = [
    0.50, 0.75, 1.00, 1.25, 1.50, 1.75, 1.75, 2.00, 2.25, 2.50,
    2.75, 3.00, 3.25, 3.50, 4.00, 4.25, 4.50, 4.75, 5.00, 5.50,
]
_passed = [0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1]

examDict = {'学习时间': _study_time, '通过考试': _passed}

# Wrap in an OrderedDict so the column order is explicit, then build the
# two-column DataFrame used by every later step.
examOrderDict = OrderedDict(examDict)
examDf = pd.DataFrame(examOrderDict)
'''
第二步:提取特征和标签
'''
# Select the feature column (study time) and the label column (pass flag)
# from the DataFrame, in that order.
exam_X, exam_y = (
    examDf.loc[:, _column] for _column in ('学习时间', '通过考试')
)
'''
第三步:绘制散点图
'''
'''
第四步:建立模型训练数据集和测试数据集
'''
from sklearn.model_selection import train_test_split
# Use 60% of the samples for training and hold out the rest for evaluation.
# random_state pins the shuffle so the reported accuracy and the predictions
# below are reproducible from run to run; stratify keeps the pass/fail ratio
# the same in both splits, which matters with a data set this small.
X_train, X_test, y_train, y_test = train_test_split(
    exam_X,
    exam_y,
    train_size=0.6,
    random_state=0,
    stratify=exam_y,
)
'''
第五步:训练模型
'''
# scikit-learn estimators take a 2-D feature matrix, so turn each 1-D feature
# Series into a single-column array via reshape(-1, 1).
X_train, X_test = (
    _split.values.reshape(-1, 1) for _split in (X_train, X_test)
)
from sklearn.linear_model import LogisticRegression
# fit() returns the estimator itself, so construction and training chain.
model = LogisticRegression().fit(X_train, y_train)
'''
第六步:评估模型准确率
'''
# Mean accuracy of the fitted classifier on the held-out test split.
correction = model.score(X=X_test, y=y_test)
print("模型的正确率为:", correction, sep=" ")
'''
第七步:预测是否通过考试的概率;预测是否可以通过考试
'''
# Study time to query. The model expects a 2-D input (one row per sample,
# one column per feature), hence the nested list.
time = 0
_query = [[time]]

# Row 0 holds the class probabilities for the single queried sample, in the
# order of model.classes_ (label 0 = fail first, then label 1 = pass).
failOrPass = model.predict_proba(_query)
_fail_prob, _pass_prob = failOrPass[0]
print("考试失败的概率是:", _fail_prob, ";考试成功的概率是:", _pass_prob)

# Hard class prediction (0 or 1) for the same sample.
pred = model.predict(_query)
print("是否可以通过考试:", pred[0])