1.实现根据样本数据(用眼距离distance、最长持续用眼时长duration、总用眼时长total_time、户外运动时长outdoor、用眼角度angle、健康环境光照用眼比例proportion)判别是否需要近视预警
2.由于样本实在太少,结果只能算还行;不过即使换成更多的样本,原理也是一样的
import pandas
from sklearn.model_selection import train_test_split

# Load the sample table; "data.csv" is resolved relative to the working directory.
patients = pandas.read_csv("data.csv")
# Preview of the first five rows (only displayed in an interactive session).
patients.head(5)

# Feature matrix: every column from 'distance' through 'proportion'
# (label-based slice, both end points included).
patients_data = patients.loc[:, 'distance':'proportion']
# Label vector: the 'warning' column.
patients_target = patients['warning']

# Hold out 10% of the rows for testing; the fixed seed makes the split repeatable.
data_train, data_test, target_train, target_test = train_test_split(
    patients_data,
    patients_target,
    test_size=0.1,
    random_state=42,
)
import numpy as np
import matplotlib.pyplot as plt
from sklearn.feature_selection import SelectKBest, f_classif

# Candidate feature names, used as the x-axis labels of the bar chart below.
predictors = [
    "distance",
    "duration",
    "total_time",
    "outdoor",
    "angle",
    "proportion",
]

# Score every training column against the label with f_classif.
selector = SelectKBest(score_func=f_classif, k=5)
selector.fit(data_train, target_train)

# Plot -log10(p) so that a smaller p-value shows up as a taller bar.
scores = np.negative(np.log10(selector.pvalues_))

plt.bar(range(len(predictors)), scores)
plt.xticks(range(len(predictors)), predictors, rotation='vertical')
plt.show()

# Hand-picked subset of features; both splits are narrowed to these columns.
predictors_best = ["distance", "total_time", "angle", "proportion"]
data_train = data_train.loc[:, predictors_best]
data_test = data_test.loc[:, predictors_best]
from sklearn import model_selection
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

# Hyper-parameter grid explored for the random forest.
tree_param_grid = {
    'min_samples_split': [2, 3, 4],
    'n_estimators': [3, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
}

# The estimator class and the fold splitter have to exist before GridSearchCV
# is constructed; without them this step fails with NameError when the script
# is run from top to bottom.
kf = model_selection.KFold(n_splits=3)

# (estimator, parameter grid as a dict, cross-validation splitter)
grid = GridSearchCV(RandomForestClassifier(), param_grid=tree_param_grid, cv=kf)
grid.fit(data_train, target_train)  # fit on the training split

# Per-candidate results, best parameters, best score.
# NOTE: a bare expression is only displayed in an interactive session.
grid.cv_results_, grid.best_params_, grid.best_score_
from sklearn import model_selection
from sklearn.ensemble import RandomForestClassifier

# Random forest with fixed hyper-parameters and a fixed seed.
rf = RandomForestClassifier(
    n_estimators=35,
    min_samples_split=2,
    min_samples_leaf=2,
    random_state=1,
)

# Cross-validation: three folds over the training split.
kf = model_selection.KFold(n_splits=3)
scores = model_selection.cross_val_score(
    estimator=rf,
    X=data_train,
    y=target_train,
    cv=kf,
)

# Report the mean score across the folds.
print(scores.mean())
sample | distance | duration | total_time | outdoor | angle | proportion | warning(1 yes 0 no) |
---|---|---|---|---|---|---|---|
1 | 20 | 72 | 344 | 148 | 11 | 81 | 1 |
2 | 34 | 68 | 263 | 135 | 7 | 50 | 1 |
3 | 25 | 98 | 357 | 32 | 12 | 64 | 1 |
4 | 37 | 65 | 291 | 157 | 8 | 89 | 0 |
5 | 34 | 151 | 162 | 169 | 18 | 63 | 1 |
6 | 30 | 178 | 259 | 146 | 32 | 50 | 1 |
7 | 20 | 35 | 134 | 37 | 23 | 68 | 0 |
8 | 39 | 111 | 169 | 87 | 4 | 52 | 0 |
9 | 22 | 44 | 265 | 136 | 14 | 76 | 1 |
10 | 39 | 151 | 219 | 140 | 2 | 55 | 0 |
11 | 21 | 179 | 184 | 64 | 18 | 60 | 1 |
12 | 25 | 41 | 241 | 71 | 16 | 72 | 1 |
13 | 18 | 171 | 286 | 131 | 35 | 89 | 1 |
14 | 32 | 33 | 236 | 102 | 29 | 50 | 1 |
15 | 20 | 133 | 226 | 124 | 17 | 81 | 1 |
16 | 17 | 148 | 236 | 66 | 32 | 75 | 1 |
17 | 34 | 111 | 214 | 57 | 5 | 88 | 0 |
18 | 24 | 85 | 163 | 155 | 14 | 32 | 1 |
19 | 32 | 165 | 276 | 146 | 33 | 52 | 1 |
20 | 25 | 124 | 359 | 171 | 33 | 70 | 0 |
21 | 31 | 51 | 167 | 47 | 25 | 47 | 0 |
22 | 31 | 63 | 352 | 58 | 22 | 44 | 1 |
23 | 16 | 58 | 164 | 45 | 13 | 73 | 0 |
24 | 29 | 37 | 326 | 104 | 33 | 68 | 1 |
25 | 34 | 47 | 197 | 59 | 5 | 66 | 0 |
26 | 36 | 123 | 185 | 165 | 26 | 70 | 0 |
27 | 25 | 126 | 171 | 45 | 23 | 33 | 1 |
28 | 31 | 84 | 98 | 37 | 30 | 51 | 1 |
29 | 30 | 92 | 153 | 114 | 14 | 48 | 0 |
30 | 29 | 178 | 278 | 146 | 27 | 45 | 1 |