B站机器学习ID3代码
代码来源:https://www.bilibili.com/video/BV1Hs411n7Xh?spm_id_from=333.999.0.0
import csv
from sklearn.feature_extraction import DictVectorizer
from sklearn import preprocessing
from sklearn import tree
# Read the training table from film.csv. The first row carries the column
# names; every later row is one sample. The first column of each row is
# dropped and the last column is collected separately as the class label.
# The `with` block closes the file as soon as all rows have been consumed, so
# the handle is not left open; newline='' is the mode the csv module documents
# for files handed to csv.reader.
with open('film.csv', 'rt', newline='') as film_data:
    reader = csv.reader(film_data)
    # Header row
    headers = next(reader)
    print(headers)
    feature_list = []
    result_list = []
    for row in reader:
        # csv.reader yields [] for a blank line (e.g. a trailing newline);
        # skip it instead of failing on row[-1].
        if not row:
            continue
        result_list.append(row[-1])
        # Drop the first and last columns; the remaining ones become a
        # {column name: value} mapping for this sample.
        feature_list.append(dict(zip(headers[1:-1], row[1:-1])))
print(result_list)
print(feature_list)
#
# One-hot encode the feature dicts and convert the result into a dense NumPy
# array.
vec = DictVectorizer()
encoded_features = vec.fit_transform(feature_list)
dummyx = encoded_features.toarray()

# Binarize the class labels.
label_binarizer = preprocessing.LabelBinarizer()
dummyY = label_binarizer.fit_transform(result_list)

# Encoded column groups, in order: humidity, outlook, temperature, windy
#   humidity: high, normal
#   outlook: overcast, rainy, sunny
#   temperature: cool, hot, mild
#   windy: false, true
print(dummyx, dummyY, sep="\n")

# Entropy as the split criterion; a fixed random_state keeps the fitted tree
# reproducible between runs.
clf = tree.DecisionTreeClassifier(random_state=0, criterion='entropy')
clf.fit(dummyx, dummyY)
print(f"clf:{clf}")
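# Optional: export the fitted tree to a PDF. This needs the pydotplus package
# plus a separately installed Graphviz (pydotplus calls its `dot` executable).
# NOTE: DictVectorizer.get_feature_names() was removed in scikit-learn 1.2;
# the call below uses get_feature_names_out(). On scikit-learn < 1.0 use
# vec.get_feature_names() instead.
# import pydotplus
#
# dot_data = tree.export_graphviz(clf,
#                                 feature_names=vec.get_feature_names_out(),
#                                 filled=True, rounded=True,
#                                 special_characters=True,
#                                 out_file=None)
# graph = pydotplus.graph_from_dot_data(dot_data)
# graph.write_pdf("file.pdf")
#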
## Predict the class of one new sample
# Describe the sample by column name and let the fitted vectorizer encode it,
# so the encoded vector always matches the column layout the tree was trained
# on. With the training columns (humidity, outlook, temperature, windy) this
# is the one-hot vector [1,0, 0,1,0, 0,0,1, 1,0].
# NOTE: values must be spelled exactly as in the CSV (e.g. 'FALSE'); the
# vectorizer silently ignores feature values it has not seen during fit.
sample = {
    'outlook': 'rainy',
    'temperature': 'mild',
    'humidity': 'high',
    'windy': 'FALSE',
}
A = vec.transform([sample]).toarray()
predict_resurt = clf.predict(A)
# predict() returns one label per input row; read the single row explicitly
# rather than relying on the truth value of an array comparison.
# Label 1 is the binarized positive class ("Yes": LabelBinarizer sorts the
# class names, so No -> 0 and Yes -> 1).
if predict_resurt[0] == 1:
    print("预测结果:打球")
else:
    print("预测结果:不打球")
id,outlook,temperature,humidity,windy,play
1,sunny,hot,high,FALSE,No
2,sunny,hot,high,TRUE,No
3,overcast,hot,high,FALSE,Yes
4,rainy,mild,high,FALSE,Yes
5,rainy,cool,normal,FALSE,Yes
6,overcast,cool,normal,TRUE,Yes
7,sunny,mild,high,FALSE,No
8,sunny,cool,normal,FALSE,Yes
9,rainy,mild,normal,FALSE,Yes
10,sunny,mild,normal,TRUE,Yes
11,overcast,mild,high,TRUE,Yes
12,overcast,hot,normal,FALSE,Yes
13,rainy,mild,high,TRUE,No
14,rainy,cool,normal,TRUE,No
注意:要运行上面被注释掉的决策树可视化代码,仅安装 sklearn、pydotplus 这两个 Python 包是不够的,还需要另外安装 Graphviz 程序(pydotplus 需要调用它的 dot 可执行文件)。具体做法可以参考这个链接:https://blog.csdn.net/weixin_40592798/article/details/104394733