# Decision tree: Titanic survival prediction
import pandas as pd
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn.metrics import classification_report
import graphviz
import pydotplus
# Load the passenger table and drop the identifier column so it is not
# used as a feature.
data = pd.read_csv(r'./data/titanic_data.csv')
data.drop('PassengerId', axis=1, inplace=True)

# Encode sex numerically: male -> 1, female -> 0. The two categories must
# receive different codes, otherwise the column is constant and gives the
# tree nothing to split on. Values other than 'male'/'female' become NaN.
data['Sex'] = data['Sex'].map({'male': 1, 'female': 0})

# Impute missing ages with the column mean. The result is assigned back
# instead of calling fillna(inplace=True) on the selected column, which is
# a chained in-place update that pandas deprecates and that has no effect
# under Copy-on-Write.
data['Age'] = data['Age'].fillna(data['Age'].mean())

# Every column after the first is used as a feature.
# NOTE(review): this assumes 'Survived' is the first remaining column;
# if it is not, the target leaks into the features. Confirm against the CSV.
features = data.iloc[:, 1:]
target = data['Survived']

dtc = DecisionTreeClassifier(max_depth=5, random_state=8)
dtc.fit(features, target)

# NOTE: predictions are made on the same rows the tree was fitted on, so
# the report below describes training fit, not generalisation.
pre = dtc.predict(features)
print(classification_report(target, pre))

# Export the fitted tree as a PNG. feature_names is taken from the feature
# frame so it always matches what the model was trained on. class_names
# must be a sequence with one label per class in ascending class order
# (0, then 1); a bare string is not a valid value.
# NOTE(review): labels assume Survived == 0 means the passenger did not
# survive - confirm against the data dictionary.
dot_data = export_graphviz(
    dtc,
    feature_names=list(features.columns),
    class_names=['Not survived', 'Survived'],
)
graph = pydotplus.graph_from_dot_data(dot_data)
graph.write_png(r"./data/titanic.png")
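# classification_report output from a previously recorded run
# (re-run the script to refresh these figures):
#
#               precision    recall  f1-score   support
#
#            0       0.75      0.85      0.79       549
#            1       0.69      0.54      0.60       342
#
#     accuracy                           0.73       891
#    macro avg       0.72      0.69      0.70       891
# weighted avg       0.72      0.73      0.72       891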