DecisionTreeClassifier
-
- 参数介绍
- 重要属性介绍
- 重要方法(接口)介绍
- 调参
-
- 利用学习曲线调参
-
- 根据分数高低选择决策树的深度
- 根据预测结果与实际结果比较判断过拟合还是欠拟合
- 利用网格调参
- 举例
参数介绍
# Construct a classifier with every tunable parameter spelled out.
# NOTE(review): `presort=False` and `min_impurity_split=None` (the old
# defaults) were deprecated and then REMOVED in scikit-learn 1.0, so they
# are dropped here — the snippet now runs on both old and current versions
# with identical behavior.
clf = tree.DecisionTreeClassifier(criterion="entropy"        # split quality: "entropy" (information gain) or "gini"
                                  , splitter="best"          # "best" split vs "random" (adds randomness, fights overfit)
                                  , max_depth=10             # cap tree depth; main overfitting control
                                  , min_samples_split=5      # a node needs >= this many samples to be split
                                  , min_samples_leaf=1       # each leaf must keep >= this many samples
                                  , min_weight_fraction_leaf=0.0  # like min_samples_leaf, but as a weight fraction
                                  , max_features=None        # features considered per split (None = all)
                                  , random_state=None        # seed for reproducibility
                                  , max_leaf_nodes=None      # grow best-first up to this many leaves (None = unlimited)
                                  , min_impurity_decrease=0.0  # split only if impurity drops by at least this much
                                  , class_weight=None        # per-class weights (None = all equal)
                                  )
重要属性介绍
# Importance score the fitted tree assigned to each feature
# (scores sum to 1; higher = more influential in the splits).
clf.feature_importances_
feature_name = ['酒精', '苹果酸', '灰', '灰的碱性', '镁', '总酚', '类黄酮',
                '非黄烷类酚类', '花青素', '颜色强度', '色调',
                'od280/od315稀释葡萄酒', '脯氨酸']
# Pair each feature name with its importance and print them all on one line.
name_score_pairs = list(zip(feature_name, clf.feature_importances_))
print(*name_score_pairs)
重要方法(接口)介绍
# Core estimator interface, demonstrated on a train/test split.
clf = clf.fit(Xtrain,Ytrain)    # train the tree on the training set (fit returns the estimator itself)
score = clf.score(Xtest,Ytest)  # mean accuracy on the held-out test set
clf.apply(Xtest)                # index of the leaf each test sample ends up in
clf.predict(Xtest)              # predicted class label for each test sample
调参
利用学习曲线调参
根据分数高低选择决策树的深度
def show(Xtrain, Ytrain, Xtest, Ytest, max_depth=10):
    """Plot test accuracy as a function of tree depth (a learning curve).

    Fits one DecisionTreeClassifier per depth in 1..max_depth and plots the
    held-out accuracy, so the best depth can be read off the peak.

    Parameters
    ----------
    Xtrain, Ytrain : training features / labels.
    Xtest, Ytest   : held-out features / labels used for scoring.
    max_depth      : largest depth to try (default 10, matching the original
                     hard-coded range).

    Returns
    -------
    list of float : one test-set accuracy per depth (also plotted).
    """
    scores = []
    # range(1, max_depth + 1) keeps the x-axis and the loop in sync —
    # the original duplicated the constant 10 in two places.
    depths = range(1, max_depth + 1)
    for depth in depths:
        clf = tree.DecisionTreeClassifier(max_depth=depth
                                          , criterion="entropy"
                                          , random_state=30   # fixed seed: curve is reproducible
                                          , splitter="random"
                                          )
        clf = clf.fit(Xtrain, Ytrain)
        scores.append(clf.score(Xtest, Ytest))
    plt.plot(depths, scores, color="red", label="max_depth")
    plt.legend()
    plt.show()
    return scores
根据预测结果与实际结果比较判断过拟合还是欠拟合
def isOverFitting(Xtrain, Ytrain, x, y, max_depth=10):
    """Compare training accuracy vs cross-validated accuracy across depths.

    A large gap between the train curve (red) and the CV curve (blue)
    indicates overfitting; two low curves indicate underfitting.

    Parameters
    ----------
    Xtrain, Ytrain : training split used both to fit and for the train score.
    x, y           : full dataset used for 10-fold cross-validation.
    max_depth      : largest depth to try (default 10, matching the original
                     hard-coded range).

    Returns
    -------
    float : the best mean cross-validation score found (also printed, to
            keep the original console output).
    """
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.model_selection import cross_val_score
    import matplotlib.pyplot as plt

    train_scores = []
    cv_scores = []
    depths = range(1, max_depth + 1)  # single source of truth for loop + x-axis
    for depth in depths:
        clf = DecisionTreeClassifier(random_state=25, max_depth=depth,
                                     criterion="entropy")
        clf = clf.fit(Xtrain, Ytrain)
        train_scores.append(clf.score(Xtrain, Ytrain))
        # 10-fold CV mean on the full data approximates generalization.
        cv_scores.append(cross_val_score(clf, x, y, cv=10).mean())
    best = max(cv_scores)
    print(best)
    plt.plot(depths, train_scores, color="red", label="train")
    plt.plot(depths, cv_scores, color="blue", label="test")
    plt.legend()
    plt.show()
    # Return the value instead of only printing it, so callers can use it.
    return best
利用网格调参
def adujstParameter(Xtrain, Ytrain):
    """Exhaustive grid search over the main DecisionTreeClassifier knobs.

    Runs 10-fold cross-validated GridSearchCV on the training data and
    returns the winning parameter combination with its score.

    Parameters
    ----------
    Xtrain, Ytrain : training features / labels to search on.

    Returns
    -------
    tuple : (best_params_ dict, best_score_ float) from the fitted search.
    """
    import numpy as np
    # BUG FIX: the original `from sklearn.tree import tree` imported a
    # private module that was removed in scikit-learn 0.24 and raises
    # ImportError on any modern install; the public package import works
    # on every version.
    from sklearn import tree
    from sklearn.model_selection import GridSearchCV

    parameters = {'splitter': ('best', 'random')
                  , 'criterion': ("gini", "entropy")
                  , "max_depth": [*range(1, 10)]
                  , 'min_samples_leaf': [*range(1, 50, 5)]
                  , 'min_impurity_decrease': [*np.linspace(0, 0.5, 20)]
                  }
    clf = tree.DecisionTreeClassifier(random_state=25)
    GS = GridSearchCV(clf, parameters, cv=10)
    GS.fit(Xtrain, Ytrain)
    # BUG FIX: the originals were bare expressions whose values were
    # discarded — return them so the search result is actually usable.
    return GS.best_params_, GS.best_score_
举例
def decisionTree(Xtrain, Ytrain, Xtest, Ytest):
    """Fit a depth-limited decision tree with randomized splits and
    return its accuracy on the held-out test set.

    Note: no random_state is set, so repeated calls may score differently.
    """
    model = tree.DecisionTreeClassifier(
        splitter='random',
        max_depth=10,
    )
    model.fit(Xtrain, Ytrain)
    return model.score(Xtest, Ytest)