sklearn中决策树的实现

def DecisionTree(inputdf):
    """Train an entropy-criterion decision tree and print its hold-out accuracy.

    The rows of ``inputdf`` are randomly split 70% / 30% into training and
    test sets; the classifier is fitted on the training part and the
    fraction of correctly predicted test labels is printed.

    Args:
        inputdf: Object exposing ``select(*columns).toPandas()`` (presumably
            a PySpark DataFrame -- confirm against the caller). It must
            provide the feature columns ``param1_1`` and ``param2_1`` and
            the target column ``label``.

    Returns:
        None. The accuracy on the test split is written to stdout.
    """
    import numpy as np
    from sklearn import tree

    # ``sklearn.cross_validation`` was removed in scikit-learn 0.20; prefer
    # its replacement and fall back only on very old installations.
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:
        from sklearn.cross_validation import train_test_split

    x = np.array(inputdf.select("param1_1", "param2_1").toPandas())
    # A single-column selection converts to an (n, 1) array. Flatten it to
    # (n,): otherwise comparing the 1-D predictions with a column vector
    # broadcasts to an (m, m) matrix and the printed "accuracy" is wrong.
    y = np.array(inputdf.select('label').toPandas()).ravel()

    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

    clf = tree.DecisionTreeClassifier(criterion='entropy')
    clf.fit(x_train, y_train)

    answer = clf.predict(x_test)
    # Element-wise match of two 1-D arrays -> mean is the test accuracy.
    print(np.mean(answer == y_test))

你可能感兴趣的:(sklearn中决策树的实现)