为了格式化粘贴代码

测试代码

from sklearn.externals.six.moves import zip
import matplotlib.pyplot as plt
from sklearn.datasets import make_gaussian_quantiles
from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

import sys, time
from sklearn.cross_validation import cross_val_score
from sklearn.cross_validation import KFold
from sets import Set
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

# Integer codes for the multi-character tokens handled by transform():
# the number words "zero".."sixteen" map to 0..16 and "draw" maps to 17.
num2num = {
    word: code
    for code, word in enumerate((
        "zero", "one", "two", "three", "four", "five", "six", "seven",
        "eight", "nine", "ten", "eleven", "twelve", "thirteen", "fourteen",
        "fifteen", "sixteen", "draw",
    ))
}

def transform(x):
    """Convert one row of raw values into a list of integer codes.

    Every element is first converted with ``str()`` and then encoded:

    * more than one character: looked up in ``num2num`` (raises
      ``KeyError`` when the token is not a key of that table);
    * a single alphabetic character: its offset from ``'a'`` ('a' -> 0);
    * a single digit: its integer value.

    Any other value (empty string, punctuation, ...) is skipped, so the
    returned list can be shorter than the input row.
    """
    retval = []
    for e in x:
        token = str(e)
        if len(token) > 1:
            retval.append(num2num[token])
        elif token.isalpha():
            retval.append(ord(token) - ord('a'))
        elif token.isdigit():
            retval.append(int(token))
        # else: unrecognized single-character value, deliberately left out
        # to keep the original best-effort behaviour.
    return retval

def preprocess(filename):
    """Load a comma-separated data file and build features and labels.

    Every row read by pandas is passed through ``transform``. The first
    six encoded columns are one-hot encoded into ``X``; the seventh
    encoded column is returned unchanged as ``y``.

    Parameters:
        filename: path of the comma-separated file to read.

    Returns:
        A tuple ``(X, y)``: the dense one-hot encoded feature array and
        column 6 of the encoded rows.
    """
    # NOTE(review): read_csv infers a header by default, so the first line
    # of the file is consumed as column names and is not part of the
    # samples - confirm the data file really starts with a header row.
    df = pd.read_csv(filename, delimiter=',')
    a = np.array([transform(row) for row in df.values])
    features = a[:, :6]
    # fit_transform learns the categories and encodes in one pass; the
    # sparse result is densified to match what callers received before.
    X = OneHotEncoder().fit_transform(features).toarray()
    y = a[:, 6]
    return X, y

def main():
    """Score several classifiers on the data in ``krkopt.data``.

    The preprocessed samples are split at row 25000 into a training part
    and a test part. For every classifier the function prints the mean
    10-fold cross-validation accuracy followed by the elapsed seconds
    (fit plus scoring).
    """
    X, y = preprocess("krkopt.data")
    n_split = 25000
    X_train, X_test = X[:n_split], X[n_split:]
    y_train, y_test = y[:n_split], y[n_split:]
    # A single pre-formatted argument prints the same text under both the
    # Python 2 print statement and the Python 3 print function.
    print("%s %s" % (X_train.shape, y_train.shape))

    classifiers = [
        DecisionTreeClassifier(max_depth=5, min_samples_leaf=1),
        SVC(gamma=2, C=1),
        AdaBoostClassifier(
            DecisionTreeClassifier(max_depth=2),
            n_estimators=600,
            learning_rate=1),
        BaggingClassifier(
            DecisionTreeClassifier(max_depth=2),
            n_estimators=600,
            random_state=13),
        # Boosting over small bagged ensembles.
        AdaBoostClassifier(
            BaggingClassifier(DecisionTreeClassifier(max_depth=2),
                n_estimators=3, random_state=13),
            n_estimators=600,
            learning_rate=1),
        # Bagging over boosted ensembles.
        BaggingClassifier(
            AdaBoostClassifier(DecisionTreeClassifier(max_depth=2),
                n_estimators=600, learning_rate=1),
            n_estimators=600,
            random_state=13),
        ]

    for classifier in classifiers:
        start_time = time.time()
        classifier.fit(X_train, y_train)
        # NOTE(review): cross_val_score refits clones of the estimator on
        # folds of the test split, so the fit above does not influence the
        # reported score - confirm whether scoring the fitted model on
        # X_test was intended instead.
        accu = np.mean(cross_val_score(classifier, X_test, y_test,
            scoring='accuracy', cv=10, n_jobs=1))
        print("%s %s" % (accu, time.time() - start_time))

# Run the experiment only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


你可能感兴趣的:(为了格式化粘贴代码)