机器学习常用代码

数据清洗时,将 DataFrame 中某一列的文本取值转换为整数编码。

def Replace(X, columns):
    """Encode the values of one DataFrame column as integer codes, in place.

    Each distinct non-null value is mapped to its position in the sorted
    list of distinct values, so the smallest value becomes 0, the next 1,
    and so on.

    Args:
        X: DataFrame to modify; the column is overwritten on this object.
        columns: Label of the single column to encode.

    Returns:
        The same DataFrame ``X`` with ``X[columns]`` replaced by its codes.
        Missing values have no code and stay missing.
    """
    distinct = sorted(X[columns].dropna().unique())
    codes = {value: code for code, value in enumerate(distinct)}
    # One ``map`` looks every cell up against the ORIGINAL values. Replacing
    # one value at a time could re-encode a cell whose freshly assigned code
    # equals a value that is processed later (e.g. -1 -> 0, then 0 -> 1).
    X[columns] = X[columns].map(codes)
    return X

划分测试集与训练集(3:7)

def Data(X, columns):
    """Split a DataFrame into train/test features and labels (70% / 30%).

    Args:
        X: DataFrame holding both the feature columns and the label column.
        columns: Label of the target column; it is removed from the features.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``.
    """
    # ``sklearn.cross_validation`` has been removed from scikit-learn;
    # ``train_test_split`` is provided by ``sklearn.model_selection``.
    from sklearn.model_selection import train_test_split

    Y = X[columns]
    features = X.drop([columns], axis=1)
    # A fixed random_state keeps the split reproducible between runs.
    X_train, X_test, y_train, y_test = train_test_split(
        features, Y, test_size=0.3, random_state=0)
    return (X_train, X_test, y_train, y_test)

基础的机器学习模型代码:训练模型并查看其在测试集上的得分

# ``astype`` returns a new object rather than casting in place, so the result
# has to be assigned back for the integer labels to actually be used.
y_train = y_train.astype('int')

def RF(X_train, X_test, y_train, y_test):
    """Fit a random forest classifier and return its test-set accuracy.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train: Training labels.
        y_test: True labels for ``X_test``.

    Returns:
        Accuracy of the predictions on ``X_test``.
    """
    from sklearn.ensemble import RandomForestClassifier
    # Imported here because nothing else in the file brings it into scope.
    from sklearn.metrics import accuracy_score

    model = RandomForestClassifier(n_estimators=100)
    model.fit(X_train, y_train)
    predicted = model.predict(X_test)
    score = accuracy_score(y_test, predicted)
    return score

def LOR(X_train, X_test, y_train, y_test):
    """Fit an L1-penalised logistic regression and return its test accuracy.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train: Training labels.
        y_test: True labels for ``X_test``.

    Returns:
        Accuracy of the predictions on ``X_test``.
    """
    from sklearn.linear_model import LogisticRegression
    # Imported here because nothing else in the file brings it into scope.
    from sklearn.metrics import accuracy_score

    # The L1 penalty needs a solver that supports it; the current default
    # solver does not, so liblinear is requested explicitly. liblinear always
    # trains one-vs-rest, which makes a separate ``multi_class='ovr'``
    # argument (deprecated in recent scikit-learn) unnecessary.
    lor = LogisticRegression(penalty='l1', C=100, solver='liblinear')
    lor.fit(X_train, y_train)
    predicted = lor.predict(X_test)
    score = accuracy_score(y_test, predicted)
    return score

def Svm(X_train, X_test, y_train, y_test):
    """Fit an RBF-kernel support vector classifier and return its accuracy.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train: Training labels.
        y_test: True labels for ``X_test``.

    Returns:
        Accuracy of the predictions on ``X_test``.
    """
    from sklearn import svm
    # Imported here because nothing else in the file brings it into scope.
    from sklearn.metrics import accuracy_score

    model = svm.SVC(kernel='rbf')
    model.fit(X_train, y_train)
    predicted = model.predict(X_test)
    score = accuracy_score(y_test, predicted)
    return score

def LR(X_train, X_test, y_train, y_test):
    """Fit an ordinary least-squares linear regression and score it.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train: Training targets.
        y_test: True targets for ``X_test``.

    Returns:
        Tuple ``(score, intercept, coefficients)`` where ``score`` is the
        R^2 coefficient of determination of the predictions on ``X_test``.
    """
    from sklearn.linear_model import LinearRegression
    from sklearn.metrics import r2_score

    # Named ``model`` so the local does not shadow this function's own name.
    model = LinearRegression()
    model.fit(X_train, y_train)
    predicted = model.predict(X_test)
    # Accuracy is a classification metric and rejects continuous predictions;
    # R^2 is the regression counterpart.
    score = r2_score(y_test, predicted)
    return (score, model.intercept_, model.coef_)

你可能感兴趣的:(机器学习常用代码)