# Linear regression: load data, split, fit, and score with MSE.
# Bug fixes: `aixs` was a typo (and fillna fills all columns by default, so no
# axis argument is needed); `train_test_split` was never imported; the keyword
# is `test_size`, not `testsize`.
import sklearn
from sklearn import linear_model
from sklearn import metrics
from sklearn.model_selection import train_test_split
import pandas as pd

# sklearn estimators raise on NaN, so fill missing values before fitting.
data = pd.read_csv('data.csv').fillna(0)
x = data.drop('label', axis=1)   # features: every column except the target
y = data['label']                # target column
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.1, random_state=0)
reg = linear_model.LinearRegression()
reg.fit(X_train, y_train)
y_pred = reg.predict(X_test)
metrics.mean_squared_error(y_test, y_pred)
# Ridge regression (L2 penalty)
reg = linear_model.Ridge(alpha=0.5)
# Lasso regression (L1 penalty)
# Bug fix: max_iter=10 is far too few iterations for coordinate descent to
# converge (sklearn's default is 1000) and would emit a ConvergenceWarning.
reg = linear_model.Lasso(alpha=2, max_iter=1000)
# Logistic regression (a classifier, despite the name)
reg = linear_model.LogisticRegression()
# SVR (Support Vector Regression)
# Bug fixes: SVR was never imported; `probability` is an SVC-only parameter;
# the penalty parameter is uppercase `C`; SVR is a regressor, so it has no
# predict_proba and must be scored with a regression metric (MSE), not ROC-AUC.
from sklearn.svm import SVR

reg = SVR(kernel='rbf', C=0.1, max_iter=10)
reg.fit(X_train, y_train)
y_pred = reg.predict(X_test)
metrics.mean_squared_error(y_test, y_pred)
# SVC (Support Vector Classification)
# Bug fixes: SVC was never imported; the penalty parameter is uppercase `C`;
# roc_auc_score requires both the true labels and the predicted scores.
from sklearn.svm import SVC

# probability=True enables predict_proba (fits an extra calibration step).
cls = SVC(probability=True, kernel='rbf', C=0.1, max_iter=10)
cls.fit(X_train, y_train)
y_pred = cls.predict_proba(X_test)[:, 1]  # score for the positive class
metrics.roc_auc_score(y_test, y_pred)
# MLPRegressor (multi-layer perceptron)
# Bug fixes: the class is MLPRegressor, not MLPRegression; the initial
# learning rate is set via `learning_rate_init` — `learning_rate` selects a
# schedule string ('constant' | 'invscaling' | 'adaptive'), not a float.
from sklearn.neural_network import MLPClassifier, MLPRegressor

reg = MLPRegressor(hidden_layer_sizes=(10, 10, 10), learning_rate_init=0.1)
# DecisionTreeClassifier
# Bug fix: roc_auc_score requires the predicted scores as its second argument.
from sklearn.tree import DecisionTreeClassifier

cls = DecisionTreeClassifier(max_depth=6, min_samples_split=10,
                             min_samples_leaf=5, max_features=0.7)
cls.fit(X_train, y_train)
y_pred = cls.predict_proba(X_test)[:, 1]  # score for the positive class
metrics.roc_auc_score(y_test, y_pred)
# RandomForestClassifier
# Bug fixes: the class was never imported; roc_auc_score requires the
# predicted scores as its second argument.
from sklearn.ensemble import RandomForestClassifier

cls = RandomForestClassifier(max_depth=6, min_samples_split=10,
                             min_samples_leaf=5, max_features=0.7)
cls.fit(X_train, y_train)
y_pred = cls.predict_proba(X_test)[:, 1]  # score for the positive class
metrics.roc_auc_score(y_test, y_pred)
# LGBMClassifier (third-party package: lightgbm)
# Bug fixes: the class was never imported, and it was being passed sklearn
# tree parameter names LightGBM does not accept. LightGBM equivalents:
#   min_samples_leaf -> min_child_samples
#   max_features     -> colsample_bytree (fraction of features per tree)
# min_samples_split has no direct LightGBM analogue and is dropped.
from lightgbm import LGBMClassifier

cls = LGBMClassifier(max_depth=6, min_child_samples=5, colsample_bytree=0.7)
# ExtraTreesClassifier: sklearn.ensemble.ExtraTreesClassifier — same API and
# parameters as RandomForestClassifier above.

# XGBClassifier (third-party package: xgboost)
# Bug fixes: the class was never imported; min_samples_split/min_samples_leaf/
# max_features are sklearn names — XGBoost uses min_child_weight and
# colsample_bytree for the analogous controls.
from xgboost import XGBClassifier

cls = XGBClassifier(max_depth=6, min_child_weight=5, colsample_bytree=0.7)
cls.fit(X_train, y_train)
y_pred = cls.predict_proba(X_test)[:, 1]  # score for the positive class
metrics.roc_auc_score(y_test, y_pred)