# Environment: win10 64-bit, Python 3.6, PyCharm 2018.1.1
# BUG FIX: this line was bare (non-comment) text, which made the whole file a
# SyntaxError; it is now a comment.
# Demo: zero_one_loss -- fraction (normalize=True) or count (normalize=False)
# of misclassified samples.
from sklearn.metrics import zero_one_loss

y_true = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]
y_pred = [0, 0, 0, 1, 1, 1, 1, 1, 0, 0]
# Fraction of misclassified samples (6 of 10 differ -> 0.6).
print("zero_one_loss:", zero_one_loss(y_true, y_pred, normalize=True))
# Absolute number of misclassified samples (6).
print("zero_one_loss:", zero_one_loss(y_true, y_pred, normalize=False))
from sklearn.metrics import log_loss

# Demo: log_loss evaluated on predicted class probabilities.
# Each row of y_pred holds [P(class 0), P(class 1)] for one sample.
y_true = [1, 1, 1, 0, 0, 0]
y_pred = [
    [0.1, 0.9],
    [0.2, 0.8],
    [0.3, 0.7],
    [0.7, 0.3],
    [0.8, 0.2],
    [0.9, 0.1],
]
# Per-sample average loss first, then the un-normalized total.
print('log_loss:', log_loss(y_true, y_pred, normalize=True))
print('log_loss:', log_loss(y_true, y_pred, normalize=False))
from sklearn.model_selection import train_test_split

# Demo: train_test_split with and without stratified sampling.
# Rows are easy-to-recognize patterns: row r holds 10*r + (1..4).
X = [[10 * r + c for c in (1, 2, 3, 4)] for r in range(8)]
y = [1, 1, 0, 0, 1, 1, 0, 0]

# Plain random split: 60% train / 40% test, seeded for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=0)
print('X_train=', X_train)
print('X_test=', X_test)
print('y_train=', y_train)
print('y_test=', y_test)

# Stratified split: preserves the class ratio of y in both subsets.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=0, stratify=y)
print('stratify:X_train=', X_train)
print('stratify:X_test=', X_test)
print('stratify:y_train=', y_train)
print('stratify:y_test=', y_test)
from sklearn.model_selection import KFold
import numpy as np

# Demo: KFold cross-validation splitting -- sequential folds vs shuffled folds.
X = np.array([
    [1, 2, 3, 4],
    [11, 12, 13, 14],
    [21, 22, 23, 24],
    [31, 32, 33, 34],
    [41, 42, 43, 44],
    [51, 52, 53, 54],
    [61, 62, 63, 64],
    [71, 72, 73, 74],
    [81, 82, 83, 84],
])
y = np.array([1, 1, 0, 0, 1, 1, 0, 0, 1])

# BUG FIX: random_state is only meaningful together with shuffle=True;
# passing it with shuffle=False raises ValueError in scikit-learn >= 0.24.
folder = KFold(n_splits=3, shuffle=False)
for train_index, test_index in folder.split(X, y):
    print('train_index:', train_index)
    print('test_index:', test_index)
    print('X_train:', X[train_index])
    print('X_test:', X[test_index])
    print("")

# Shuffled variant: fold membership is randomized (seeded for reproducibility).
shuffle_folder = KFold(n_splits=3, random_state=0, shuffle=True)
for train_index, test_index in shuffle_folder.split(X, y):
    print('shuffle train_index:', train_index)
    print('shuffle test_index:', test_index)
    print('shuffle X_train:', X[train_index])
    print('shuffle X_test:', X[test_index])
    print("")
from sklearn.model_selection import KFold, StratifiedKFold
import numpy as np

# Demo: plain KFold vs StratifiedKFold on the same data; the stratified
# splitter keeps the class ratio of y in every fold.
X = np.array([
    [1, 2, 3, 4],
    [11, 12, 13, 14],
    [21, 22, 23, 24],
    [31, 32, 33, 34],
    [41, 42, 43, 44],
    [51, 52, 53, 54],
    [61, 62, 63, 64],
    [71, 72, 73, 74],
])
y = np.array([1, 1, 0, 0, 1, 1, 0, 0])

# BUG FIX: random_state is only valid together with shuffle=True; passing it
# with shuffle=False raises ValueError in scikit-learn >= 0.24.
folder = KFold(n_splits=4, shuffle=False)
for train_index, test_index in folder.split(X, y):
    print('train_index:', train_index)
    print('test_index:', test_index)
    print('X_train:', X[train_index])
    print('X_test:', X[test_index])
    print("")

stratified_folder = StratifiedKFold(n_splits=4, shuffle=False)
for train_index, test_index in stratified_folder.split(X, y):
    # BUG FIX: these labels previously said 'shuffle' although this loop
    # demonstrates stratified (unshuffled) splitting.
    print('stratified train_index:', train_index)
    print('stratified test_index:', test_index)
    print('stratified X_train:', X[train_index])
    print('stratified X_test:', X[test_index])
    print("")
from sklearn.model_selection import LeaveOneOut
import numpy as np

# Demo: leave-one-out CV -- every sample is the test set exactly once.
X = np.array([[1, 2, 3, 4],
              [11, 12, 13, 14],
              [21, 22, 23, 24],
              [31, 32, 33, 34]])
y = np.array([1, 1, 0, 0])

lo = LeaveOneOut()
lo.get_n_splits(X)  # number of splits equals the number of samples
for train_index, test_index in lo.split(X):
    print('train_index:', train_index)
    print('test_index:', test_index)
    print('X_train:', X[train_index])
    print('X_test:', X[test_index])
    print("")
from sklearn.model_selection import cross_val_score
from sklearn.datasets import load_digits
from sklearn.svm import LinearSVC

# Demo: 10-fold cross-validated accuracy of a linear SVM on the digits set.
digits = load_digits()
X, y = digits.data, digits.target
fold_scores = cross_val_score(LinearSVC(), X, y, cv=10)
print('cross val score is:', fold_scores)
from sklearn.metrics import accuracy_score

# Demo: accuracy as a fraction (normalize=True) vs a raw correct count.
y_true = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]
y_pre = [0, 0, 1, 1, 0, 0, 1, 1, 0, 0]
print('Accuracy score(normalize=True):',
      accuracy_score(y_true, y_pre, normalize=True))
print('Accuracy score(normalize=False):',
      accuracy_score(y_true, y_pre, normalize=False))
from sklearn.metrics import accuracy_score, precision_score

# Demo: accuracy vs precision on the same predictions.
y_true = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]
y_pre = [0, 0, 1, 1, 0, 0, 1, 1, 0, 0]
print('Accuracy score(normalize=True):',
      accuracy_score(y_true, y_pre, normalize=True))
# BUG FIX: precision_score() has no 'normalize' parameter -- the original call
# raised TypeError. The misleading print label is corrected accordingly.
print('precision score:', precision_score(y_true, y_pre))
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Demo: accuracy, precision and recall on the same predictions.
y_true = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]
y_pre = [0, 0, 1, 1, 0, 0, 1, 1, 0, 0]
print('Accuracy score(normalize=True):',
      accuracy_score(y_true, y_pre, normalize=True))
# BUG FIX: the label wrongly claimed 'normalize=False'; precision_score has
# no such parameter and none is passed here.
print('precision score:', precision_score(y_true, y_pre))
print('Recall Score:', recall_score(y_true, y_pre))
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Demo: F1 = harmonic mean of precision and recall.
y_true = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]
y_pre = [0, 0, 1, 1, 0, 0, 0, 0, 0, 0]
print('Accuracy score(normalize=True):',
      accuracy_score(y_true, y_pre, normalize=True))
# BUG FIX: the label wrongly claimed 'normalize=False'; precision_score has
# no such parameter and none is passed here.
print('precision score:', precision_score(y_true, y_pre))
print('Recall Score:', recall_score(y_true, y_pre))
print('F1 score:', f1_score(y_true, y_pre))
from sklearn.metrics import (accuracy_score, precision_score, recall_score,
                             f1_score, fbeta_score)

# Demo: F-beta score. beta -> 0 approaches precision, beta=1 is F1, and large
# beta approaches recall.
y_true = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]
y_pre = [0, 0, 1, 1, 0, 0, 0, 0, 0, 0]
print('Accuracy score(normalize=True):',
      accuracy_score(y_true, y_pre, normalize=True))
# BUG FIX: the label wrongly claimed 'normalize=False'; precision_score has
# no such parameter and none is passed here.
print('precision score:', precision_score(y_true, y_pre))
print('Recall Score:', recall_score(y_true, y_pre))
print('F1 score:', f1_score(y_true, y_pre))
print('Fbeta Score(beta=0.001):', fbeta_score(y_true, y_pre, beta=0.001))
print('Fbeta Score(beta=1):', fbeta_score(y_true, y_pre, beta=1))
print('Fbeta Score(beta=10):', fbeta_score(y_true, y_pre, beta=10))
print('Fbeta Score(beta=10000):', fbeta_score(y_true, y_pre, beta=10000))
from sklearn.metrics import classification_report

# Demo: per-class precision/recall/F1 summary table.
# target_names maps label 0 -> 'class_0' and label 1 -> 'class_1'.
y_true = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]
y_pre = [0, 0, 1, 1, 0, 0, 0, 0, 0, 0]
report = classification_report(y_true, y_pre,
                               target_names=['class_0', 'class_1'])
print('classification_report:\n', report)
from sklearn.metrics import confusion_matrix

# Demo: confusion matrix with an explicit row/column label order [0, 1].
y_true = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]
y_pre = [0, 0, 1, 1, 0, 0, 0, 0, 0, 0]
print('confusion matrix:\n', confusion_matrix(y_true, y_pre, labels=[0, 1]))
# BUG FIX: the line below was bare (non-comment) Chinese text ("not handled
# for now") and made the file a SyntaxError; it is now a comment.
# Not handled for now:
# precision_recall_curve -- computes the P-R curve of a classification result
# roc_curve -- computes the ROC curve of a classification result
# roc_auc_score -- computes the area under the ROC curve
from sklearn.metrics import mean_absolute_error

# Demo: mean absolute error between true and predicted values.
y_true = [1, 1, 1, 1, 1, 2, 2, 2, 0, 0]
y_pred = [0, 0, 0, 1, 1, 1, 0, 0, 0, 0]
print('mean absolute error:', mean_absolute_error(y_true, y_pred))
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Demo: MAE vs MSE on the same regression targets; MSE penalizes the
# larger residuals more heavily.
y_true = [1, 1, 1, 1, 1, 2, 2, 2, 0, 0]
y_pred = [0, 0, 0, 1, 1, 1, 0, 0, 0, 0]
print('mean absolute error:', mean_absolute_error(y_true, y_pred))
print('mean square error:', mean_squared_error(y_true, y_pred))
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_digits
from sklearn.svm import LinearSVC
from sklearn.model_selection import validation_curve

# Demo: validation curve -- train/test accuracy of LinearSVC as the
# regularization parameter C sweeps over a log-spaced range.
digits = load_digits()
X, y = digits.data, digits.target
param_name = 'C'
param_range = np.logspace(-2, 2)  # 50 values from 1e-2 to 1e2
train_scores, test_scores = validation_curve(
    LinearSVC(), X, y, param_name=param_name, param_range=param_range,
    cv=10, scoring='accuracy')
# Mean +/- std across the 10 CV folds, per value of C.
train_scores_mean = np.mean(train_scores, axis=1)
train_scores_std = np.std(train_scores, axis=1)
test_scores_mean = np.mean(test_scores, axis=1)
test_scores_std = np.std(test_scores, axis=1)

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.semilogx(param_range, train_scores_mean, label='Training Accuracy', color='r')
ax.fill_between(param_range, train_scores_mean - train_scores_std,
                train_scores_mean + train_scores_std, alpha=0.2, color='r')
# BUG FIX: the test-accuracy line was drawn in red ('r'), identical to the
# training line and inconsistent with its green error band below; use 'g'.
ax.semilogx(param_range, test_scores_mean, label='Test Accuracy', color='g')
ax.fill_between(param_range, test_scores_mean - test_scores_std,
                test_scores_mean + test_scores_std, alpha=0.2, color='g')
ax.set_title('validation Curve with LinearSVC')
ax.set_xlabel('C')
ax.set_ylabel('score')
ax.set_ylim(0, 1.1)
ax.legend(loc='best')
plt.show()
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_digits
from sklearn.svm import LinearSVC
from sklearn.model_selection import learning_curve

# Demo: learning curve -- train/test accuracy of LinearSVC as the amount of
# training data grows from 10% to 100% of the available samples.
digits = load_digits()
X, y = digits.data, digits.target
train_sizes = np.linspace(0.1, 1.0, endpoint=True, dtype='float')
# learning_curve also returns the absolute sample counts used per step.
abs_trains_sizes, train_scores, test_scores = learning_curve(
    LinearSVC(), X, y, cv=10, scoring='accuracy', train_sizes=train_sizes)
# Mean +/- std across the 10 CV folds, per training-set size.
train_scores_mean = np.mean(train_scores, axis=1)
train_scores_std = np.std(train_scores, axis=1)
test_scores_mean = np.mean(test_scores, axis=1)
test_scores_std = np.std(test_scores, axis=1)

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.plot(abs_trains_sizes, train_scores_mean, label='Training Accuracy', color='r')
ax.fill_between(abs_trains_sizes, train_scores_mean - train_scores_std,
                train_scores_mean + train_scores_std, alpha=0.2, color='r')
# BUG FIX: the test-accuracy line was drawn in red ('r'), identical to the
# training line and inconsistent with its green error band below; use 'g'.
ax.plot(abs_trains_sizes, test_scores_mean, label='Test Accuracy', color='g')
ax.fill_between(abs_trains_sizes, test_scores_mean - test_scores_std,
                test_scores_mean + test_scores_std, alpha=0.2, color='g')
ax.set_title('Learning Curve with LinearSVC')
ax.set_xlabel('Sample Nums')
ax.set_ylabel('score')
ax.set_ylim(0, 1.1)
ax.legend(loc='best')
plt.show()
from sklearn.datasets import load_digits
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

# Demo: exhaustive grid search over LogisticRegression hyper-parameters,
# evaluated with 10-fold CV on a stratified train split of the digits data.
digits = load_digits()
X_train, X_test, y_train, y_test = train_test_split(
    digits.data, digits.target, test_size=0.25, random_state=0,
    stratify=digits.target)
# Two sub-grids: liblinear supports l1/l2 penalties (ovr only); lbfgs
# supports l2 with both ovr and multinomial schemes.
tuned_parameters = [
    {
        'penalty': ['l1', 'l2'],
        'C': [0.01, 0.05, 0.1, 0.5, 1, 5, 10, 50, 100],
        'solver': ['liblinear'],
        'multi_class': ['ovr']
    },
    {
        'penalty': ['l2'],
        'C': [0.01, 0.05, 0.1, 0.5, 1, 5, 10, 50, 100],
        'solver': ['lbfgs'],
        'multi_class': ['ovr', 'multinomial']
    },
]
clf = GridSearchCV(LogisticRegression(tol=1e-6), tuned_parameters, cv=10)
clf.fit(X_train, y_train)
print('Best parameters set found:', clf.best_params_)
print('Grid scores:')
# BUG FIX: grid_scores_ was deprecated in scikit-learn 0.18 and removed in
# 0.20; read the equivalent per-candidate data from cv_results_ instead.
results = clf.cv_results_
for mean, std, params in zip(results['mean_test_score'],
                             results['std_test_score'],
                             results['params']):
    print('\t%0.3f (+/-%0.03f)for %s' % (mean, std * 2, params))
print('Optimized score:', clf.score(X_test, y_test))
print('Detailed classification report:')
y_true, y_pred = y_test, clf.predict(X_test)
print(classification_report(y_true, y_pred))
from sklearn.datasets import load_digits
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
# BUG FIX: a bare 'import scipy' does not guarantee the scipy.stats submodule
# is loaded (it only worked by the side effect of sklearn importing it);
# import the submodule explicitly.
import scipy.stats

# Demo: randomized hyper-parameter search -- C is sampled from an exponential
# distribution instead of enumerating a fixed grid.
digits = load_digits()
X_train, X_test, y_train, y_test = train_test_split(
    digits.data, digits.target, test_size=0.25, random_state=0,
    stratify=digits.target)
tuned_parameters = {
    'C': scipy.stats.expon(scale=100),
    'multi_class': ['ovr', 'multinomial']
}
clf = RandomizedSearchCV(
    LogisticRegression(penalty='l2', solver='lbfgs', tol=1e-6),
    tuned_parameters, cv=10, scoring='accuracy', n_iter=100)
clf.fit(X_train, y_train)
print('Best parameters set found:', clf.best_params_)
print('Grid scores:')
# BUG FIX: grid_scores_ was deprecated in scikit-learn 0.18 and removed in
# 0.20; read the equivalent per-candidate data from cv_results_ instead.
results = clf.cv_results_
for mean, std, params in zip(results['mean_test_score'],
                             results['std_test_score'],
                             results['params']):
    print('\t%0.3f (+/-%0.03f)for %s' % (mean, std * 2, params))
print('Optimized score:', clf.score(X_test, y_test))
print('Detailed classification report:')
y_true, y_pred = y_test, clf.predict(X_test)
print(classification_report(y_true, y_pred))