from sklearn.datasets import fetch_openml
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
def sort_by_target(mnist):
    # Reorder each half of the dataset (first 60,000 = train, rest = test)
    # so the images within that half appear sorted by label.
    reorder_train = np.array(sorted([(target, i) for i, target in enumerate(mnist.target[:60000])]))[:, 1]
    reorder_test = np.array(sorted([(target, i) for i, target in enumerate(mnist.target[60000:])]))[:, 1]
    mnist.data[:60000] = mnist.data[reorder_train]
    mnist.target[:60000] = mnist.target[reorder_train]
    mnist.data[60000:] = mnist.data[reorder_test + 60000]
    mnist.target[60000:] = mnist.target[reorder_test + 60000]
mnist = fetch_openml('mnist_784', version=1, as_frame=False)  # as_frame=False returns NumPy arrays, which sort_by_target's indexing expects
mnist.target = mnist.target.astype(np.int8)
sort_by_target(mnist)
X, y = mnist['data'], mnist['target']
some_digit = X[36000]  # one image, reused as a running example below
print(y[36000])
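# Sketch: show the example digit as an image (assumes the standard
# 784-pixel MNIST layout, i.e. 28x28 grayscale).
some_digit_image = some_digit.reshape(28, 28)
plt.imshow(some_digit_image, cmap=matplotlib.cm.binary, interpolation='nearest')
plt.axis('off')
plt.show()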
X_train, X_test, y_train, y_test = X[:60000], X[60000:], y[:60000], y[60000:]
shuffle_index = np.random.permutation(60000)  # shuffle the training set so the CV folds aren't biased by the sorted order
X_train, y_train = X_train[shuffle_index], y_train[shuffle_index]
y_train_5 = (y_train == 5)  # binary targets for the "is it a 5?" detector
y_test_5 = (y_test == 5)
from sklearn.linear_model import SGDClassifier
sgd_clf = SGDClassifier(random_state=42)
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score , recall_score , f1_score
from sklearn.metrics import precision_recall_curve
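# Sketch: evaluate the binary 5-detector with the tools imported above
# (y_train_pred_5 is a name introduced here; cv=3 keeps the run short).
print(cross_val_score(sgd_clf, X_train, y_train_5, cv=3, scoring='accuracy'))
y_train_pred_5 = cross_val_predict(sgd_clf, X_train, y_train_5, cv=3)
print(confusion_matrix(y_train_5, y_train_pred_5))
print(precision_score(y_train_5, y_train_pred_5))
print(recall_score(y_train_5, y_train_pred_5))
print(f1_score(y_train_5, y_train_pred_5))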
def plot_precision_recall_vs_threshold(precisions, recalls, thresholds):
    # precision_recall_curve returns one more precision/recall value than
    # thresholds, so drop the last element of each before plotting.
    plt.plot(thresholds, precisions[:-1], "b--", label="Precision")
    plt.plot(thresholds, recalls[:-1], "g-", label="Recall")
    plt.xlabel('Threshold')
    plt.legend(loc='upper left')
    plt.ylim([0, 1])
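# Sketch: get decision scores for every training image via cross-validation,
# then plot precision and recall against the threshold (y_scores_5 is a name
# introduced here).
y_scores_5 = cross_val_predict(sgd_clf, X_train, y_train_5, cv=3, method='decision_function')
precisions, recalls, thresholds = precision_recall_curve(y_train_5, y_scores_5)
plot_precision_recall_vs_threshold(precisions, recalls, thresholds)
plt.show()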
def plot_precision_vs_recall(precisions, recalls):
    plt.plot(recalls, precisions, "b--", linewidth=2)
    plt.xlabel('Recall', fontsize=16)
    plt.ylabel('Precision', fontsize=16)
    plt.axis([0, 1, 0, 1])
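# Sketch: reuse the precisions/recalls computed above to plot the
# precision/recall trade-off directly.
plot_precision_vs_recall(precisions, recalls)
plt.show()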
from sklearn.metrics import roc_curve
def plot_roc_curve(fpr, tpr, label=None):
    plt.plot(fpr, tpr, 'b--', linewidth=2, label=label)
    plt.plot([0, 1], [0, 1], 'k--')  # diagonal = purely random classifier
    plt.axis([0, 1, 0, 1])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
from sklearn.metrics import roc_auc_score
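# Sketch: ROC curve and AUC for the SGD 5-detector, reusing y_scores_5 from the
# precision/recall sketch above.
fpr, tpr, thresholds = roc_curve(y_train_5, y_scores_5)
plot_roc_curve(fpr, tpr)
plt.show()
print(roc_auc_score(y_train_5, y_scores_5))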
from sklearn.ensemble import RandomForestClassifier
forest_clf = RandomForestClassifier(random_state=42)
# RandomForestClassifier has no decision_function, so use the predicted
# probability of the positive class as the score for the ROC curve.
y_probas_forest = cross_val_predict(forest_clf, X_train, y_train_5, cv=3, method='predict_proba')
y_scores_forest = y_probas_forest[:, 1]
fpr_forest, tpr_forest, thresholds_forest = roc_curve(y_train_5, y_scores_forest)
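# Sketch: overlay the random forest's ROC curve on the SGD curve from above
# and report the forest's AUC.
plt.plot(fpr, tpr, 'b:', label='SGD')
plot_roc_curve(fpr_forest, tpr_forest, label='Random Forest')
plt.legend(loc='lower right')
plt.show()
print(roc_auc_score(y_train_5, y_scores_forest))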
# Multiclass: fitting on y_train (all ten digits) makes SGDClassifier train one
# binary classifier per class (one-versus-rest) and pick the highest score.
sgd_clf.fit(X_train, y_train)
print(sgd_clf.predict([some_digit]))
some_digit_scores = sgd_clf.decision_function([some_digit])  # one score per class
print(some_digit_scores)
print(np.argmax(some_digit_scores))
print(sgd_clf.classes_)
from sklearn.multiclass import OneVsOneClassifier
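# Sketch: force a one-versus-one strategy instead by wrapping the classifier
# (ovo_clf is a name introduced here).
ovo_clf = OneVsOneClassifier(SGDClassifier(random_state=42))
ovo_clf.fit(X_train, y_train)
print(ovo_clf.predict([some_digit]))
print(len(ovo_clf.estimators_))  # one binary classifier per pair of classes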
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train.astype(np.float64))
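# Sketch: scaling the inputs usually improves SGD's cross-validated accuracy.
print(cross_val_score(sgd_clf, X_train_scaled, y_train, cv=3, scoring='accuracy'))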
y_train_pred = cross_val_predict(sgd_clf,X_train_scaled,y_train,cv=3)
conf_mx = confusion_matrix(y_train,y_train_pred)
plt.matshow(conf_mx,cmap=plt.cm.gray)
plt.show()
# Compare error rates rather than raw counts: divide each row by the number of
# images in that class, then zero the diagonal so only the errors stand out.
row_sums = conf_mx.sum(axis=1, keepdims=True)
norm_conf_mx = conf_mx / row_sums
np.fill_diagonal(norm_conf_mx, 0)  # fill_diagonal modifies the array in place and returns None
plt.matshow(norm_conf_mx,cmap=plt.cm.gray)
plt.show()
from sklearn.neighbors import KNeighborsClassifier
y_train_large = (y_train >= 7)
y_train_odd = (y_train % 2 == 1)
y_multilabel = np.c_[y_train_large, y_train_odd]  # two labels per image: "large digit" and "odd digit"
knn_clf = KNeighborsClassifier()
knn_clf.fit(X_train, y_multilabel)
print(knn_clf.predict([some_digit]))
y_train_knn_pred = cross_val_predict(knn_clf, X_train, y_multilabel, cv=3)
print(f1_score(y_multilabel, y_train_knn_pred, average='macro'))  # macro averaging weights both labels equally