补交作业:忘记交了
9、主成分分析
7.逻辑回归实践
1.手写数字数据集
- from sklearn.datasets import load_digits
- digits = load_digits()
from sklearn.datasets import load_digits
import numpy as np
# Fetch the handwritten-digits dataset that ships with scikit-learn.
digits = load_digits()

# Feature matrix converted to float32.
x_data = np.asarray(digits.data, dtype=np.float32)
# Labels converted to float32 and given a trailing axis so they form a
# single column (one row per sample).
y_data = np.asarray(digits.target, dtype=np.float32)[:, np.newaxis]
2.图片数据预处理
- x:归一化MinMaxScaler()
- y:独热编码OneHotEncoder()或to_categorical
- 训练集测试集划分
- 张量结构
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
# Rescale every feature column with MinMaxScaler (default settings) and
# print the scaled matrix for inspection.
scaler = MinMaxScaler()
x_data = scaler.fit_transform(x_data)
print(x_data)

# Reshape the flat feature rows into 8x8 single-channel images so they can be
# fed to 2-D convolution layers.
x = x_data.reshape(-1, 8, 8, 1)

# One-hot encode the label column.
# NOTE(review): .todense() returns an np.matrix rather than an ndarray; it is
# kept here so downstream shapes are unchanged, but .toarray() is the
# forward-compatible choice — confirm before switching.
y = OneHotEncoder().fit_transform(y_data).todense()

# Hold out 20% of the samples for testing, stratified on the labels, with a
# fixed seed for reproducibility.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0, stratify=y
)
# Fixed: the original call was missing its closing parenthesis (SyntaxError).
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)
3.设计卷积神经网络结构
- 绘制模型结构图,并说明设计依据。
# Build the CNN as a linear stack of layers.
# NOTE(review): Sequential, Conv2D, MaxPool2D, Dropout, Flatten and Dense are
# not imported in the visible part of this file — presumably they come from
# Keras; confirm and add the import.
model = Sequential()

# Kernel size shared by every convolution layer; padding and activation are
# likewise kept identical across them.
ks = (5, 5)
# Per-sample input shape taken from the training images.
# Fixed: the original read the undefined name `X_train`; the train/test split
# defines `x_train`.
input_shape = x_train.shape[1:]

# Convolution layer 1
model.add(Conv2D(filters=16, kernel_size=ks, padding='same', input_shape=input_shape, activation='relu'))
# Pooling layer 1
model.add(MaxPool2D(pool_size=(2, 2)))
# Dropout to reduce overfitting by randomly dropping connections
model.add(Dropout(0.25))

# Convolution layer 2
model.add(Conv2D(filters=32, kernel_size=ks, padding='same', activation='relu'))
# Pooling layer 2
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# Convolution layers 3 and 4 (stacked without pooling in between)
model.add(Conv2D(filters=64, kernel_size=ks, padding='same', activation='relu'))
model.add(Conv2D(filters=128, kernel_size=ks, padding='same', activation='relu'))
# Pooling layer 3
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# Flatten the feature maps into a vector
model.add(Flatten())
# Fully connected layer
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.25))
# Output layer: 10 units with softmax, one per digit class
model.add(Dense(10, activation='softmax'))

# Show each layer's output shape and parameter count. summary() prints the
# table itself and returns None, so it is not wrapped in print().
model.summary()
4.模型训练
- model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
- train_history = model.fit(x=x_train, y=y_train, validation_split=0.2, batch_size=300, epochs=10, verbose=2)
5.模型评价
- model.evaluate()
- 交叉表与交叉矩阵
- pandas.crosstab
- seaborn.heatmap
# Imports used only by the evaluation step. matplotlib is imported here
# because the original called plt.show() without ever importing plt.
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Evaluate the model on the held-out test set.
# NOTE(review): assumes `model` has already been compiled and trained — confirm.
score = model.evaluate(x_test, y_test)
print(score)

# Predicted class index for each test sample. Sequential.predict_classes has
# been removed from current Keras releases; taking the argmax over the
# predicted class probabilities is the documented replacement.
y_pred = np.argmax(model.predict(x_test), axis=-1)
# The original left these as bare notebook expressions, which do nothing in a
# script; print them explicitly.
print(y_pred[:10])
print(y_test[:10])

# Collapse the one-hot test labels back to class indices as a flat 1-D
# ndarray. np.asarray(...).ravel() works whether y_test is an ndarray or an
# np.matrix, unlike indexing row 0 of a reshaped matrix.
y_test1 = np.asarray(np.argmax(y_test, axis=1)).ravel()
print(y_test1.shape)
print(y_pred.shape)

# Cross-tabulate true labels (rows) against predictions (columns).
a = pd.crosstab(y_test1, y_pred, rownames=['lables'], colnames=['predict'])
# Keep the DataFrame copy under the original name.
df = pd.DataFrame(a)

# Draw the table as an annotated heatmap. Single-letter matplotlib colour
# codes are lowercase, so the grid-line colour is 'g' rather than 'G'.
sns.heatmap(df, annot=True, cmap="YlGnBu", linewidths=0.2, linecolor='g')
plt.show()