数据集 caltech101 中给出了经过转化的 102 种物体图像数据
(128*128 像素),共 9144 个样例,相应的类标签在
caltech101_labels 中给出。在此基础上将原始数据划分为训练集(80%)
和测试集(20%)。(如果计算机性能有限,可以从 102 种物体中任意
抽取 10~20 种作为数据集)
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import SGD
from sklearn.model_selection import train_test_split
# Read the class labels and the image table from their CSV files.
labels = pd.read_csv('caltech101_labels.csv')
data = pd.read_csv('caltech101.csv')

# Discard the first column of the image table
# (presumably a saved row index -- TODO confirm against the CSV).
data = data.drop(data.columns[0], axis=1)

# Hold out 20% of the samples for testing; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    data,
    labels,
    test_size=0.2,
    random_state=1,
)

# One-hot encode both label sets over the 102 categories.
num_classes = 102
y_train, y_test = (
    keras.utils.to_categorical(split_labels, num_classes)
    for split_labels in (y_train, y_test)
)
建立具有两个隐藏层以及 Dropout 层的神经网络模型对图像数据进行分类,并对模型性能进行评价。
# Fully connected classifier: 16384 input features, four ReLU hidden layers
# (dropout after the last three) and a 102-way softmax output.
model = Sequential()
model.add(Dense(4092, activation='relu', input_shape=(16384,)))
model.add(Dense(2048, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(2048, activation='relu'))
model.add(Dropout(0.1))
model.add(Dense(1024, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(102, activation='softmax'))

# NOTE(review): `lr` / `decay` are the legacy argument names; newer Keras
# releases expect `learning_rate` -- confirm against the installed version.
sgd = SGD(lr=0.01, decay=1e-4, momentum=0.9, nesterov=True)
model.summary()
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

# The test split doubles as validation data during training.
history = model.fit(x_train, y_train, batch_size=128, epochs=30,
                    verbose=1, validation_data=(x_test, y_test))

# Evaluate on the held-out test split so the numbers match the "Test" labels
# printed below (the previous version evaluated on the training split here).
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
训练集:0.59
测试集:0.41
分别建立具有 1 个和 2 个卷积层的 CNN 模型对图像数据进行分类,并对模型性能进行评价,重点考察卷积核数量对模型性能的影响。
from sklearn.datasets import fetch_mldata
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.layers import Conv2D, MaxPooling2D,Flatten
from keras.optimizers import SGD
# Use only the first 5000 samples for the CNN experiments
# (presumably to limit compute/memory -- TODO confirm).
data1 = data.iloc[:5000, :]
label1 = labels.iloc[:5000, :]

img_rows, img_cols = 128, 128
X_train, X_test, y_train, y_test = train_test_split(
    data1, label1, test_size=0.2, random_state=1)

# Reshape each flat row into a single-channel image: (samples, rows, cols, 1).
X_train = np.array(X_train).reshape(X_train.shape[0], img_rows, img_cols, 1)
X_test = np.array(X_test).reshape(X_test.shape[0], img_rows, img_cols, 1)
input_shape = (img_rows, img_cols, 1)

# Convert class vectors to binary class matrices.
# The one-hot width must exceed the largest label id. Counting distinct labels
# only gives that when the ids in this subset are exactly 0..k-1, so derive
# the width from the maximum id instead (same value in that contiguous case).
num_classes = int(label1.iloc[:, 0].max()) + 1
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
# CNN with two convolution + max-pooling stages (16 filters of 3x3 each,
# 'same' padding), followed by two dense layers with dropout and a softmax
# output over `num_classes` categories.
model = Sequential([
    Conv2D(16, kernel_size=(3, 3), activation='relu',
           input_shape=input_shape, padding='same'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(16, kernel_size=(3, 3), activation='relu', padding='same'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(32 * 32 * 16, activation='relu'),
    Dropout(0.25),
    Dense(128, activation='relu'),
    Dropout(0.25),
    Dense(num_classes, activation='softmax'),
])

# SGD with Nesterov momentum and a small learning-rate decay.
sgd = SGD(lr=0.01, decay=1e-4, momentum=0.9, nesterov=True)
model.compile(
    loss='categorical_crossentropy',
    optimizer=sgd,
    metrics=['accuracy'],
)

# Train for 5 epochs; the test split is also used as validation data.
model.fit(
    X_train,
    y_train,
    batch_size=128,
    epochs=5,
    verbose=1,
    validation_data=(X_test, y_test),
)

# Report loss and accuracy on the test split.
score = model.evaluate(x=X_test, y=y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
Test loss: 0.3388944187760353
Test accuracy: 0.91
2. 训练集
# Report loss and accuracy of the current model on the training split.
score = model.evaluate(x=X_train, y=y_train, verbose=0)
for caption, metric in zip(('Train loss:', 'Train accuracy:'), score):
    print(caption, metric)
Train loss: 1.37
Train accuracy: 0.655
搭建只含 1 个卷积层的模型(容易报错)
# CNN with a single convolution + max-pooling stage (16 filters of 3x3,
# 'same' padding), followed by two dense layers with dropout and a softmax
# output over `num_classes` categories.
model = Sequential([
    Conv2D(16, kernel_size=(3, 3), activation='relu',
           input_shape=input_shape, padding='same'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(16 * 16 * 16, activation='relu'),
    Dropout(0.25),
    Dense(128, activation='relu'),
    Dropout(0.25),
    Dense(num_classes, activation='softmax'),
])

# SGD with Nesterov momentum and a small learning-rate decay.
sgd = SGD(lr=0.01, decay=1e-4, momentum=0.9, nesterov=True)
model.compile(
    loss='categorical_crossentropy',
    optimizer=sgd,
    metrics=['accuracy'],
)

# Train for 5 epochs; the test split is also used as validation data.
model.fit(
    X_train,
    y_train,
    batch_size=128,
    epochs=5,
    verbose=1,
    validation_data=(X_test, y_test),
)

# Report loss and accuracy on the test split.
score = model.evaluate(x=X_test, y=y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
Test loss: 1.47
Test accuracy: 0.629
2. 训练集
# Report loss and accuracy of the current model on the training split.
score = model.evaluate(x=X_train, y=y_train, verbose=0)
for caption, metric in zip(('Train loss:', 'Train accuracy:'), score):
    print(caption, metric)
Train loss: 1.192
Train accuracy: 0.6905