在做图片中缺陷分类项目时出现正面报错信息:
Using TensorFlow backend.
(x_train.shape: (144, 1920, 2560)
(y_train.shape: (144, 12)
(x_test.shape: (24, 1920, 2560)
(y_test.shape: (24, 12)
x_train shape: (144, 1920, 2560, 1)
144 train samples
24 test samples
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_1 (Conv2D) (None, 1621, 2261, 32) 2880032
_________________________________________________________________
conv2d_2 (Conv2D) (None, 1619, 2259, 64) 18496
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 809, 1129, 64) 0
_________________________________________________________________
dropout_1 (Dropout) (None, 809, 1129, 64) 0
_________________________________________________________________
flatten_1 (Flatten) (None, 58455104) 0
_________________________________________________________________
dense_1 (Dense) (None, 128) -110768115
_________________________________________________________________
dropout_2 (Dropout) (None, 128) 0
_________________________________________________________________
dense_2 (Dense) (None, 12) 1548
=================================================================
Total params: -1,104,781,076.0
Trainable params: -1,104,781,076.0
Non-trainable params: 0.0
_________________________________________________________________
+++++++++++++model.compile+++++++++++++++++
(x_train.shape: (144, 1920, 2560, 1)
(y_train.shape: (1728, 12)
(x_test.shape: (24, 1920, 2560, 1)
(y_test.shape: (288, 12)
Traceback (most recent call last):
File "d:/TIANCHI/code/al1_ng.py", line 131, in
validation_data=(x_test, y_test))
File "C:\ProgramData\Anaconda3\lib\site-packages\keras\models.py", line 845, in fit
initial_epoch=initial_epoch)
File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py", line 1405, in fit
batch_size=batch_size)
File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py", line 1307, in _standardize_user_data
_check_array_lengths(x, y, sample_weights)
File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py", line 229, in _check_array_lengths
'and ' + str(list(set_y)[0]) + ' target samples.')
ValueError: Input arrays should have the same number of samples as target arrays. Found 144 input samples and 1728 target samples.
代码如下:
from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
from keras.callbacks import TensorBoard
import numpy as np
from PIL import Image
import os
#batch_size = 128
batch_size = 12
num_classes = 12
#epochs = 12
epochs = 5
# input image dimensions
img_rows, img_cols = 1920, 2560
MODEL_SAVE_PATH = "D:/TIANCHI/model"
# the data, shuffled and between train and test sets
#(x_train, y_train), (x_test, y_test) = mnist.load_data()
#(x_train, y_train), (x_test, y_test) = mnist.load_data(path='D:/aura/data/mnist.npz')
def get_data_and_label(root_path):
image_labels = os.listdir(root_path)
x = []
y = []
for lbl in image_labels:
pathName = root_path + lbl
images = os.listdir(pathName)
label = np.zeros(12)
lbl_int = int(lbl)
label[lbl_int] = 1
for im in images:
imgPath = pathName + "/" + im
img = Image.open(imgPath)
#转为灰度图
img = img.convert("L")
image_array = np.array(img)
x.append(image_array)
y.append(label)
x = np.array(x)
y = np.array(y)
return x, y
train_path = 'D:/TIANCHI/data/train2_12/'
x_train,y_train = get_data_and_label(train_path)
print("(x_train.shape:",x_train.shape)
print("(y_train.shape:",y_train.shape)
test_path = 'D:/TIANCHI/data/test2_2/'
x_test,y_test = get_data_and_label(test_path)
print("(x_test.shape:",x_test.shape)
print("(y_test.shape:",y_test.shape)
if K.image_data_format() == 'channels_first':
x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
print("exec this method")
#三通道时要变为(3, img_rows, img_cols)
input_shape = (1, img_rows, img_cols)
else:
x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
input_shape = (img_rows, img_cols, 1)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
# Convert from [0, 255] -> [0.0, 1.0].
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
model = Sequential()
#32个卷积核,3x3,没有固定设置方法,可参考经典网络结构
model.add(Conv2D(32, kernel_size=(300, 300),
activation='relu',
input_shape=input_shape))
#卷积层
model.add(Conv2D(64, (3, 3), activation='relu'))
#池化层:对图像的一部分提取最大值或均值
model.add(MaxPooling2D(pool_size=(2, 2)))
#正则化,防止过拟合,丢掉一部分信息
model.add(Dropout(0.25))
#展平
model.add(Flatten())
#激活函数
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
#归一化
model.add(Dense(num_classes, activation='softmax'))
#查看模型结构信息
model.summary()
model.compile(loss=keras.losses.categorical_crossentropy,
optimizer="sgd", # Adam
metrics=['accuracy'])
#verbose=1看日志,verbose=0不看日志
print("+++++++++++++model.compile+++++++++++++++++")
print("(x_train.shape:",x_train.shape)
print("(y_train.shape:",y_train.shape)
print("(x_test.shape:",x_test.shape)
print("(y_test.shape:",y_test.shape)
model.fit(x_train, y_train,
batch_size=batch_size,
epochs=epochs,
verbose=1,
validation_data=(x_test, y_test))
score = model.evaluate(x_test, y_test, verbose=1)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
model.save(MODEL_SAVE_PATH)
查了很多资料都说是x_train与y_train长度不一致,但从报错信息看x_train与y_train的长度都是144。
我迷惑了。
最后因为看到报错是在model.fit时报的,我就在model.fit前重新打印了一下x_train和y_train的shape,这时发现长度确实不一致。
经过仔细研究代码发现我对y_train做了两次onehot.
第一次是在获取数据时:
label = np.zeros(12)
lbl_int = int(lbl)
label[lbl_int] = 1
可是在获取数据后又做了一次:
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
这就导致x_train与y_train长度不一致了。
把获取数据时的onehot删除后搞定!!!