ValueError: Input arrays should have the same number of samples as target arrays.

在做图片缺陷分类项目时出现下面的报错信息:

Using TensorFlow backend.
(x_train.shape: (144, 1920, 2560)
(y_train.shape: (144, 12)
(x_test.shape: (24, 1920, 2560)
(y_test.shape: (24, 12)
x_train shape: (144, 1920, 2560, 1)
144 train samples
24 test samples
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
conv2d_1 (Conv2D)            (None, 1621, 2261, 32)    2880032
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 1619, 2259, 64)    18496
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 809, 1129, 64)     0
_________________________________________________________________
dropout_1 (Dropout)          (None, 809, 1129, 64)     0
_________________________________________________________________
flatten_1 (Flatten)          (None, 58455104)          0
_________________________________________________________________
dense_1 (Dense)              (None, 128)               -110768115
_________________________________________________________________
dropout_2 (Dropout)          (None, 128)               0
_________________________________________________________________
dense_2 (Dense)              (None, 12)                1548
=================================================================
Total params: -1,104,781,076.0
Trainable params: -1,104,781,076.0
Non-trainable params: 0.0
_________________________________________________________________
+++++++++++++model.compile+++++++++++++++++
(x_train.shape: (144, 1920, 2560, 1)
(y_train.shape: (1728, 12)
(x_test.shape: (24, 1920, 2560, 1)
(y_test.shape: (288, 12)
Traceback (most recent call last):
  File "d:/TIANCHI/code/al1_ng.py", line 131, in
    validation_data=(x_test, y_test))
  File "C:\ProgramData\Anaconda3\lib\site-packages\keras\models.py", line 845, in fit
    initial_epoch=initial_epoch)
  File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py", line 1405, in fit
    batch_size=batch_size)
  File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py", line 1307, in _standardize_user_data
    _check_array_lengths(x, y, sample_weights)
  File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py", line 229, in _check_array_lengths
    'and ' + str(list(set_y)[0]) + ' target samples.')
ValueError: Input arrays should have the same number of samples as target arrays. Found 144 input samples and 1728 target samples.

 

 

代码如下:

from __future__ import print_function

import keras

from keras.datasets import mnist

from keras.models import Sequential

from keras.layers import Dense, Dropout, Flatten

from keras.layers import Conv2D, MaxPooling2D

from keras import backend as K

from keras.callbacks import TensorBoard

import numpy as np

from PIL import Image

import os

 

# Samples per gradient update, passed to model.fit below.
# (128 was an earlier value, kept for reference.)
#batch_size = 128
batch_size = 12

# Number of classes: passed to to_categorical and used as the width of the
# final Dense (softmax) layer.
num_classes = 12

# Number of training epochs passed to model.fit.
# (12 was an earlier value, kept for reference.)
#epochs = 12
epochs = 5

# input image dimensions
img_rows, img_cols = 1920, 2560

# Path handed to model.save() at the end of the script.
MODEL_SAVE_PATH = "D:/TIANCHI/model"

# the data, shuffled and between train and test sets
# The MNIST loaders below are left commented out; this script loads its own
# images from disk with get_data_and_label instead.
#(x_train, y_train), (x_test, y_test) = mnist.load_data()
#(x_train, y_train), (x_test, y_test) = mnist.load_data(path='D:/aura/data/mnist.npz')


 

def get_data_and_label(root_path):
    """Load grayscale images and their one-hot labels from a directory tree.

    ``root_path`` is expected to contain one sub-directory per class, each
    named with its integer class index. Every file inside a class directory
    is opened as an image and converted to grayscale (PIL mode ``"L"``).

    Args:
        root_path: Directory holding the per-class sub-directories. A
            trailing path separator is optional.

    Returns:
        A tuple ``(x, y)`` of NumPy arrays: ``x`` is built from the list of
        grayscale image arrays, ``y`` from one 12-element one-hot vector per
        image. Both are empty arrays when no images are found.

    Raises:
        ValueError: If a sub-directory name cannot be parsed as an integer.
        IndexError: If a class index is out of range for a 12-element vector.

    Note:
        The labels returned here are ALREADY one-hot encoded. Passing them
        through ``keras.utils.to_categorical`` again encodes them twice.
    """
    image_labels = os.listdir(root_path)
    x = []
    y = []
    for lbl in image_labels:
        # os.path.join works whether or not root_path ends with a separator.
        pathName = os.path.join(root_path, lbl)
        if not os.path.isdir(pathName):
            # Ignore stray files sitting next to the class directories.
            continue
        images = os.listdir(pathName)
        # One-hot label shared by every image of this class.
        label = np.zeros(12)
        lbl_int = int(lbl)
        label[lbl_int] = 1
        for im in images:
            imgPath = os.path.join(pathName, im)
            # The context manager closes the underlying file handle as soon
            # as the pixel data has been copied into the NumPy array.
            with Image.open(imgPath) as img:
                # Convert to grayscale.
                image_array = np.array(img.convert("L"))
            x.append(image_array)
            y.append(label)
    x = np.array(x)
    y = np.array(y)
    return x, y

 

# Load the training images; get_data_and_label returns the labels already
# one-hot encoded (one 12-element vector per image).
train_path = 'D:/TIANCHI/data/train2_12/'
x_train,y_train = get_data_and_label(train_path)
# Debug output: array shapes right after loading.
print("(x_train.shape:",x_train.shape)
print("(y_train.shape:",y_train.shape)

# Load the test images the same way.
test_path = 'D:/TIANCHI/data/test2_2/'
x_test,y_test = get_data_and_label(test_path)
print("(x_test.shape:",x_test.shape)
print("(y_test.shape:",y_test.shape)



 

# Add an explicit single-channel axis to the image arrays, placed according
# to the backend's image data format, and record the matching input_shape
# for the first layer of the model.
if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    print("exec this method")
    # For three-channel images this would become (3, img_rows, img_cols).
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

 

# Cast to float32 before scaling the pixel values.
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
# Convert from [0, 255] -> [0.0, 1.0].
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
# NOTE(review): y_train / y_test are already one-hot at this point
# (get_data_and_label builds a 12-element vector per image), so these calls
# encode them a second time. The run log in this post shows y_train going
# from (144, 12) to (1728, 12) across this step, which is what makes
# model.fit raise "Input arrays should have the same number of samples as
# target arrays".
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

 

model = Sequential()
# 32 convolution filters. There is no fixed rule for these settings; classic
# network architectures can serve as a reference.
# NOTE(review): the original comment said 3x3, but the kernel configured here
# is 300x300 — confirm which was intended.
model.add(Conv2D(32, kernel_size=(300, 300),
                 activation='relu',
                 input_shape=input_shape))
# Second convolution layer: 64 filters, 3x3 kernel.
model.add(Conv2D(64, (3, 3), activation='relu'))
# Pooling layer: keeps the maximum of each 2x2 region of the feature map.
model.add(MaxPooling2D(pool_size=(2, 2)))
# Regularization against overfitting: randomly drops part of the activations.
model.add(Dropout(0.25))
# Flatten the feature maps into a single vector per sample.
model.add(Flatten())
# Fully connected layer with ReLU activation.
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
# Output layer: softmax over the num_classes classes.
model.add(Dense(num_classes, activation='softmax'))
# Print the model structure.
model.summary()
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer="sgd", # alternative: Adam
              metrics=['accuracy'])

# verbose=1 prints the training log, verbose=0 suppresses it.
print("+++++++++++++model.compile+++++++++++++++++")
# Re-print the shapes immediately before training; this is where the
# mismatch between x_train and y_train becomes visible in the run log.
print("(x_train.shape:",x_train.shape)
print("(y_train.shape:",y_train.shape)

print("(x_test.shape:",x_test.shape)
print("(y_test.shape:",y_test.shape)

# Train, using the test set as validation data after each epoch.
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))
# Evaluate on the test set; score holds the loss followed by the accuracy
# metric requested in model.compile.
score = model.evaluate(x_test, y_test, verbose=1)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
# Save the trained model to MODEL_SAVE_PATH.
model.save(MODEL_SAVE_PATH)

 

 

查了很多资料都说是x_train与y_train长度不一致,但从程序开头打印的shape看,x_train与y_train的长度都是144。

我迷惑了。

最后因为看到报错是在model.fit时报的,我就在model.fit前重新打印了一下x_train和y_train的shape,这时发现长度确实不一致。

经过仔细研究代码发现我对y_train做了两次onehot.

第一次是在获取数据时:

label = np.zeros(12)

lbl_int = int(lbl)

label[lbl_int] = 1

 

可是在获取数据后又做了一次:

y_train = keras.utils.to_categorical(y_train, num_classes)

y_test = keras.utils.to_categorical(y_test, num_classes)

这就导致x_train与y_train长度不一致了:to_categorical把已经是one-hot、形状为(144, 12)的标签当作144×12=1728个整数类别重新编码,输出形状变成(1728, 12)(测试集同理,24×12=288)。

把获取数据时的onehot删除(改为直接 y.append(lbl_int),只保留后面的 to_categorical)后搞定!!!

你可能感兴趣的:(人工智能,python,tensorflow,keras)