[Study Notes] Overfitting in Convolutional Neural Networks

In the previous part we saw that severe overfitting left our validation accuracy quite low.

This time we will reduce the impact of overfitting by augmenting the dataset and by applying dropout regularization.

Let's first take a look at the dropout function:

import tensorflow as tf
import numpy as np

x = np.array([1, 2, 3, 4, 5]).astype('float32')
sess = tf.Session()
# The second argument is keep_prob: each element survives with probability 0.6.
print(sess.run(tf.nn.dropout(x, 0.6)))


[0.        3.3333333 0.        6.6666665 8.333333 ]

From the output it's easy to see that every value that survives has been divided by keep_prob (the keep probability), while the dropped entries are set to zero. This rescaling keeps the expected value of each activation unchanged.
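
To convince yourself of that, you can average many dropout runs; the mean should roughly recover the original values. A minimal sketch, reusing the TF1 session style above:

import tensorflow as tf
import numpy as np

x = np.array([1, 2, 3, 4, 5]).astype('float32')
drop = tf.nn.dropout(x, 0.6)  # keep_prob = 0.6; kept values are scaled by 1/0.6
sess = tf.Session()
# Each run samples a fresh dropout mask; the average converges to x itself.
runs = np.stack([sess.run(drop) for _ in range(10000)])
print(runs.mean(axis=0))  # each entry should land close to [1, 2, 3, 4, 5]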

Dropout regularization is generally applied to the fully connected layers, mainly because that's where most of the network's neurons live. Dropout effectively reduces the effective size of our model.

From another angle: the convolutional part is like our eyes, and the fully connected layers are like our brain. The brain doesn't fire all of its neurons at once when it works, and dropout mimics that behavior.

Last time, the overfitting was mainly caused by our dataset being too small, so let's start by augmenting the dataset:

Building on the previous CNN article, the original images have already been resized to 150×150 and the corresponding folders created, so I won't repeat that code here.

# coding=utf-8
import os
import cv2
import numpy as np

base_dir = './dataset/cats_and_dogs_filtered'
train_dir = os.path.join(base_dir, 'train')
train_cats_dir = os.path.join(train_dir, 'cats/')
train_dogs_dir = os.path.join(train_dir, 'dogs/')
train_cat_fnames = os.listdir(train_cats_dir)
train_dog_fnames = os.listdir(train_dogs_dir)
target_cats = os.path.join(train_dir, 'resize_cats/')
target_dogs = os.path.join(train_dir, 'resize_dogs/')


def tPerspectiveTransform(img, inputs_shape, transform_shape, outputs_shape):
    # Map four source points onto four destination points and warp the image.
    pts1 = np.float32(inputs_shape)
    pts2 = np.float32(transform_shape)
    M = cv2.getPerspectiveTransform(pts1, pts2)
    dst = cv2.warpPerspective(img, M, outputs_shape)
    return dst


def augment_dir(src_dir, fnames, target_dir):
    # The cat and dog loops were identical, so the shared logic lives here.
    for i in fnames:
        img = cv2.imread(src_dir + i, 1)
        rows, cols = img.shape[0:2]
        # Two perspective "crops": the central 2/3 and the central 1/2 of the
        # image, each warped to 150x150.
        dst_1 = tPerspectiveTransform(img, [[rows/6, cols/6], [rows*5/6, cols/6],
                                            [rows/6, cols*5/6], [rows*5/6, cols*5/6]],
                                      [[0, 0], [150, 0], [0, 150], [150, 150]], (150, 150))
        dst_2 = tPerspectiveTransform(img, [[rows/4, cols/4], [rows*3/4, cols/4],
                                            [rows/4, cols*3/4], [rows*3/4, cols*3/4]],
                                      [[0, 0], [150, 0], [0, 150], [150, 150]], (150, 150))
        cv2.imwrite(target_dir + 'pts_1_1' + i, dst_1)
        cv2.imwrite(target_dir + 'pts_1_2' + i, dst_2)
        # Two rotations about the image center (90 and 270 degrees), then
        # resized to 150x150.
        M_1 = cv2.getRotationMatrix2D((cols/2, rows/2), 90, 1)
        M_2 = cv2.getRotationMatrix2D((cols/2, rows/2), 270, 1)
        rotation_1 = cv2.warpAffine(img, M_1, (cols, rows))
        resize_ro1 = cv2.resize(rotation_1, dsize=(150, 150))
        rotation_2 = cv2.warpAffine(img, M_2, (cols, rows))
        resize_ro2 = cv2.resize(rotation_2, dsize=(150, 150))
        cv2.imwrite(target_dir + 'rotation_1' + i, resize_ro1)
        cv2.imwrite(target_dir + 'rotation_2' + i, resize_ro2)


augment_dir(train_cats_dir, train_cat_fnames, target_cats)
augment_dir(train_dogs_dir, train_dog_fnames, target_dogs)

Here I applied two rotations and two perspective transforms to each original image; you can also try other methods — for example, the horizontal flip sketched below.
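
A mirror flip is one of the cheapest additional augmentations. A minimal sketch reusing the directory variables defined above (the 'flip_' filename prefix is my own choice, not from the earlier code):

for src_dir, fnames, target_dir in [(train_cats_dir, train_cat_fnames, target_cats),
                                    (train_dogs_dir, train_dog_fnames, target_dogs)]:
    for i in fnames:
        img = cv2.imread(src_dir + i, 1)
        flipped = cv2.flip(img, 1)  # flipCode=1 mirrors the image left-right
        cv2.imwrite(target_dir + 'flip_' + i, cv2.resize(flipped, dsize=(150, 150)))

Once the augmented images are ready, we again use the earlier TFRecord-creation code to build the dataset: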

# coding=utf-8
import os
import tensorflow as tf
from PIL import Image


cwd = './dataset/cats_and_dogs_filtered/train/'
classes = ('resize_cats', 'resize_dogs')
writer = tf.python_io.TFRecordWriter('cats_and_dogs_train_onehot.tfrecords')

for index, name in enumerate(classes):
    class_path = cwd + name + '/'
    for img_name in os.listdir(class_path):
        img_path = class_path + img_name
        img = Image.open(img_path)
        img_raw = img.tobytes()
        # One-hot label: cats -> [1, 0], dogs -> [0, 1].
        example = tf.train.Example(features=tf.train.Features(feature={
            'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[1, 0] if index == 0 else [0, 1])),
            'img_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_raw]))
        }))
        writer.write(example.SerializeToString())

writer.close()
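
Before training, it can be worth a quick sanity check that the file holds the expected number of examples — a minimal sketch using TF1's record iterator:

import tensorflow as tf

count = sum(1 for _ in tf.python_io.tf_record_iterator('cats_and_dogs_train_onehot.tfrecords'))
print(count)  # should match the number of files in resize_cats plus resize_dogs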

Next, let's train our neural network:

# coding=utf-8
import tensorflow as tf


def _parse_function(record):
    """Parses a single serialized tf.train.Example.
    Args:
      record: A scalar string tensor holding one serialized Example.
    Returns:
      A `tuple` `(img_raw, labels)`:
        img_raw: A [150, 150, 3] uint8 image tensor.
        labels: A [2] int64 one-hot label tensor.
    """
    features = {
        "label": tf.FixedLenFeature([2], tf.int64),  # one-hot label: [1, 0] for cats, [0, 1] for dogs
        "img_raw": tf.FixedLenFeature([], tf.string)  # raw image bytes (150*150*3)
    }

    parsed_features = tf.parse_single_example(record, features)

    img_raw = parsed_features['img_raw']
    img_raw = tf.decode_raw(img_raw, tf.uint8)
    img_raw = tf.reshape(img_raw, [150, 150, 3])
    labels = parsed_features['label']

    return img_raw, labels


def my_input_fn(input_filenames, num_epochs=None, shuffle=True):
    # Create the dataset and parse each record into (image, label).
    ds = tf.data.TFRecordDataset(input_filenames)
    ds = ds.map(_parse_function)

    if shuffle:
        ds = ds.shuffle(10000)

    # Every example already has a fixed shape ([150, 150, 3] image, [2] label),
    # so a plain batch would also work; padded_batch is kept from the original template.
    ds = ds.padded_batch(100, ds.output_shapes)

    ds = ds.repeat(num_epochs)

    # Return the next batch of data.
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels


def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=.1)
    return tf.Variable(initial)


def biases_variable(shape):
    initial = tf.constant(.1, shape=shape)
    return tf.Variable(initial)


def conv2d(x, w):
    return tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME')


def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')


def _loss(ys, pred):
    # Cross-entropy loss; predictions are clipped away from zero to avoid log(0).
    cross_entropy = tf.reduce_mean(
        -tf.reduce_sum(ys * tf.log(tf.clip_by_value(pred, 1e-10, 1.0)), axis=[1]))
    return cross_entropy


def train_step(learning_rate, loss):
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)
    return optimizer


def accuracy(pred, ys):
    _bool = tf.equal(tf.argmax(pred, 1), tf.argmax(ys, 1))
    acc = tf.reduce_mean(tf.cast(_bool, tf.float32))
    return acc


train_path = my_input_fn('cats_and_dogs_train_onehot.tfrecords')
xs = train_path[0]
xs = tf.cast(xs, tf.float32)
x_input = xs / 255
ys = train_path[1]
y_input = tf.cast(ys, tf.float32)

w_conv1 = weight_variable([3, 3, 3, 16])
b_conv1 = biases_variable([16])
h_conv1 = tf.nn.relu(conv2d(x_input, w_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

w_conv2 = weight_variable([3, 3, 16, 32])
b_conv2 = biases_variable([32])
h_conv2 = tf.nn.relu(conv2d(h_pool1, w_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

w_conv3 = weight_variable([3, 3, 32, 64])
b_conv3 = biases_variable([64])
h_conv3 = tf.nn.relu(conv2d(h_pool2, w_conv3) + b_conv3)
h_pool3 = max_pool_2x2(h_conv3)
h_pool3_flat = tf.reshape(h_pool3, [-1, 19 * 19 * 64])

w_fc1 = weight_variable([19 * 19 * 64, 512])
b_fc1 = biases_variable([512])
h_fc1 = tf.nn.relu(tf.matmul(h_pool3_flat, w_fc1) + b_fc1)
h_fc1_dropout = tf.nn.dropout(h_fc1, 0.5)
w_fc2 = weight_variable([512, 2])
b_fc2 = biases_variable([2])
pred = tf.nn.softmax(tf.matmul(h_fc1_dropout, w_fc2) + b_fc2)

start_learning_rate = .0001
loss = _loss(y_input, pred)
train = train_step(start_learning_rate, loss)
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

saver = tf.train.Saver()

acc = accuracy(pred, y_input)  # build the accuracy op once; creating it inside the loop keeps growing the graph

for i in range(3000):
    sess.run(train)
    if i % 50 == 0:
        # Checkpoint every 50 steps rather than on every single step.
        save_path = saver.save(sess, 'my_net/simple_cnn1.ckpt')
        print('steps:', i, 'accuracy:', sess.run(acc))

Here batch_size = 100, the optimizer is Adam, and the learning rate is 0.0001.

The input is divided by 255 to normalize it to [0, 1]. Dropout is added to the fully connected layer with a keep probability of 50%, and training runs for 3000 steps.

With the network structure fixed, every one of these hyperparameters affects the result; if you feel like experimenting, try varying them and watching what happens.

batch_size: I tried 25, 50, and 100; for the same number of steps, validation accuracy grew as the batch size grew.

For the optimization algorithm I tried RMSProp and Adam.

The official documentation uses RMSProp as well, and I personally lean toward RMSProp.

Adam showed no fast-convergence advantage in this network, and it also needed a very low learning rate to keep gradient descent stable. (RMSProp can run with a higher learning rate.)
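
Swapping optimizers is a one-line change to train_step — a minimal sketch (the 0.001 learning rate is only an illustrative value, not a tuned one):

def train_step(learning_rate, loss):
    # Drop-in replacement for the Adam version above.
    optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(loss)
    return optimizer


train = train_step(0.001, loss)  # RMSProp tolerates a higher lr in this network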

(If the CNN puts every image into a single class, or fails to converge at all, the learning rate is usually set too high; lower it first.)

The dropout keep probability is usually set to 50%. Too low and the network may fail to converge; too high and the regularization effect is weak.

Number of training steps: the 3000 steps used here are actually not a good choice.

Even so, after 3000 steps I got 87.3% accuracy on the training set and 73% on the validation set. (Results may vary with the random initialization.)

In other words, dropout does not completely solve overfitting in a CNN; we still need early stopping, or a larger training set.
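
For reference, a minimal early-stopping sketch of how the training loop above could be rewritten (the patience value is arbitrary, and monitoring training-batch accuracy is a simplification — properly you would monitor a held-out validation metric):

best_acc = 0.0
patience, bad_evals = 10, 0    # give up after 10 evaluations with no improvement
acc = accuracy(pred, y_input)  # build the op once, outside the loop

for i in range(30000):         # upper bound; we expect to stop early
    sess.run(train)
    if i % 50 == 0:
        cur = sess.run(acc)
        if cur > best_acc:
            best_acc, bad_evals = cur, 0
            saver.save(sess, 'my_net/simple_cnn1.ckpt')  # keep only the best model
        else:
            bad_evals += 1
            if bad_evals >= patience:
                print('early stop at step', i, 'best accuracy:', best_acc)
                break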


Finally, here is the network we use for validation:

# coding=utf-8
import tensorflow as tf
import numpy as np


def _parse_function(record):
    """Parses a single serialized tf.train.Example.
    Args:
      record: A scalar string tensor holding one serialized Example.
    Returns:
      A `tuple` `(img_raw, labels)`:
        img_raw: A [150, 150, 3] uint8 image tensor.
        labels: A [2] int64 one-hot label tensor.
    """
    features = {
        "label": tf.FixedLenFeature([2], tf.int64),  # one-hot label: [1, 0] for cats, [0, 1] for dogs
        "img_raw": tf.FixedLenFeature([], tf.string)  # raw image bytes (150*150*3)
    }

    parsed_features = tf.parse_single_example(record, features)

    img_raw = parsed_features['img_raw']
    img_raw = tf.decode_raw(img_raw, tf.uint8)
    img_raw = tf.reshape(img_raw, [150, 150, 3])
    labels = parsed_features['label']

    return img_raw, labels


def my_input_fn(input_filenames, num_epochs=None, shuffle=False):
    # Same as the training script: create the dataset and parse each record into (image, label).
    ds = tf.data.TFRecordDataset(input_filenames)
    ds = ds.map(_parse_function)

    if shuffle:
        ds = ds.shuffle(10000)

    # Every example already has a fixed shape, so a plain batch would also work;
    # padded_batch is kept from the original template. Batch size is 25 here.
    ds = ds.padded_batch(25, ds.output_shapes)

    ds = ds.repeat(num_epochs)

    # Return the next batch of data.
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels


def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=.1)
    return tf.Variable(initial)


def biases_variable(shape):
    initial = tf.constant(.1, shape=shape)
    return tf.Variable(initial)


def conv2d(x, w):
    return tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME')


def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')


def accuracy(pred, ys):
    _bool = tf.equal(tf.argmax(pred, 1), tf.argmax(ys, 1))
    acc = tf.reduce_mean(tf.cast(_bool, tf.float32))
    return acc


train_path = my_input_fn('cats_and_dogs_train_onehot.tfrecords')
# train_path = my_input_fn('cats_and_dogs_validation_onehot.tfrecords')  # validation set
xs = train_path[0]
xs = tf.cast(xs, tf.float32)
x_input = xs / 255
ys = train_path[1]
y_input = tf.cast(ys, tf.float32)

w_conv1 = weight_variable([3, 3, 3, 16])
b_conv1 = biases_variable([16])
h_conv1 = tf.nn.relu(conv2d(x_input, w_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

w_conv2 = weight_variable([3, 3, 16, 32])
b_conv2 = biases_variable([32])
h_conv2 = tf.nn.relu(conv2d(h_pool1, w_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

w_conv3 = weight_variable([3, 3, 32, 64])
b_conv3 = biases_variable([64])
h_conv3 = tf.nn.relu(conv2d(h_pool2, w_conv3) + b_conv3)
h_pool3 = max_pool_2x2(h_conv3)
h_pool3_flat = tf.reshape(h_pool3, [-1, 19 * 19 * 64])

w_fc1 = weight_variable([19 * 19 * 64, 512])
b_fc1 = biases_variable([512])
h_fc1 = tf.nn.relu(tf.matmul(h_pool3_flat, w_fc1) + b_fc1)
h_fc1_dropout = tf.nn.dropout(h_fc1, 1)  # keep_prob = 1: dropout is disabled here; it is only used while training the network.
w_fc2 = weight_variable([512, 2])
b_fc2 = biases_variable([2])
pred = tf.nn.softmax(tf.matmul(h_fc1_dropout, w_fc2) + b_fc2)

sess = tf.Session()
saver = tf.train.Saver()
saver.restore(sess, 'my_net/simple_cnn1.ckpt')
lst = []
acc = accuracy(pred, y_input)  # build the accuracy op once, outside the loop
# The validation set is small; range(80) already covers the whole validation set.
for i in range(400):
    lst.append(sess.run(acc))

print(lst)
print(np.mean(lst))

Things to note:

Dropout is not used when measuring test accuracy or when deploying the network; as a regularization method, it is applied only while training.

(I cut a corner here: in the training network it would be better to make keep_prob a placeholder, so the keep rate can be controlled through feed_dict. But I only use the training accuracy to check that the network is converging, so its exact value doesn't matter much.)
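
For reference, the placeholder version only touches a few lines — a minimal sketch of the relevant changes, not a full script:

keep_prob = tf.placeholder(tf.float32)
h_fc1_dropout = tf.nn.dropout(h_fc1, keep_prob)
# ... build the rest of the graph as before ...

# Training: drop half of the fully connected activations.
sess.run(train, feed_dict={keep_prob: 0.5})
# Validation / inference: keep everything.
print(sess.run(acc, feed_dict={keep_prob: 1.0}))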

The batch_size here is 25 and the validation set contains 2000 images, so 80 iterations of the loop are enough to cover every image.
