

VGG网络是2014年ILSVRC图像分类比赛的第二名,将Top-5错误率降到7.3%。VGG网络结构简洁,可以当作图像分类算法的baseline进行修改开发。






# 2-D convolution
def conv2d(x, W, strides=1, padding='SAME', name=None):
    """Convolve `x` with the filter `W` via `tf.nn.conv2d`.

    Args:
        x: Input tensor handed to `tf.nn.conv2d` unchanged.
        W: Filter tensor handed to `tf.nn.conv2d` unchanged.
        strides: Single integer placed in both middle slots of the
            4-element stride list; the outer two slots are fixed to 1.
        padding: Padding mode forwarded as-is ('SAME' by default).
        name: Optional op name forwarded as-is.

    Returns:
        The tensor produced by `tf.nn.conv2d`.
    """
    stride_spec = [1, strides, strides, 1]
    return tf.nn.conv2d(
        x,
        W,
        strides=stride_spec,
        padding=padding,
        name=name,
    )

# Max pooling
def max_pool(x, size=2, strides=2, padding='SAME', name=None):
    """Max-pool `x` via `tf.nn.max_pool`.

    Args:
        x: Input tensor handed to `tf.nn.max_pool` unchanged.
        size: Single integer placed in both middle slots of the 4-element
            window (`ksize`) list; the outer two slots are fixed to 1.
        strides: Single integer placed in both middle slots of the
            4-element stride list; the outer two slots are fixed to 1.
        padding: Padding mode forwarded as-is ('SAME' by default).
        name: Optional op name forwarded as-is.

    Returns:
        The tensor produced by `tf.nn.max_pool`.
    """
    window = [1, size, size, 1]
    step = [1, strides, strides, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding=padding, name=name)


def _conv_relu(x, in_channels, out_channels):
    """Apply one bias-free 3x3 convolution (stride 1, SAME) followed by ReLU.

    The kernel is a fresh variable initialised uniformly in [-0.1, 0.1).
    """
    kernel = tf.Variable(
        tf.random_uniform([3, 3, in_channels, out_channels], -0.1, 0.1))
    return tf.nn.relu(conv2d(x, kernel))


def _dense(x, in_units, out_units):
    """Apply one fully connected layer (`x @ W + b`) with fresh variables.

    Weights are uniform in [-0.1, 0.1); the bias is normal with stddev 0.01.
    """
    weights = tf.Variable(tf.random_uniform([in_units, out_units], -0.1, 0.1))
    bias = tf.Variable(tf.random_normal([out_units], mean=0.0, stddev=0.01))
    return tf.matmul(x, weights) + bias


def Vgg_net(input, keep_prob):
    """Build a VGG-16 style classifier for 224x224 single-channel images.

    The graph consists of five convolutional stages (2, 2, 3, 3 and 3
    conv+ReLU layers with 64, 128, 256, 512 and 512 filters), each followed
    by 2x2 max pooling, then two 4096-unit fully connected layers with ReLU
    and dropout, and a final 10-unit linear layer.

    Args:
        input: Tensor that can be reshaped to [-1, 224, 224, 1].
        keep_prob: Keep probability passed to `tf.nn.dropout` after each of
            the two hidden fully connected layers.

    Returns:
        The un-normalised class scores (logits) of shape [batch, 10].
    """
    with tf.variable_scope("vggnet"):
        net = tf.reshape(input, [-1, 224, 224, 1])

        # (filters, number of conv layers) for each stage; variables are
        # created in the same order as a hand-unrolled layer list would.
        in_channels = 1
        for out_channels, num_convs in ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3)):
            for _ in range(num_convs):
                net = _conv_relu(net, in_channels, out_channels)
                in_channels = out_channels
            net = max_pool(net)

        # Five stride-2 SAME poolings shrink 224 -> 7, so the final feature
        # map holds 7 * 7 * 512 = 25088 values per image.  It must be
        # flattened before the first matmul (this step was missing, leaving
        # the fully connected input undefined).
        flat_units = 7 * 7 * 512
        maxreshape = tf.reshape(net, [-1, flat_units])

        out_1 = tf.nn.dropout(
            tf.nn.relu(_dense(maxreshape, flat_units, 4096)), keep_prob=keep_prob)
        out_2 = tf.nn.dropout(
            tf.nn.relu(_dense(out_1, 4096, 4096)), keep_prob=keep_prob)

        y_ = _dense(out_2, 4096, 10)
        return y_

# Image paths and labels
def get_filename(file_dir):
    """Collect image paths and their labels from a two-level directory tree.

    Every sub-directory of `file_dir` is scanned, and each entry inside it
    contributes one path and one label.  The label is the name of the
    sub-directory that contains the file.
    NOTE(review): using the sub-directory name as the label is inferred from
    the layout `file_dir/<class>/<file>`; confirm against the dataset.

    Args:
        file_dir: Root directory holding one sub-directory per class.

    Returns:
        A pair `(paths, labels)` of NumPy arrays of equal length.
    """
    original_pic = []
    labe1_pic = []
    for filename in os.listdir(file_dir):
        ls = os.path.join(file_dir, filename)
        # Skip stray files sitting next to the class directories; listing
        # them would raise.
        if not os.path.isdir(ls):
            continue
        for subfilename in os.listdir(ls):
            original_pic.append(os.path.join(ls, subfilename))
            # Previously no label was ever recorded, so the label array was
            # always empty.
            labe1_pic.append(filename)

    return np.asarray(original_pic), np.asarray(labe1_pic)

def shuffle_image(image, label):
    """Return `image` and `label` reordered by one shared random permutation.

    The inputs are not modified; callers must use the returned arrays.
    Element `i` of the returned images still corresponds to element `i` of
    the returned labels.

    Args:
        image: Sequence or array of samples (e.g. image paths).
        label: Sequence or array of labels, same length as `image`.

    Returns:
        A pair `(shuffled_image, shuffled_label)` of NumPy arrays.

    Raises:
        ValueError: If `image` and `label` differ in length.
    """
    image = np.asarray(image)
    label = np.asarray(label)
    if len(image) != len(label):
        raise ValueError(
            'image and label must have the same length, got %d and %d'
            % (len(image), len(label)))
    order = np.random.permutation(len(image))
    return image[order], label[order]

# Dataset locations (relative, Windows-style paths).
Train_Path = r'.\train_image'
Test_Path = r'.\test_image'

# NOTE(review): `epoches` is not read anywhere in this section; the training
# loop below uses its own hard-coded iteration count.
epoches = 100000
a, b = get_filename(Train_Path)
#c = get_Test_filename(Test_Path)
# Use the arrays returned by shuffle_image instead of discarding them.
a, b = shuffle_image(a, b)

# Split into training / validation parts.
ratio = 0.9
s = int(b.shape[0] * ratio)  # builtin int: the np.int alias was removed in NumPy 1.24
s0 = b.shape[0] - s
a_train = a[:s]
b_train = b[:s]
a_val = a[s:]
b_val = b[s:]

# Label strings in one-hot column order.
CLASS_NAMES = ['000', '001', '002', '003', '004', '005', '006', '007', '008', '009']

# NOTE(review): BATCHSIZE and isTrain are not defined in this section; they
# must already be defined before this point.
input_image = tf.placeholder(dtype=tf.float32, shape=[None, 224, 224, 1])
keep_prob = tf.placeholder(tf.float32)
# One-hot ground-truth labels fed alongside each batch.
y_true = tf.placeholder(dtype=tf.float32, shape=[None, 10])
# Reusable host-side batch buffers; their shape must match the placeholders.
data_x = np.zeros([BATCHSIZE, 224, 224, 1])
data_y = np.zeros([BATCHSIZE, 10])
y_ = Vgg_net(input_image,keep_prob)
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_true, logits=y_))
train_step = tf.train.AdamOptimizer(1e-3).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_true, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))


def _load_batch(paths, labels):
    """Fill `data_x` / `data_y` in place from BATCHSIZE image paths and labels.

    Each image is read as a single-channel image and resized to 224x224 when
    needed, so that it fits the 1-channel input buffer.
    """
    for idx in range(BATCHSIZE):
        imc = cv2.imread(paths[idx], cv2.IMREAD_GRAYSCALE)
        if imc is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise IOError('cannot read image: %s' % paths[idx])
        if imc.shape != (224, 224):
            imc = cv2.resize(imc, (224, 224))
        data_x[idx, :, :, 0] = imc
        data_y[idx, :] = [(labels[idx] == var) for var in CLASS_NAMES]


if isTrain:
    train_batches = int(s / BATCHSIZE)
    val_batches = int(s0 / BATCHSIZE)
    if train_batches == 0:
        # Without this check the batch index below would be a modulo by zero.
        raise ValueError(
            'need at least BATCHSIZE training samples, got %d' % s)

    with tf.Session() as sess:
        saver = tf.train.Saver()
        tf.summary.scalar('loss_gen', cross_entropy)
        tf.summary.scalar('accuracy', accuracy)
        writer = tf.summary.FileWriter('./my_graph/1', sess.graph)
        summary_op = tf.summary.merge_all()
        # Variables must be initialised before the first training step.
        sess.run(tf.global_variables_initializer())
        for epoch in range(1000000):
            # Walk through the training set batch by batch, wrapping around.
            randnum = epoch % train_batches
            image_train = a_train[randnum * BATCHSIZE:(randnum + 1) * BATCHSIZE]
            label_train = b_train[randnum * BATCHSIZE:(randnum + 1) * BATCHSIZE]
            _load_batch(image_train, label_train)
            _, accuracy_p, summary_op_p = sess.run([train_step , accuracy , summary_op],
                                            feed_dict={input_image: data_x, y_true:data_y,keep_prob:0.5})
            print('epoch:%d,coorect:%f' % (epoch, accuracy_p))
            # Periodically score one random validation batch (dropout off).
            # Skipped when the validation split is smaller than one batch.
            if epoch % 1000 == 0 and val_batches > 0:
                randnum = np.random.randint(low=0, high=val_batches)
                image_val = a_val[randnum * BATCHSIZE:(randnum + 1) * BATCHSIZE]
                label_val = b_val[randnum * BATCHSIZE:(randnum + 1) * BATCHSIZE]
                _load_batch(image_val, label_val)
                accuracy_p = sess.run(accuracy, feed_dict={input_image: data_x, y_true: data_y,keep_prob:1.0})
                print('val------------epoch:%d,coorect:%f' % (epoch,accuracy_p ))
            if epoch % 10 == 0:
                writer.add_summary(summary_op_p, epoch)
            if epoch % 10000 == 0:
                # Integer division keeps the checkpoint index free of a
                # trailing ".0" on Python 3.
                saver.save(sess, './myModel/model' + str(epoch // 10000) + '.cptk')
        writer.close()
    # NOTE(review): this restore runs after training, inside the isTrain
    # branch; it looks like it may have been meant as the non-training
    # (else) path -- confirm.
    with tf.Session() as sess:
        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint('myModel')
        saver.restore(sess, ckpt)

