【人工智能】实验二基于CNN的图像分类

        这个算是个记录吧,这是黑龙江大学马吉权老师的人工智能课。听说这位老师科研很好,但是他教的内容有点云里雾里,主要讲概念,没有实际代码,令人感叹。老师主要是让大伙自学,不过没有期末考试,还挺好过的。

---------------------------------------------------------------------------------------------------------------------------------

实验的话就是用 CIFAR-100 数据集(共 60k 张 32*32 的图片)做图像分类,主要分为4个步骤,即

  1. load datasets
  2. build network
  3. train
  4. test

然后整个分割内容是这样的

【人工智能】实验二基于CNN的图像分类_第1张图片

 这个是老师给的待完成代码

import  tensorflow as tf
from    tensorflow.keras import layers, optimizers, datasets, Sequential
import  os
 
# Presumably meant to quiet TensorFlow's native (C++) logging.
# NOTE(review): this runs after `import tensorflow`; confirm it still takes effect.
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
# Fix the TensorFlow random seed so repeated runs are comparable.
tf.random.set_seed(2345)
 
conv_layers = [ # 5 units of conv + max pooling
    # Exercise: build the VGG13 network as demonstrated in the readme file.
    # Unit 1 below is a worked example; complete units 2 to 5 by replacing
    # the placeholder lines.
    # unit 1: two 3x3 convolutions with 64 filters, then a stride-2 max pooling
    layers.Conv2D(64, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(64, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),
 
    # unit 2 (to be completed)
    #____________________________________________________________________________
    #____________________________________________________________________________
    #____________________________________________________________________________
 
    # unit 3 (to be completed)
    #____________________________________________________________________________
    #____________________________________________________________________________
    #____________________________________________________________________________
 
    # unit 4 (to be completed)
    #____________________________________________________________________________
    #____________________________________________________________________________
    #____________________________________________________________________________
 
    # unit 5 (to be completed)
    #____________________________________________________________________________
    #____________________________________________________________________________
    #____________________________________________________________________________
 
]
 
 
 
def preprocess(x, y):
    """Convert one (image, label) pair to the dtypes used for training.

    The image is cast to float32 and divided by 255 (mapping 8-bit pixel
    values into [0, 1]); the label is cast to int32.
    """
    images = tf.cast(x, dtype=tf.float32)
    labels = tf.cast(y, dtype=tf.int32)
    return images / 255., labels
 
 
# Load CIFAR-100 and drop the trailing size-1 axis of the label arrays.
(x, y), (x_test, y_test) = datasets.cifar100.load_data()
y, y_test = tf.squeeze(y, axis=1), tf.squeeze(y_test, axis=1)
print(x.shape, y.shape, x_test.shape, y_test.shape)

# Training pipeline: shuffle with a 1000-element buffer, preprocess, batch by 128.
train_db = (
    tf.data.Dataset.from_tensor_slices((x, y))
    .shuffle(1000)
    .map(preprocess)
    .batch(128)
)

# Test pipeline: no shuffling, batches of 64.
test_db = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .map(preprocess)
    .batch(64)
)

# Pull one training batch to show its shapes and pixel value range.
sample = next(iter(train_db))
print(
    'sample:',
    sample[0].shape,
    sample[1].shape,
    tf.reduce_min(sample[0]),
    tf.reduce_max(sample[0]),
)
 
 
def main():
    """Train the conv + fully connected network and print test accuracy per epoch.

    This is the exercise template: every run of underscores is a blank that
    must be filled in before the function can run.
    """
 
    # [b, 32, 32, 3] => [b, 1, 1, 512]
    conv_net = Sequential(conv_layers)
    # Exercise: fill in the arguments of the first two Dense layers.
    fc_net = Sequential([
        layers.Dense(____________________________),
        layers.Dense(____________________________),
        layers.Dense(100, activation=None), # you can try other activation functions to evaluate and compare
    ])
    # Exercise: fill in the input shapes of both sub-networks.
    conv_net.build(input_shape=[None, ____, _____, ___])
    fc_net.build(input_shape=[None, ____])
    # NOTE(review): newer Keras releases may expect `learning_rate=` instead of
    # `lr=` here; confirm against the installed version.
    optimizer = optimizers.Adam(lr=1e-4)
 
    # List concatenation: [1, 2] + [3, 4] => [1, 2, 3, 4]
    variables = conv_net.trainable_variables + fc_net.trainable_variables
 
    for epoch in range(50):
 
        for step, (x,y) in enumerate(train_db):
 
            with tf.GradientTape() as tape:
                # [b, 32, 32, 3] => [b, 1, 1, 512]
                out = conv_net(x)
                # flatten, => [b, 512]
                out = tf.reshape(out, [-1, 512])
                # [b, 512] => [b, 100]
                logits = fc_net(out)
                # one-hot encode the labels: [b] => [b, 100]
                y_onehot = tf.one_hot(y, depth=100)
                # per-sample cross-entropy on raw logits, then the batch mean
                loss = tf.losses.categorical_crossentropy(y_onehot, logits, from_logits=True)
                loss = tf.reduce_mean(loss)
            # Exercise: fill in the arguments for the gradient computation and update.
            grads = tape.gradient(___________, ______________)
            optimizer.apply_gradients(zip(__________, ___________))
 
            # Log the loss every 100 steps.
            if step %100 == 0:
                print(epoch, step, 'loss:', float(loss))
 
 
 
        # Evaluate on the test set after each epoch.
        total_num = 0
        total_correct = 0
        for x,y in test_db:
 
            out = conv_net(x)
            out = tf.reshape(out, [-1, 512])
            logits = fc_net(out)
            prob = tf.nn.softmax(logits, axis=1)
            # predicted class = index of the highest probability
            pred = tf.argmax(prob, axis=1)
            pred = tf.cast(pred, dtype=tf.int32)
 
            # count predictions that match the labels in this batch
            correct = tf.cast(tf.equal(pred, y), dtype=tf.int32)
            correct = tf.reduce_sum(correct)
 
            total_num += x.shape[0]
            total_correct += int(correct)
 
        acc = total_correct / total_num
        print(epoch, 'acc:', acc)
 
 
 
# Script entry point: run training/evaluation only when executed directly.
if __name__ == '__main__':
    main()

这个是补完后的代码

import tensorflow as tf
from tensorflow.keras import layers, optimizers, datasets, Sequential
import os
import time

# Report the TensorFlow version and the GPU / CPU devices it can see.
print("tf.version", tf.__version__)
# (An earlier variant of the GPU check used tf.test.is_gpu_available().)
print("tf is using GPU", tf.config.list_physical_devices(device_type='GPU'))
print(tf.config.experimental.list_physical_devices(device_type='CPU'))
# NOTE(review): start_time is not read anywhere in the visible code; the
# __main__ block takes its own timestamps.
start_time = time.perf_counter()

# Presumably meant to quiet TensorFlow's native (C++) logging.
# NOTE(review): this runs after `import tensorflow`; confirm it still takes effect.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# Fix the TensorFlow random seed so repeated runs are comparable.
tf.random.set_seed(2345)
#
# layers.Conv2D arguments used below:
#   filters      - number of convolution kernels
#   kernel_size  - size of each convolution kernel
#   padding      - "same" zero-pads the borders, "valid" applies no padding
#   activation   - activation function
# layers.MaxPool2D arguments used below:
#   pool_size    - size of the pooling window (defaults to 2x2)
#   strides      - step of the window (defaults to the pool size, i.e. 2)
#   padding      - "same" zero-pads the borders, "valid" applies no padding
#
# First part of the network, later handed to a Sequential container:
# five units, each made of two 3x3 convolutions followed by a stride-2
# max pooling (15 layers in total). The tuple gives each unit's filter count.
conv_layers = [
    layer
    for filters in (64, 128, 256, 512, 512)
    for layer in (
        layers.Conv2D(filters, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        layers.Conv2D(filters, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),
    )
]


# Data preprocessing: dtype conversion plus pixel scaling only.
def preprocess(x, y):
    """Convert one (image, label) pair to the dtypes used for training.

    The image is cast to float32 and divided by 255 (mapping 8-bit pixel
    values into [0, 1]); the label is cast to int32.
    """
    images = tf.cast(x, dtype=tf.float32)
    labels = tf.cast(y, dtype=tf.int32)
    return images / 255., labels


# Load the datasets; the assignment requires CIFAR-100.
(x, y), (x_test, y_test) = datasets.cifar100.load_data()
# Remove the size-1 axis from the label arrays (tf.squeeze(y, axis=1) also works).
y, y_test = tf.squeeze(y), tf.squeeze(y_test)
print(x.shape, y.shape, x_test.shape, y_test.shape)

# from_tensor_slices treats the first dimension as the sample count and
#   slices the tensors along it, one slice per sample.
# shuffle mixes the elements using a buffer of the given size (the buffer
#   size is required); a larger buffer shuffles more thoroughly.
# map feeds every element to the given function and builds a new Dataset
#   from the return values.
# batch sets how many samples go into each gradient update: the full dataset
#   length gives batch gradient descent, 1 gives stochastic gradient descent.

# Training set.
train_db = (
    tf.data.Dataset.from_tensor_slices((x, y))
    .shuffle(1000)
    .map(preprocess)
    .batch(128)
)
# Test set.
test_db = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .map(preprocess)
    .batch(128)
)

# Inspect one training batch: shapes and pixel value range (expected [0, 1]).
sample = next(iter(train_db))
print(
    'sample:',
    sample[0].shape,
    sample[1].shape,
    tf.reduce_min(sample[0]),
    tf.reduce_max(sample[0]),
)


def main():
    """Train the CNN on `train_db` and print test accuracy after every epoch.

    The model is written as two Sequential parts: a convolutional feature
    extractor built from `conv_layers` and a fully connected classifier
    with 100 output logits. Both are optimized together with Adam
    (learning rate 0.001) for 50 epochs.
    """
    # Part 1: convolutional stack, [b, 32, 32, 3] => [b, 1, 1, 512].
    conv_net = Sequential(conv_layers)

    # Part 2: fully connected head; its input is the flattened output of part 1.
    fc_net = Sequential([
        layers.Dense(256, activation=tf.nn.relu),
        layers.Dense(128, activation=tf.nn.relu),
        layers.Dense(100, activation=None),
    ])

    # One network written as two pieces, so each piece is built separately.
    conv_net.build(input_shape=[None, 32, 32, 3])
    fc_net.build(input_shape=[None, 512])
    optimizer = optimizers.Adam(learning_rate=0.001)
    conv_net.summary()
    fc_net.summary()

    # `+` concatenates the two Python lists, e.g. [1, 2] + [3, 4] => [1, 2, 3, 4],
    # so a single gradient call covers the trainable weights of both parts.
    variables = conv_net.trainable_variables + fc_net.trainable_variables

    def forward(images):
        """Run both parts: [b, 32, 32, 3] => logits of shape [b, 100]."""
        features = conv_net(images)
        # Flatten [b, 1, 1, 512] => [b, 512] before the dense layers.
        flat = tf.reshape(features, [-1, 512])
        return fc_net(flat)

    for epoch in range(50):
        # ---- training pass ----
        for step, (x, y) in enumerate(train_db):
            with tf.GradientTape() as tape:
                logits = forward(x)
                # One-hot encode the labels: [b] => [b, 100].
                y_onehot = tf.one_hot(y, depth=100)
                # Per-sample cross-entropy of shape [b], averaged over the batch.
                per_sample = tf.losses.categorical_crossentropy(
                    y_onehot, logits, from_logits=True)
                loss = tf.reduce_mean(per_sample)

            # Compute the gradients and apply the update.
            grads = tape.gradient(loss, variables)
            optimizer.apply_gradients(zip(grads, variables))

            if not step % 100:
                print(epoch, step, 'loss:', float(loss))

        # ---- evaluation pass ----
        total_num = 0
        total_correct = 0
        for x, y in test_db:
            # Class probabilities, then the most likely class per sample.
            prob = tf.nn.softmax(forward(x), axis=1)
            pred = tf.cast(tf.argmax(prob, axis=1), dtype=tf.int32)

            # Count the predictions that match the true labels.
            hits = tf.reduce_sum(tf.cast(tf.equal(pred, y), dtype=tf.int32))

            total_num += x.shape[0]
            total_correct += int(hits)  # scalar tensor -> Python int

        print(epoch, 'acc:', total_correct / total_num)


def format_running_time(seconds):
    """Return the elapsed-time report line for a duration given in seconds.

    Args:
        seconds: Elapsed wall-clock time in seconds (int or float).

    Returns:
        A string of the form 'Running time: <minutes> min <seconds> s',
        where minutes is the whole number of minutes and seconds is the
        remainder.
    """
    whole_minutes = int(seconds / 60)
    # Both values must be passed as ONE tuple: the original applied `%` twice
    # ('...' % int(...) % (...)), which raises TypeError because the first
    # `%` supplies a single value to a format string with two placeholders.
    return 'Running time: %s min %s s' % (whole_minutes, seconds % 60)


# Script entry point: time the whole training run and report its duration.
if __name__ == '__main__':
    start = time.perf_counter()
    main()
    end = time.perf_counter()
    print(format_running_time(end - start))

keras 的导入方式要看自己的环境:如果用的是 TensorFlow 自带的 keras,就需要加上 tensorflow 前缀(from tensorflow.keras import ...);如果是单独安装的 keras,则不需要这个前缀。

然后末尾记录一个折磨了自己挺久的东西

【人工智能】实验二基于CNN的图像分类_第2张图片

 这个报错是因为 TensorFlow 和 keras 版本不匹配,执行 conda install keras 就可以了,让万能的 conda 帮你匹配一下版本。至于改进的话,可以再看看怎么优化比较好。

实际上所有数据跑30次的话,正确率不高,耗时也很长,属于是折磨了。

【人工智能】实验二基于CNN的图像分类_第3张图片

你可能感兴趣的:(人工智能)