Several Deep Learning Experiments with Neural Networks

Table of Contents

Preface

1. MNIST: Fully Connected Network

2. CNN: Convolutional Neural Network

3. RNN: Text Classification

4. CGAN: MNIST Digit Generation

5. ResNet: Residual Network

6. Stack-AE AutoEncoder

7. Machine Translation

8. Object Detection

Summary


Preface

These experiments were all done on PaddlePaddle's AI Studio. They felt very worthwhile; I haven't fully digested everything yet, so I'm writing them down to study in more depth later. T T



1. MNIST: Fully Connected Network

#Import the dependencies (packages)
import paddle
import numpy as np
import matplotlib.pyplot as plt
import paddle.vision.transforms as T

#Load and preprocess the data
transform = T.Normalize(mean = [127.5], std = [127.5]) # mean and std are chosen so pixel values are normalized into [-1, 1]
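# e.g. pixel 0 maps to (0 - 127.5) / 127.5 = -1.0, and pixel 255 maps to (255 - 127.5) / 127.5 = +1.0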

#Training dataset
train_dataset = paddle.vision.datasets.MNIST(mode = 'train', transform = transform)

#Evaluation dataset
eval_dataset = paddle.vision.datasets.MNIST(mode = 'test', transform = transform)

#Print the number of training and validation samples
print('Training samples: {}, validation samples: {}'.format(len(train_dataset),len(eval_dataset)))

#Define and wrap the model (784 - 512 - 10)
network = paddle.nn.Sequential(
    paddle.nn.Flatten(),       #flatten (28,28) => (784,)
    paddle.nn.Linear(784,512), #hidden layer: a linear transform
    paddle.nn.ReLU(),          #activation; ReLU here, though sigmoid is another common choice
    paddle.nn.Linear(512,10)   #output layer
)
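#Optional sanity check (a sketch): paddle.summary prints each layer's output shape and parameter count
#paddle.summary(network, (1, 28, 28))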

model = paddle.Model(network)

model.prepare(
    paddle.optimizer.Lamb(learning_rate = 0.001, parameters = network.parameters()), # the learning rate is set to 0.001
    paddle.nn.CrossEntropyLoss(),
    paddle.metric.Accuracy()
)

#Train the model
model.fit(
    train_dataset,#training dataset
    eval_dataset,#evaluation dataset
    epochs = 5,#total number of training epochs
    batch_size = 64,#batch size used for training
    verbose = 1 )#logging verbosity

#Evaluate the model
result = model.evaluate(eval_dataset, verbose = 1)
print(result)

#Predict and visualize
result = model.predict(eval_dataset)

#Plotting helper
def show_img(img,predict):
    plt.figure()
    plt.title('predict:{}'.format(predict))
    plt.imshow(img.reshape([28,28]),cmap=plt.cm.binary)
    plt.show()

#Show a few samples
indexs = [2,15,38,211] #sample indices
for idx in indexs:
    show_img(eval_dataset[idx][0], np.argmax(result[0][idx]))








2. CNN: Convolutional Neural Network

# Unzip the dataset
!unzip -d work data/data75768/food-11.zip
!rm -rf work/__MACOSX

# 1. Import the libraries
import os
import paddle
import paddle.vision.transforms as T
import numpy as np
from PIL import Image
import paddle.nn.functional as F
import cv2
from sklearn.utils import shuffle

#2. Read the data
data_path = 'work/food-11/'  # dataset root directory
character_folders = os.listdir(data_path)  # list the folders under the root

# Delete the txt label files before each run and rebuild the label lists
if(os.path.exists('./training_set.txt')):  # check whether the file exists
    os.remove('./training_set.txt')  # delete it
if(os.path.exists('./validation_set.txt')):
    os.remove('./validation_set.txt')
if(os.path.exists('./testing_set.txt')):
    os.remove('./testing_set.txt')

for character_folder in character_folders:  # iterate over the folders
    with open(f'./{character_folder}_set.txt', 'a') as f_train:  # open the label file in append mode
        character_imgs = os.listdir(os.path.join(data_path,character_folder))  # list the folder's contents
        count = 0
        if character_folder == 'testing':  # the test set has no labels, so only paths are written
            for img in character_imgs:  # iterate over the images
                f_train.write(os.path.join(data_path,character_folder,img) + '\n')  # write the image path
                count += 1
            print(character_folder,count)
        else:
            for img in character_imgs:  # training and validation sets: write path and label
                f_train.write(os.path.join(data_path,character_folder,img) + '\t' + img[0:img.rfind('_', 1)] + '\n')  # write the path and the label
                count += 1
            print(character_folder,count)
#3. Data preprocessing
#paddle.vision.transforms.Compose is used below; the main steps are:
#1. load the image in RGB format
#2. resize the image from 224x224 down to 100x100
#3. transpose from HWC layout to CHW layout
#4. divide every pixel value by 255 to normalize
#5. subtract the per-channel mean and divide by the per-channel std

img_h, img_w = 100, 100   # target image size
means, stdevs = [], []
img_list = [] # list that will hold the image arrays
imgs_path = 'work/food-11/training' # image directory
imgs_path_list = os.listdir(imgs_path)
len_ = len(imgs_path_list)

i = 0
for item in imgs_path_list:
    img = cv2.imread(os.path.join(imgs_path,item))
    img = cv2.resize(img,(img_w,img_h))
    img = img[:, :, :, np.newaxis]
    img_list.append(img)
    i += 1
    # print(i,'/',len_)

imgs_path = 'work/food-11/testing'
imgs_path_list = os.listdir(imgs_path)
 
len_ = len(imgs_path_list)
i = 0
for item in imgs_path_list:
    img = cv2.imread(os.path.join(imgs_path,item))
    img = cv2.resize(img,(img_w,img_h))
    img = img[:, :, :, np.newaxis]
    img_list.append(img)
    i += 1

imgs = np.concatenate(img_list, axis=3)
imgs = imgs.astype(np.float32) / 255.

for i in range(3):
    pixels = imgs[:, :, i, :].ravel()  # flatten the channel into one row
    means.append(np.mean(pixels))
    stdevs.append(np.std(pixels))

# BGR --> RGB: images read with cv2 need this swap; images read with PIL do not
means.reverse()
stdevs.reverse()

print("normMean = {}".format(means))
print("normStd = {}".format(stdevs))

# This only needs to run once; record the resulting statistics:

# normMean = [0.5560434, 0.4515875, 0.34473255]

# normStd = [0.27080873, 0.2738704, 0.280732]


normMean = [0.5560434, 0.4515875, 0.34473255]

normStd = [0.27080873, 0.2738704, 0.280732]
# Define the data preprocessing pipeline
data_transforms = T.Compose([
    T.Resize(size=(100, 100)),
    T.RandomHorizontalFlip(0.5),   # flip probability; must lie in [0, 1]
    T.RandomVerticalFlip(0.5),
    T.RandomRotation(90),
    T.CenterCrop(100),
    T.Transpose(),    # HWC -> CHW

    # Normalize with the channel statistics computed above
    T.Normalize(
        mean=[0.5560434, 0.4515875, 0.34473255],      # the mean and std computed in the previous block
        std=[0.27080873, 0.2738704, 0.280732],
        to_rgb=True)    
        # computed as: output[channel] = (input[channel] - mean[channel]) / std[channel]
])
#4. Subclass the Dataset class

class FoodDataset(paddle.io.Dataset):

    '''Dataset class definition

    '''
    def __init__(self,mode='training'):
        '''
        Constructor: read the label file for the chosen split
        '''
        self.data=[]
        with open(f'{mode}_set.txt')as f:
            for line in f.readlines():
                info=line.strip().split('\t')
                if len(info) > 1:
                    self.data.append([info[0].strip(),info[1].strip() ])

    def __getitem__(self,index):

        '''
        Read an image, apply the transforms, and return (image, label)
        '''

        image_file,label=self.data[index] #fetch one record

        img=Image.open(image_file).convert('RGB') #read the image

        return data_transforms(img).astype('float32'),np.array(label,dtype='int64')

    def __len__(self):

        '''Return the number of samples'''

        return len(self.data)

train_dataset=FoodDataset(mode='training')    

train_loader=paddle.io.DataLoader(train_dataset,places=paddle.CPUPlace(),batch_size=64,shuffle=True,num_workers=0)  #the dataset and its loader

eval_dataset=FoodDataset(mode='validation')  #evaluate on the validation split rather than the training split

eval_loader=paddle.io.DataLoader(eval_dataset,places=paddle.CPUPlace(),batch_size=64,shuffle=True,num_workers=0)
#5. Build the model

#Subclass paddle.nn.Layer to build the model

class MyCNN(paddle.nn.Layer):

    def __init__(self):

        super(MyCNN,self).__init__()

        self.conv0 = paddle.nn.Conv2D(in_channels=3,out_channels=20,kernel_size=5,padding=0) #2-D convolution layer
        self.pool0 = paddle.nn.MaxPool2D(kernel_size=2,stride=2)   #max pooling layer
        self._batch_norm_0 = paddle.nn.BatchNorm2D(num_features =20)    #batch normalization layer


        self.conv1 = paddle.nn.Conv2D(in_channels=20,out_channels=50,kernel_size=5,padding=0) #2-D convolution layer
        self.pool1 = paddle.nn.MaxPool2D(kernel_size=2,stride=2)   #max pooling layer
        self._batch_norm_1 = paddle.nn.BatchNorm2D(num_features =50)    #batch normalization layer


        self.conv2 = paddle.nn.Conv2D(in_channels=50,out_channels=50,kernel_size=5,padding=0) #2-D convolution layer
        self.pool2 = paddle.nn.MaxPool2D(kernel_size=2,stride=2)   #max pooling layer

        self.fc1 = paddle.nn.Linear(in_features=4050,out_features=218)# linear (fully connected) layers
        self.fc2 = paddle.nn.Linear(in_features=218,out_features=100)
        self.fc3 = paddle.nn.Linear(in_features=100,out_features=11)


    def forward(self,input):

        #reshape the input to [N, 3, 100, 100]
        input = paddle.reshape(input,shape=[-1,3,100,100]) #change the dimensions
        #print(input.shape)

        x=self.conv0(input) #convolution

        x=F.relu(x) #activation
        x=self.pool0(x) #pooling
        x=self._batch_norm_0(x) #batch norm

        x=self.conv1(x)
        x=F.relu(x)
        x=self.pool1(x)
        x=self._batch_norm_1(x)

        x=self.conv2(x)
        x=F.relu(x)
        x=self.pool2(x)
        x=paddle.reshape(x,[x.shape[0],-1]) #flatten
        #print(x.shape)
        x=self.fc1(x) #fully connected layers
        x=F.relu(x)
        x=self.fc2(x)
        x=F.relu(x)
        x=self.fc3(x)

        #y=F.softmax(x) #classifier; not needed here because CrossEntropyLoss applies softmax itself

        return x


network=MyCNN() #instantiate the model
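#Why fc1 takes in_features=4050: with 100x100 inputs and no padding,
#conv0 (5x5) gives 96x96 -> pool0 halves it to 48x48; conv1 gives 44x44 -> pool1 gives 22x22;
#conv2 gives 18x18 -> pool2 gives 9x9 with 50 channels, and 50 * 9 * 9 = 4050.
#A quick check of these shapes (a sketch): paddle.summary(network, (1, 3, 100, 100))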

#Training configuration: loss function, optimizer, and accuracy metric

model=paddle.Model(network) #wrap the network for the high-level API

#Define the learning-rate schedule and the optimizer
scheduler=paddle.optimizer.lr.LinearWarmup(
        learning_rate=0.001,warmup_steps=100,start_lr=0,end_lr=0.001,verbose=True)
optim=paddle.optimizer.SGD(learning_rate=scheduler,parameters=model.parameters())

#Configure the model
model.prepare(
    optim,
    paddle.nn.CrossEntropyLoss(),
    paddle.metric.Accuracy()
    )

visualdl=paddle.callbacks.VisualDL(log_dir='visualdl_log')
#Train and evaluate the model

model.fit( 

    train_loader, #training data loader
    eval_loader, #evaluation data loader
    epochs=5, #total number of training epochs
    verbose=1, #logging verbosity
    callbacks=[visualdl]) #enable VisualDL logging

#Evaluate the model
model.evaluate(eval_dataset,batch_size=128,verbose=1)
#7. Save the model
model.save('finetuning/mnist')  # save the model
#8. Test

def opening():  # load the test images
    with open('testing_set.txt') as f:  # read the list of image paths
        test_img = []
        txt =  []

        for line in f.readlines():  # iterate over every line
            img = Image.open(line[:-1])  # open the image (the trailing newline is stripped)
            img = data_transforms(img).astype('float32')
            txt.append(line[:-1])  # record the path
            test_img.append(img)
        return txt,test_img

img_path, img = opening()  # read the lists
model_state_dict = paddle.load('finetuning/mnist.pdparams') # load the saved parameters

model = MyCNN()   # instantiate the model
model.set_state_dict(model_state_dict)
model.eval()

site = 10  # index of the image to test
ceshi = model(paddle.to_tensor(img[site]))  # run inference
print('Predicted class index:', np.argmax(ceshi.numpy()))

value = ["Bread","Dairy product","Dessert","Egg","Fried food","Meat","Noodles/Pasta","Rice","Seafood","Soup","Vegetable/Fruit"]
print('           ', value[np.argmax(ceshi.numpy())])
Image.open(img_path[site])  # display the image








3. RNN: Text Classification

The code is as follows (example):

!unzip -oq /home/aistudio/data/data113551/RumorClassification-GRU.zip
!rm -rf work/__MACOSX
#1. Import the libraries
import numpy as np
import paddle
from paddle.io import Dataset, DataLoader
import pandas as pd
import paddle.nn as nn

#2. Load and process the dataset

# Read the dataset (adjust this path if needed)
all_data = pd.read_csv("RumorClassification-GRU/all_data.tsv", sep="\t")

# Collect the texts as a list of strings
all_str = all_data["text"].values.tolist()

dict_set = set() # guarantees each character gets a unique number

for content in all_str:
    for s in content:
        dict_set.add(s)

# Add a token for unknown characters ("<unk>" is a stand-in spelling; it also serves as padding below)
dict_set.add("<unk>")

# Turn the set into a dictionary mapping each character to a number (for the model to consume)
dict_list = []
i = 0
for s in dict_set:
    dict_list.append([s, i])
    i += 1
dict_txt = dict(dict_list)

# Save the dictionary to disk
with open("dict.txt", 'w', encoding='utf-8') as f:
    f.write(str(dict_txt))

# Get the size of the dictionary
def get_dict_len(dict_path):
    with open(dict_path, 'r', encoding='utf-8') as f:
        line = eval(f.readlines()[0])
    return len(line.keys())

print(get_dict_len("dict.txt"))
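# A quick illustration of the character-to-id mapping (a sketch; the actual indices depend on the set's iteration order):
# print([dict_txt[ch] for ch in all_str[0][:5]])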

#3. Split into training, validation, and test sets

all_data_list = all_data.values.tolist() # first convert the data to a list
train_length = len(all_data) // 10 * 7  # length of the training set
dev_length = len(all_data) // 10 * 2 # length of the validation set

train_data = [] # training set
dev_data = []  # validation set
test_data = [] # test set

# Fill the training set
for i in range(train_length):
    text = ""
    for s in all_data_list[i][1]:
        text = text + str(dict_txt[s]) + ","
    text = text[:-1]
    train_data.append([text, all_data_list[i][0]])

# Fill the validation set
for i in range(train_length, train_length+dev_length):
    text = ""
    for s in all_data_list[i][1]:
        text = text + str(dict_txt[s]) + ","
    text = text[:-1]
    dev_data.append([text, all_data_list[i][0]])

# Fill the test set
for i in range(train_length+dev_length, len(all_data)):
    text = ""
    for s in all_data_list[i][1]:
        text = text + str(dict_txt[s]) + ","
    text = text[:-1]
    test_data.append([text, all_data_list[i][0]])

print(len(train_data))
print(len(dev_data))
print(len(test_data))

# A DataFrame is a two-dimensional table
df_train = pd.DataFrame(columns=["text", "label"], data=train_data)
df_dev = pd.DataFrame(columns=["text", "label"], data=dev_data)
df_test = pd.DataFrame(columns=["text", "label"], data=test_data)

# Write each split out as a csv file
df_train.to_csv("train_data.csv", index=False)
df_dev.to_csv("dev_data.csv", index=False)
df_test.to_csv("test_data.csv", index=False)

#4. Subclass the Dataset class
class MyDataset(Dataset):
    """
    Step 1: subclass paddle.io.Dataset
    """
    def __init__(self, mode='train'):
        """
        Step 2: constructor; read the chosen split and pad/truncate every text to 256 token ids
        """
        super(MyDataset, self).__init__()
        self.label = (mode != 'test')  # the test split returns no labels
        csv_name = {"train": "train_data.csv", "dev": "dev_data.csv"}.get(mode, "test_data.csv")
        text = pd.read_csv(csv_name)["text"].values.tolist()
        label = pd.read_csv(csv_name)["label"].values.tolist()
        pad_id = dict_txt["<unk>"]  # the unknown-character token doubles as padding
        self.data = []
        for i in range(len(text)):
            ids = np.array([int(t) for t in text[i].split(",")])
            if len(ids) >= 256:
                ids = ids[:256].astype('int64')  # truncate long texts to 256 ids
            else:
                ids = np.concatenate([ids, np.array([pad_id] * (256 - len(ids)))]).astype('int64')  # pad short texts
            self.data.append([ids, np.array(int(label[i])).astype('int64')])
    def __getitem__(self, index):
        """
        Step 3: return a single sample (text, label) for the given index
        """
        text_ =  self.data[index][0]
        label_ = self.data[index][1]

        if self.label:
            return text_, label_
        else:
            return text_

    def __len__(self):
        """
        Step 4: return the total number of samples
        """
        return len(self.data)

train_data = MyDataset(mode='train')
dev_data = MyDataset(mode='dev')
test_data = MyDataset(mode='test')

BATCH_SIZE = 128

train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
dev_loader = DataLoader(dev_data, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)  # keep the test order stable so predictions map back to rows

# 5. Configure and wrap the model
inputs_dim = get_dict_len("dict.txt")

class myGRU(nn.Layer):
    def __init__(self):
        super(myGRU, self).__init__()
        self.embedding = nn.Embedding(inputs_dim, 256)
        self.gru = nn.GRU(256, 256, num_layers=2,
                         direction='bidirectional', dropout=0.5)
        self.linear = nn.Linear(in_features=256*2, out_features=2)
        self.dropout = nn.Dropout(0.5)


    def forward(self, inputs):

        emb = self.dropout(self.embedding(inputs))

        output, hidden = self.gru(emb)
        #concatenate the final forward hidden state with the final backward hidden state
        hidden = paddle.concat((hidden[-2,:,:], hidden[-1,:,:]), axis = 1)
        hidden = self.dropout(hidden)

        return self.linear(hidden)
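
# A quick shape sanity check (a sketch; random token ids stand in for real data):
# net = myGRU()
# fake_batch = paddle.randint(0, inputs_dim, shape=[4, 256])
# print(net(fake_batch).shape)  # expected: [4, 2]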


GRU_model = paddle.Model(myGRU())

GRU_model.prepare(paddle.optimizer.Adam(learning_rate=0.001,parameters=GRU_model.parameters()),
                                        paddle.nn.CrossEntropyLoss(),
                                        paddle.metric.Accuracy())


# 6. Train the model
GRU_model.fit(train_loader,
             dev_loader,
             epochs=10,
             batch_size=BATCH_SIZE,
             verbose=1,
             save_dir="work/GRU")

# Model prediction
result = GRU_model.predict(test_loader)

4. CGAN: MNIST Digit Generation

# Import the dataset: !unzip -d work data/data114246/output.zip
# First import the dependencies
## Define the data readers
import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph import Conv2D, Pool2D, Linear, Conv2DTranspose
import numpy as np
import matplotlib.pyplot as plt

# noise dimension
Z_DIM = 100
BATCH_SIZE = 128
# BATCH_SIZE = 3 # debug

# Noise generator: the fake images are produced from this noise input
def z_reader():
    while True:
        yield np.random.normal(0.0, 1.0, (Z_DIM, 1, 1)).astype('float32')

# reader for the real images
mnist_generator = paddle.batch(
    paddle.reader.shuffle(paddle.dataset.mnist.train(), 30000), batch_size=BATCH_SIZE)

# reader for the noise that drives the fake images
z_generator = paddle.batch(z_reader, batch_size=BATCH_SIZE)

## import matplotlib.pyplot as plt
%matplotlib inline

data_tmp = next(mnist_generator())
print('Shape of one batch of images: batch_size =', len(data_tmp), ', data_shape =', data_tmp[0][0].shape, ', num = ', data_tmp[0][1])

plt.imshow(data_tmp[0][0].reshape(28, 28))
plt.show()

z_tmp = next(z_generator())
print('Shape of one batch of noise z: batch_size =', len(z_tmp), ', data_shape =', z_tmp[0].shape)

## Define the CGAN
# Feature-map concatenation: append the class label to a feature map
def conv_concatenate(x, y):
    # print('---', x.shape, y.shape)
    # y = fluid.dygraph.to_variable(y.numpy().astype('float32'))
    if len(x.shape) == 2: # append the label to the output of a fully connected layer
        y = fluid.layers.reshape(y, shape=[x.shape[0], 1])
        ones = fluid.layers.fill_constant(y.shape, dtype='float32', value=1.0)
    elif len(x.shape) == 4: # append the label to the output of a convolutional layer
        y = fluid.layers.reshape(y, shape=[x.shape[0], 1, 1, 1])
        ones = fluid.layers.fill_constant(x.shape, dtype='float32', value=1.0)
    x = fluid.layers.concat([x, ones * y], axis=1)
    # print(ones.shape, x.shape, y.shape, '---')

    return x
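
Note how the sizes work out: in the 4-D branch, `ones` takes the full shape of `x`, so `ones * y` broadcasts the scalar label into a tensor the same size as `x`, and the concat along axis 1 doubles the channel count (a [N, 128, 7, 7] feature map becomes [N, 256, 7, 7]). In the 2-D branch only a single extra column is appended. This is why `convtrans1` in G below expects 256 input channels, `conv2` in D expects 128, and the fully connected layers take `... + 1` inputs.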

# Build networks with different roles by subclassing Layer
# Generator (the generative network)
class G(fluid.dygraph.Layer):
    def __init__(self, name_scope):
        super(G, self).__init__(name_scope)
        name_scope = self.full_name()
        # first fully connected + BN block
        self.fc1 = Linear(input_dim=100+1, output_dim=1024)
        self.bn1 = fluid.dygraph.BatchNorm(num_channels=1024, act='relu')
        # second fully connected + BN block
        self.fc2 = Linear(input_dim=1024+1, output_dim=128*7*7)
        self.bn2 = fluid.dygraph.BatchNorm(num_channels=128*7*7, act='relu')
        # first transposed convolution
        self.convtrans1 = Conv2DTranspose(256, 64, 4, stride=2, padding=1)
        self.bn3 = fluid.dygraph.BatchNorm(64, act='relu')
        # second transposed convolution
        self.convtrans2 = Conv2DTranspose(128, 1, 4, stride=2, padding=1, act='relu')
        
    def forward(self, z, label):
        z = fluid.layers.reshape(z, shape=[-1, 100])
        z = conv_concatenate(z, label) # concatenate the noise and the label
        y = self.fc1(z)
        y = self.bn1(y)
        y = conv_concatenate(y, label) # concatenate the feature map and the label
        y = self.fc2(y)
        y = self.bn2(y)
        y = fluid.layers.reshape(y, shape=[-1, 128, 7, 7])
        y = conv_concatenate(y, label) # concatenate the feature map and the label
        y = self.convtrans1(y)
        #print('G, first transposed conv:', y.shape)
        y = self.bn3(y)
        y = conv_concatenate(y, label) # concatenate the feature map and the label
        y = self.convtrans2(y)
        #print('G, second transposed conv:', y.shape)
        return y

# Discriminator (the classification network)
class D(fluid.dygraph.Layer):
    def __init__(self, name_scope):
        super(D, self).__init__(name_scope)
        name_scope = self.full_name()
        # first conv + pool block
        self.conv1 = Conv2D(num_channels=2, num_filters=64, filter_size=3)
        self.bn1 = fluid.dygraph.BatchNorm(num_channels=64, act='leaky_relu')
        self.pool1 = Pool2D(pool_size=2, pool_stride=2)
        # second conv + pool block
        self.conv2 = Conv2D(num_channels=128, num_filters=128, filter_size=3)
        self.bn2 = fluid.dygraph.BatchNorm(num_channels=128, act='leaky_relu')
        self.pool2 = Pool2D(pool_size=2, pool_stride=2)
        # fully connected output layers
        self.fc1 = Linear(input_dim=128*5*5+1, output_dim=1024)
        self.bnfc1 = fluid.dygraph.BatchNorm(num_channels=1024, act='leaky_relu')
        self.fc2 = Linear(input_dim=1024+1, output_dim=1)

    def forward(self, img, label):
        y = conv_concatenate(img, label) # concatenate the input image and the label
        y = self.conv1(y)
        y = self.bn1(y)
        y = self.pool1(y)
        y = conv_concatenate(y, label) # concatenate the feature map and the label
        y = self.conv2(y)
        y = self.bn2(y)
        y = self.pool2(y)
        y = fluid.layers.reshape(y, shape=[-1, 128*5*5])
        y = conv_concatenate(y, label) # concatenate the feature map and the label
        y = self.fc1(y)
        #print('D, fc1 output:', y.shape)
        y = self.bnfc1(y)
        y = conv_concatenate(y, label) # concatenate the feature map and the label
        y = self.fc2(y)
        #print('D, fc2 output:', y.shape)

        return y

## Test the generator G and the discriminator D
with fluid.dygraph.guard():
    g_tmp = G('G')
    l_tmp = fluid.dygraph.to_variable(np.array([x[1] for x in data_tmp]).astype('float32'))
    tmp_g = g_tmp(fluid.dygraph.to_variable(np.array(z_tmp)), l_tmp).numpy()
    print('Shape of the images produced by G:', tmp_g.shape)
    plt.imshow(tmp_g[0][0])
    plt.show()

    d_tmp = D('D')
    tmp_d = d_tmp(fluid.dygraph.to_variable(tmp_g), l_tmp).numpy()
    print('Shape of the probabilities D assigns to the generated images:', tmp_d.shape)

    
# Helper: display a grid of images, 16 per row (n = batch_size/16 rows), printed into the notebook
## import matplotlib.pyplot as plt
%matplotlib inline

def show_image_grid(images, batch_size=128, pass_id=None):
    fig = plt.figure(figsize=(8, batch_size/32))
    fig.suptitle("Pass {}".format(pass_id))
    gs = plt.GridSpec(int(batch_size/16), 16)
    gs.update(wspace=0.05, hspace=0.05)

    for i, image in enumerate(images):
        ax = plt.subplot(gs[i])
        plt.axis('off')
        ax.set_xticklabels([])
        ax.set_yticklabels([])
        ax.set_aspect('equal')
        plt.imshow(image[0], cmap='Greys_r')
    
    plt.show()

show_image_grid(tmp_g, BATCH_SIZE)

## Train the CGAN
from visualdl import LogWriter
import time
import random

def train(mnist_generator, epoch_num=10, batch_size=128, use_gpu=True, load_model=False):
    # with fluid.dygraph.guard():
    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        # model save path (adjust as needed)
        model_path = './work/home/aistudio/output/'

        d = D('D')
        d.train()
        g = G('G')
        g.train()

        # create the optimizers
        g_optimizer = fluid.optimizer.SGDOptimizer(learning_rate=2e-4, parameter_list=g.parameters())
        d_optimizer = fluid.optimizer.SGDOptimizer(learning_rate=2e-4, parameter_list=d.parameters())
        
        # load the previously saved model
        if load_model == True:
            g_para, g_opt = fluid.load_dygraph(model_path+'g')
            d_para, d_opt = fluid.load_dygraph(model_path+'d')
            g.load_dict(g_para)
            g_optimizer.set_dict(g_opt)
            d.load_dict(d_para)
            d_optimizer.set_dict(d_opt)

        iteration_num = 0
        print('Start time :', time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), 'start step:', iteration_num + 1)
        for epoch in range(epoch_num):
            for i, real_data in enumerate(mnist_generator()):
                # drop the last batch if it is smaller than batch_size
                if(len(real_data) != BATCH_SIZE):
                    continue
                
                iteration_num += 1
                
                '''
                The discriminator d is optimized by minimizing the loss between its output on real
                images and the real-value label ones, which raises the probability that d classifies
                real images real_image as the real label ones. (The commented-out lines use the
                sigmoid cross-entropy GAN loss; the active lines use the LSGAN squared loss instead.)
                '''
                # read the MNIST images into real_image and fill the real labels ones with 1
                ri = np.array([x[0] for x in real_data]).reshape(-1, 1, 28, 28)
                rl = np.array([x[1] for x in real_data]).astype('float32')
                real_image = fluid.dygraph.to_variable(np.array(ri))
                real_label = fluid.dygraph.to_variable(rl)
                ones = fluid.dygraph.to_variable(np.ones([len(real_image), 1]).astype('float32'))
                # probability that d assigns to the real images
                p_real = d(real_image, real_label)
                # loss for classifying real images as real
                # real_cost = fluid.layers.sigmoid_cross_entropy_with_logits(p_real, ones)
                real_cost = (p_real - ones) ** 2 #lsgan
                real_avg_cost = fluid.layers.mean(real_cost)

                '''
                The discriminator d is also optimized by minimizing the loss between its output on
                the fake images g(z) produced by the generator and the fake-value label zeros, which
                raises the probability that d classifies the generated images g(z) as fake.
                '''
                # draw Gaussian noise z and fill the fake labels zeros with 0
                z = next(z_generator())
                z = fluid.dygraph.to_variable(np.array(z))
                zeros = fluid.dygraph.to_variable(np.zeros([len(real_image), 1]).astype('float32'))
                # probability that d assigns to the fake images produced by g
                p_fake = d(g(z, real_label), real_label)
                # fl = rl
                # for i in range(batch_size):
                #     fl[i] = random.randint(0, 9)
                # fake_label = fluid.dygraph.to_variable(fl)
                # p_fake = d(g(z, fake_label), fake_label)
                # loss for classifying the generated images as fake
                # fake_cost = fluid.layers.sigmoid_cross_entropy_with_logits(p_fake, zeros)
                fake_cost = (p_fake - zeros) ** 2 #lsgan
                fake_avg_cost = fluid.layers.mean(fake_cost)
                
                # update the discriminator's parameters
                d_loss = real_avg_cost + fake_avg_cost
                d_loss.backward()
                d_optimizer.minimize(d_loss)
                d.clear_gradients()

                '''
                The generator g is optimized by minimizing the loss between d's output on the fake
                images, d(fake), and the real-value label ones, which raises the probability that g
                fools d into classifying its fake images g(z) as real.
                '''
                # generate fake images from the input Gaussian noise z
                fake = g(z, real_label)
                # probability that d assigns to the generated images
                p_fake = d(fake, real_label)
                # loss between that probability and the real label ones
                # g_cost = fluid.layers.sigmoid_cross_entropy_with_logits(p_fake, ones)
                g_cost = (p_fake - ones) ** 2 #lsgan
                g_avg_cost = fluid.layers.mean(g_cost)
                # backpropagate and update the generator's parameters
                g_avg_cost.backward()
                g_optimizer.minimize(g_avg_cost)
                g.clear_gradients()
                
                if(iteration_num % 100 == 0):
                    print('epoch =', epoch, ', batch =', i, ', d_loss =', d_loss.numpy(), 'g_loss =', g_avg_cost.numpy())
                    show_image_grid(fake.numpy(), BATCH_SIZE, epoch)

        print('End time :', time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), 'End Step:', iteration_num)
        # save the models
        fluid.save_dygraph(g.state_dict(), model_path+'g')
        fluid.save_dygraph(g_optimizer.state_dict(), model_path+'g')
        fluid.save_dygraph(d.state_dict(), model_path+'d')
        fluid.save_dygraph(d_optimizer.state_dict(), model_path+'d')

# train(mnist_generator, epoch_num=1, batch_size=BATCH_SIZE, use_gpu=True)

train(mnist_generator, epoch_num=1, batch_size=BATCH_SIZE, use_gpu=True, load_model=True)
# train(mnist_generator, epoch_num=20, batch_size=BATCH_SIZE, use_gpu=True, load_model=True) # ~11 min
# train(mnist_generator, epoch_num=800, batch_size=BATCH_SIZE, use_gpu=True, load_model=True) # ~440 min



## Use the CGAN to generate each digit 0-9
def infer(batch_size=128, num=0, use_gpu=True):
    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        # model save path (adjust as needed)
        model_path = './work/home/aistudio/output/'

        g = G('G')
        g.eval()

        
        # load the previously saved generator
        g_para, g_opt = fluid.load_dygraph(model_path+'g')
        g.load_dict(g_para)
        # g_optimizer.set_dict(g_opt)

        z = next(z_generator())
        z = fluid.dygraph.to_variable(np.array(z))
        
        label = fluid.layers.fill_constant([batch_size], dtype='float32', value=float(num))
        fake = g(z, label)
        show_image_grid(fake.numpy(), batch_size, -1)                

for i in range(10):
    infer(batch_size=BATCH_SIZE, num=i)

5. ResNet: Residual Network

Dataset: (figure omitted)


# 导入相关的库
import paddle
import paddle.nn.functional as F
import numpy as np
from paddle.vision.transforms import Compose, Resize, Transpose, Normalize

# 数据的加载和预处理
t = Compose([Resize(size=96),Normalize(mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5], data_format='HWC'),Transpose()])

# 训练数据集
cifar10_train = paddle.vision.datasets.cifar.Cifar10(mode='train', transform=t, backend='cv2')

# 测试数据集
cifar10_test = paddle.vision.datasets.cifar.Cifar10(mode="test", transform=t, backend='cv2')

#
print('训练集样本量: {},验证集样本量: {}'.format(len(cifar10_train), len(cifar10_test)))

# The Residual block used to build the ResNet
class Residual(paddle.nn.Layer):
    def __init__(self, in_channel, out_channel, use_conv1x1=False, stride=1):
        super(Residual, self).__init__()
        self.conv1 = paddle.nn.Conv2D(in_channel, out_channel, kernel_size=3, padding=1, stride=stride)
        self.conv2 = paddle.nn.Conv2D(out_channel, out_channel, kernel_size=3, padding=1)
        if use_conv1x1:         # a 1x1 convolution matches the shortcut's shape to the block output
            self.conv3 = paddle.nn.Conv2D(in_channel, out_channel, kernel_size=1, stride=stride)
        else:
            self.conv3 = None
        self.batchNorm1 = paddle.nn.BatchNorm2D(out_channel)
        self.batchNorm2 = paddle.nn.BatchNorm2D(out_channel)

    def forward(self, x):
        y = F.relu(self.batchNorm1(self.conv1(x)))
        y = self.batchNorm2(self.conv2(y))
        if self.conv3:
            x = self.conv3(x)
        out = F.relu(y+x)   # the core of ResNet: add the shortcut to the block output
        return out


# Build a ResNetBlock out of the Residual blocks above
def ResNetBlock(in_channel, out_channel, num_layers, is_first=False):
    if is_first:
        assert in_channel == out_channel
    block_list = []
    for i in range(num_layers):
        if i == 0 and not is_first:
            block_list.append(Residual(in_channel, out_channel, use_conv1x1=True, stride=2))
        else:
            block_list.append(Residual(out_channel, out_channel))
    resNetBlock = paddle.nn.Sequential(*block_list)       # the * unpacks the list into separate layer arguments
    return resNetBlock

class ResNetModel(paddle.nn.Layer):
    def __init__(self):
        super(ResNetModel, self).__init__()
        self.b1 = paddle.nn.Sequential(
                    paddle.nn.Conv2D(3, 64, kernel_size=7, stride=2, padding=3),
                    paddle.nn.BatchNorm2D(64), 
                    paddle.nn.ReLU(),
                    paddle.nn.MaxPool2D(kernel_size=3, stride=2, padding=1))
        self.b2 = ResNetBlock(64, 64, 2, is_first=True)
        self.b3 = ResNetBlock(64, 128, 2)
        self.b4 = ResNetBlock(128, 256, 2)
        self.b5 = ResNetBlock(256, 512, 2)
        self.AvgPool = paddle.nn.AvgPool2D(2)
        self.flatten = paddle.nn.Flatten()
        self.Linear = paddle.nn.Linear(512, 10)
        
    def forward(self, x):
        x = self.b1(x)
        x = self.b2(x)
        x = self.b3(x)
        x = self.b4(x)
        x = self.b5(x)
        x = self.AvgPool(x)
        x = self.flatten(x)
        x = self.Linear(x)
        return x
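# Shape walk-through for a 96x96 input (the size produced by Resize above):
# b1: 96 -> 48 (7x7 conv, stride 2) -> 24 (3x3 max pool, stride 2); b2 keeps 24;
# b3 -> 12; b4 -> 6; b5 -> 3; AvgPool2D(2) turns 3x3 into 1x1,
# so flatten yields a 512-vector for the final Linear layer.
# A quick check of these shapes (a sketch): paddle.summary(ResNetModel(), (1, 3, 96, 96))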
epoch_num = 5
batch_size = 512
learning_rate = 0.001

val_acc_history = []
val_loss_history = []

def train(model):
    # switch to training mode
    model.train()
    # optimizer
    opt = paddle.optimizer.Adam(learning_rate=learning_rate, parameters=model.parameters())
    # mini-batch data loaders
    train_loader = paddle.io.DataLoader(cifar10_train, shuffle=True, batch_size=batch_size)
    valid_loader = paddle.io.DataLoader(cifar10_test, batch_size=batch_size)

    for epoch in range(epoch_num):
        for batch_id, data in enumerate(train_loader()):
            x_data = paddle.cast(data[0], 'float32')
            y_data = paddle.cast(data[1], 'int64')
            y_data = paddle.reshape(y_data, (-1, 1))
            y_predict = model(x_data)
            loss = F.cross_entropy(y_predict, y_data)
            loss.backward()
            opt.step()
            opt.clear_grad()
        print("Epoch: {}; loss: {}".format(epoch, loss.numpy()))

        # switch to evaluation mode
        model.eval()
        accuracies = []
        losses = []
        for batch_id, data in enumerate(valid_loader()):
            x_data = paddle.cast(data[0], 'float32')
            y_data = paddle.cast(data[1], 'int64')
            y_data = paddle.reshape(y_data, (-1, 1))
            y_predict = model(x_data)
            loss = F.cross_entropy(y_predict, y_data)
            acc = paddle.metric.accuracy(y_predict, y_data)
            accuracies.append(np.mean(acc.numpy()))
            losses.append(np.mean(loss.numpy()))

        avg_acc, avg_loss = np.mean(accuracies), np.mean(losses)
        print("Validation accuracy: {}; loss: {}".format(avg_acc, avg_loss))
        val_acc_history.append(avg_acc)
        val_loss_history.append(avg_loss)
        model.train()

model = ResNetModel()
train(model)

6. Stack-AE AutoEncoder

Test set: (figure omitted)

# First import the dependencies
import paddle
import paddle.fluid as fluid
import numpy as np
import sys, os, math
import paddle.fluid.layers as F
import matplotlib.pyplot as plt
from PIL import Image

# Build the model
class AutoEncoder(fluid.dygraph.Layer):
    def __init__(self):
        super(AutoEncoder, self).__init__()
        self.encoder = fluid.dygraph.Linear(784, 512, act='relu') # encoder layer
        self.decoder = fluid.dygraph.Linear(512, 784, act='relu')  # decoder layer
    def loss(self, x, lable):
        # arguments: lable is the original image, x is the decoded (reconstructed) image
        # returns: the element-wise squared error between the two
        return F.square(x - lable)
    def forward(self, x):
        self.input_imgs = x
        x = self.encoder(x)
        x = self.decoder(x)
        return x
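
# A quick reconstruction shape check (a sketch; runs in dygraph mode):
# with fluid.dygraph.guard():
#     ae = AutoEncoder()
#     dummy = fluid.dygraph.to_variable(np.zeros((4, 784), dtype='float32'))
#     print(ae(dummy).shape)  # expected: [4, 784]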

# Visualization helpers
def show_array2img(array,title):
    rebuilded_img = Image.fromarray(array.astype('uint8')).convert('RGB')
    plt.imshow(rebuilded_img)
    plt.title(title)
    plt.show()

def draw_train_process(iters,loss):
    '''
    Plot the training loss curve
    '''
    plt.title('training',fontsize=24)
    plt.xlabel('iters',fontsize=20)
    plt.ylabel('loss',fontsize=20)
    plt.plot(iters,loss,color='green',label='loss')
    plt.legend()
    plt.grid()
    plt.show()

def fix_value(img_pixs): # stretch pixel values back into [0, 255]
        '''
        img_pixs: the pixel matrix of a feature map
        '''
        pix_max=np.max(img_pixs)# largest pixel value
        pix_min=np.min(img_pixs)# smallest pixel value
        pix_range=np.abs(pix_max)+np.abs(pix_min)# size of the value range
        if(pix_range==0): # if every value is zero, return directly (avoids dividing by zero below)
            return img_pixs
        pix_rate = 255/pix_range# scaling factor
        pix_left = pix_min*pix_rate# scaled minimum value
        img_pixs = img_pixs*pix_rate-pix_left# shift all the values
        img_pixs[np.where(img_pixs<0)]=0. # for robustness, clip values that fall below the range
        img_pixs[np.where(img_pixs>255)]=255. # and clip values that exceed it
        return img_pixs

# Train the model

# Hyperparameters
train_params = {
    'save_model_name' : 'AutoEnconder',
    'epoch_num' : 40000,
    'batch_size' : 32,
    'learning_rate' : 0.00001
}

# Train, then show the results
with fluid.dygraph.guard():
    print('start training')
    # data reader
    train_reader = paddle.batch(reader=paddle.reader.shuffle(\
        paddle.dataset.mnist.train(),buf_size=512),batch_size=train_params['batch_size'])

    # Grab one [32 x 784] batch to reuse in every iteration
    # (note that this demo trains on this single fixed batch)
    for i, data in enumerate(train_reader()):
        temp_images = []
        for i in range(32):
            temp_images.append(np.reshape(data[i][0],(784)))
        # create a variable from the ndarray
        temp_images = fluid.dygraph.to_variable(np.asarray(temp_images).reshape(32, 784))
        # rescale from the original range [-1, 1] to [0, 1]
        temp_images = (temp_images + 1) / 2
        break
    model = AutoEncoder()
    # train the model
    model.train()
    all_iter = 0
    # losses
    all_loss = []
    # iteration indices
    all_iters = []
    # accuracies (unused here)
    all_accs = []
    # optimizer
    opt = fluid.optimizer.AdamOptimizer(learning_rate=train_params['learning_rate'], parameter_list=model.parameters())
    for pass_num in range(train_params['epoch_num']):
        # forward pass
        predict = model(temp_images)
        # mean loss
        avg_loss = F.mean(model.loss(predict, temp_images))
        # backpropagation
        avg_loss.backward()
        # take an optimization step
        opt.minimize(avg_loss)
        # clear the gradients
        opt.clear_gradients()
        # bump the iteration counter
        all_iter += 1
        # log and record the loss every 5000 iterations
        if all_iter % 5000 == 0:
            all_loss.append(avg_loss.numpy()[0])
            all_iters.append(all_iter)
            print('pass_epoch:{}, iters:{}, loss:{}'.format(pass_num, all_iter, avg_loss.numpy()[0]))
    # save the model parameters
    fluid.save_dygraph(model.state_dict(), train_params['save_model_name'])
    # plot the training curve
    draw_train_process(all_iters,all_loss)
    predict = fix_value(predict.numpy())
    print(type(model.input_imgs))
    imput_imgs = fix_value(model.input_imgs.numpy())
    print('finished training')

    # show the reconstructed images
    for i in range(10):
        show_array2img(np.reshape(imput_imgs[i], (28, 28)), 'input_img')
        show_array2img(np.asarray(np.reshape(predict[i], (28, 28))), 'rebuild_img')

7. Machine Translation

#1. Import the libraries
import paddle
import paddle.nn.functional as F
import re
import numpy as np

#2. Read the data
MAX_LEN = 20


# 1. Prepare the data
def read_data():
    # read the parallel corpus
    data = []
    lines = open('./work/cmn.txt', encoding='utf-8').read().strip().split('\n')  # read the data with open
    words_re = re.compile(r'\w+')  # regex that splits an English sentence into words
    for each in lines:
        en_sent, cn_sent, _ = each.split('\t')
        data.append((words_re.findall(en_sent.lower()), list(cn_sent)))

    data_filtered = []
    for each in data:
        # keep only the pairs where both the English and the Chinese sentence are shorter than 20 tokens
        if len(each[0]) < MAX_LEN and len(each[1]) < MAX_LEN:
            data_filtered.append(each)
    return data_filtered


def build_vocab(data):
    # build the vocabularies
    eng_vocab = {}  # English vocabulary
    chn_vocab = {}  # Chinese vocabulary
    # Add the special tokens: one for padding, one for sentence start, one for sentence end
    # (the spellings '<pad>', '<bos>', '<eos>' are stand-ins; any three distinct strings work)
    eng_vocab['<pad>'], eng_vocab['<bos>'], eng_vocab['<eos>'] = 0, 1, 2
    chn_vocab['<pad>'], chn_vocab['<bos>'], chn_vocab['<eos>'] = 0, 1, 2
    # iterate over the data and add every new word to the vocabulary
    eng_idx, chn_idx = 3, 3
    for eng, chn in data:
        for word in eng:
            if word not in eng_vocab:
                eng_vocab[word] = eng_idx
                eng_idx += 1
        for word in chn:
            if word not in chn_vocab:
                chn_vocab[word] = chn_idx
                chn_idx += 1
    return eng_vocab, chn_vocab


data = read_data()  # read the data
eng_vocab, chn_vocab = build_vocab(data)  # build the vocabularies from the data

#3. Data preprocessing
# Pad the sentences
padded_eng_sents = []
padded_chn_sents = []
padded_chn_label_sents = []
for eng, chn in data:
    # append '<eos>' to each English sentence and pad it to MAX_LEN words with '<pad>'
    padded_eng_sent = eng + ['<eos>'] + ['<pad>'] * (MAX_LEN - len(eng))
    # prepend '<bos>' and append '<eos>' to each Chinese sentence, then pad it to MAX_LEN with '<pad>'
    padded_chn_sent = ['<bos>'] + chn + ['<eos>'] + ['<pad>'] * (MAX_LEN - len(chn))
    padded_chn_label_sent = chn + ['<eos>'] + ['<pad>'] * (MAX_LEN - len(chn) + 1)
    # map every token in the sentence to its number in the vocabulary
    padded_eng_sents.append([eng_vocab[w] for w in padded_eng_sent])
    padded_chn_sents.append([chn_vocab[w] for w in padded_chn_sent])
    padded_chn_label_sents.append([chn_vocab[w] for w in padded_chn_label_sent])

train_eng_sents = np.array(padded_eng_sents).astype('int64')
train_chn_sents = np.array(padded_chn_sents).astype('int64')
train_chn_label_sents = np.array(padded_chn_label_sents).astype('int64')
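
For example, with MAX_LEN = 20 a 4-word English sentence becomes 4 word ids, one '<eos>' id, and 16 '<pad>' ids: 21 positions in all. That fixed length of MAX_LEN + 1 is the time-step count the encoder and decoder comments below refer to.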

#4. Build the model
embedding_size = 128
hidden_size = 256
epochs = 50
batch_size = 64
eng_vocab_size = len(list(eng_vocab))
chn_vocab_size = len(list(chn_vocab))


# Encoder
class Encoder(paddle.nn.Layer):
    def __init__(self):
        super(Encoder, self).__init__()
        # word embedding layer
        self.embed = paddle.nn.Embedding(eng_vocab_size, embedding_size)
        # LSTM layer
        self.lstm = paddle.nn.LSTM(input_size=embedding_size, hidden_size=hidden_size, num_layers=1)

    def forward(self, x):
        # the input has shape [batch, time steps]
        x = self.embed(x)
        # after the embedding layer the shape is [batch, time steps, embedding_size], where time steps = MAX_LEN + 1
        x, (_, _) = self.lstm(x)
        # after the LSTM the shape is [batch, time steps, hidden_size], where time steps = MAX_LEN + 1
        return x


# Decoder
class Decoder(paddle.nn.Layer):
    def __init__(self):
        super(Decoder, self).__init__()
        # word embedding layer
        self.embed = paddle.nn.Embedding(chn_vocab_size, embedding_size)
        # LSTM layer
        self.lstm = paddle.nn.LSTM(input_size=embedding_size + hidden_size, hidden_size=hidden_size)
        # layers that compute the attention scores
        self.attention_linear1 = paddle.nn.Linear(hidden_size * 2, hidden_size)
        self.attention_linear2 = paddle.nn.Linear(hidden_size, 1)
        self.linear = paddle.nn.Linear(hidden_size, chn_vocab_size)

    def forward(self, x, previous_hidden, previous_cell, encoder_outputs):
        # the input x has shape [batch, 1]
        # previous_hidden, the previous step's hidden state, has shape [batch, 1, hidden_size]
        # previous_cell, the previous step's cell state, has shape [batch, 1, hidden_size]
        # encoder_outputs, the encoder hidden states at every time step, has shape [batch, time steps, hidden_size]
        # the encoder input is the English sentence, of length MAX_LEN + 1 (an end token is appended)
        x = self.embed(x)
        # after the embedding layer the shape is [batch, 1, embedding_size]
        # Concatenate the encoder hidden states at every time step with the decoder's previous hidden state:
        # encoder_outputs has shape [batch, time steps, hidden_size],
        # while previous_hidden has shape [batch, 1, hidden_size],
        # so paddle.tile first replicates previous_hidden along the time-step dimension,
        # and paddle.concat then joins it with encoder_outputs along the last dimension.
        # The resulting attention_inputs has shape [batch, time steps, hidden_size * 2]
        attention_inputs = paddle.concat(
            (encoder_outputs, paddle.tile(previous_hidden, repeat_times=[1, MAX_LEN + 1, 1])), axis=-1)
        # a single-hidden-layer MLP computes the attention scores
        attention_hidden = self.attention_linear1(attention_inputs)
        attention_hidden = F.tanh(attention_hidden)
        attention_logits = self.attention_linear2(attention_hidden)
        # the output now has shape [batch, time steps, 1]
        attention_logits = paddle.squeeze(attention_logits)  # drop the dimensions of size 1
        # the output now has shape [batch, time steps]
        # softmax yields the attention weights, shape [batch, time steps], each value in (0, 1); the weights sum to 1 over the time dimension
        attention_weights = F.softmax(attention_logits)
        # encoder_outputs has shape [batch, time steps, hidden_size] while the weights have shape [batch, time steps],
        # so paddle.unsqueeze adds a trailing dimension to the weights
        # and paddle.expand_as broadcasts them to the shape of encoder_outputs
        attention_weights = paddle.expand_as(paddle.unsqueeze(attention_weights, -1), encoder_outputs)
        # element-wise multiplication gives the weighted encoder states
        context_vector = paddle.multiply(encoder_outputs, attention_weights)
        # the weighted states have shape [batch, time steps, hidden_size]
        # summing over the time steps gives the context vector
        context_vector = paddle.sum(context_vector, 1)
        # the context vector now has shape [batch, hidden_size]
        context_vector = paddle.unsqueeze(context_vector, 1)  # insert a dimension of size 1 at position 1
        # the context vector now has shape [batch, 1, hidden_size]
        # x, the embedded input, has shape [batch, 1, embedding_size];
        # concatenating it with the context vector along the last dimension gives [batch, 1, embedding_size + hidden_size]
        lstm_input = paddle.concat((x, context_vector), axis=-1)
        # transpose previous_hidden to [1, batch, hidden_size]
        previous_hidden = paddle.transpose(previous_hidden, [1, 0, 2])
        # transpose previous_cell to [1, batch, hidden_size]
        previous_cell = paddle.transpose(previous_cell, [1, 0, 2])
        # feed the LSTM
        x, (hidden, cell) = self.lstm(lstm_input, (previous_hidden, previous_cell))
        hidden = paddle.transpose(hidden, [1, 0, 2])
        cell = paddle.transpose(cell, [1, 0, 2])
        # after these transposes the current hidden state has shape [batch, 1, hidden_size]
        output = self.linear(hidden)
        # the output now has shape [batch, 1, chn_vocab_size]
        output = paddle.squeeze(output)  # drop the dimensions of size 1
        # the output now has shape [batch, chn_vocab_size]
        return output, (hidden, cell)
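
For reference, the attention computed in forward above is the additive (MLP) form. Writing h_s for the encoder state at time step s and d for the decoder's previous hidden state, the score, weights, and context vector are

e_s = v^T tanh(W [h_s; d]),    a = softmax(e),    c = sum_s a_s * h_s

where W corresponds to attention_linear1, v to attention_linear2, and c is the context vector that gets concatenated with the embedded input before the LSTM.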


#5. Train the model
encoder = Encoder()  # instantiate the encoder
decoder = Decoder()  # instantiate the decoder
# optimizer over the parameters of both networks
optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=encoder.parameters() + decoder.parameters())
# training loop
for epoch in range(epochs):
    print("Starting epoch {}...".format(epoch))
    # shuffle the data
    order = np.random.permutation(len(train_eng_sents))
    train_eng_sents_shuffled = train_eng_sents[order]
    train_chn_sents_shuffled = train_chn_sents[order]
    train_chn_label_sents_shuffled = train_chn_label_sents[order]

    for iteration in range(train_eng_sents_shuffled.shape[0] // batch_size):
        eng_sentence = train_eng_sents_shuffled[(batch_size * iteration):(batch_size * (iteration + 1))]
        eng_sentence = paddle.to_tensor(eng_sentence)

        encoder_outputs = encoder(eng_sentence)

        x_chn_data = train_chn_sents_shuffled[(batch_size * iteration):(batch_size * (iteration + 1))]
        x_chn_label_data = train_chn_label_sents_shuffled[(batch_size * iteration):(batch_size * (iteration + 1))]

        # shape: (batch,  num_layer(=1 here) * num_of_direction(=1 here), hidden_size)
        hidden = paddle.zeros([batch_size, 1, hidden_size])
        cell = paddle.zeros([batch_size, 1, hidden_size])

        loss = paddle.zeros([1])
        # call the decoder once per time step, feeding one batch of tokens each time
        for i in range(MAX_LEN + 2):
            chn_word = paddle.to_tensor(x_chn_data[:, i:i + 1])
            chn_word_label = paddle.to_tensor(x_chn_label_data[:, i])

            logits, (hidden, cell) = decoder(chn_word, hidden, cell, encoder_outputs)
            step_loss = F.cross_entropy(logits, chn_word_label)
            loss += step_loss

        loss = loss / (MAX_LEN + 2)
        if (iteration % 200 == 0):
            print("iter {}, loss:{}".format(iteration, loss.numpy()))

        loss.backward()
        optimizer.step()
        optimizer.clear_grad()

#6. Model prediction
encoder.eval()
decoder.eval()

num_of_exampels_to_evaluate = 10

indices = np.random.choice(len(train_eng_sents), num_of_exampels_to_evaluate, replace=False)
x_data = train_eng_sents[indices]
sent = paddle.to_tensor(x_data)
en_repr = encoder(sent)

word = np.array([[chn_vocab['<bos>']]] * num_of_exampels_to_evaluate)  # start decoding from the start token
word = paddle.to_tensor(word)

hidden = paddle.zeros([num_of_exampels_to_evaluate, 1, hidden_size])
cell = paddle.zeros([num_of_exampels_to_evaluate, 1, hidden_size])

decoded_sent = []
for i in range(MAX_LEN + 2):
    logits, (hidden, cell) = decoder(word, hidden, cell, en_repr)
    word = paddle.argmax(logits, axis=1)
    decoded_sent.append(word.numpy())
    word = paddle.unsqueeze(word, axis=-1)

results = np.stack(decoded_sent, axis=1)
for i in range(num_of_exampels_to_evaluate):
    en_input = " ".join(data[indices[i]][0])
    ground_truth_translate = "".join(data[indices[i]][1])
    model_translate = ""
    for k in results[i]:
        w = list(chn_vocab)[k]
        if w != '<eos>' and w != '<pad>':  # skip the special tokens
            model_translate += w
    print(en_input)
    print("true: {}".format(ground_truth_translate))
    print("pred: {}".format(model_translate))

8. Object Detection

- -


Summary

Fun experiments; recording them here. - -
