基于深度强化学习的绘画智能体 代码分析(五)

GitHub源码

  1. wgan.py
import torch
import torch.nn as nn
import numpy as np
from torch.optim import Adam, SGD
from torch import autograd
from torch.autograd import Variable
import torch.nn.functional as F
from torch.autograd import grad as torch_grad
import torch.nn.utils.weight_norm as weightNorm
from utils.util import *

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dim = 128
LAMBDA = 10 # Gradient penalty lambda hyperparameter

class TReLU(nn.Module):
    """ReLU with a learnable threshold: f(x) = relu(x - alpha) + alpha,
    i.e. elementwise max(x, alpha).

    alpha starts at 0, so the module is initially a plain ReLU; it is
    registered as an nn.Parameter and learned during training.
    """

    def __init__(self):
        super(TReLU, self).__init__()
        # Single learnable scalar threshold, trained with the rest of the model.
        self.alpha = nn.Parameter(torch.FloatTensor(1), requires_grad=True)
        self.alpha.data.fill_(0)

    def forward(self, x):
        # Shift down, rectify, shift back up.
        return F.relu(x - self.alpha) + self.alpha

class Discriminator(nn.Module):
    """Patch-based WGAN critic.

    Takes a 6-channel input (two 3-channel images stacked on the channel
    axis) and emits one score per spatial patch, flattened to (-1, 64).
    """

    def __init__(self):
        super(Discriminator, self).__init__()
        # Four strided, weight-normalized 5x5 convs halve the spatial size
        # each time; a final 1x1 conv maps features to a single score map.
        self.conv0 = weightNorm(nn.Conv2d(6, 16, 5, 2, 2))
        self.conv1 = weightNorm(nn.Conv2d(16, 32, 5, 2, 2))
        self.conv2 = weightNorm(nn.Conv2d(32, 64, 5, 2, 2))
        self.conv3 = weightNorm(nn.Conv2d(64, 128, 5, 2, 2))
        self.conv4 = weightNorm(nn.Conv2d(128, 1, 1, 1, 0))
        self.relu0 = TReLU()
        self.relu1 = TReLU()
        self.relu2 = TReLU()
        self.relu3 = TReLU()

    def forward(self, x):
        stages = ((self.conv0, self.relu0), (self.conv1, self.relu1),
                  (self.conv2, self.relu2), (self.conv3, self.relu3))
        for conv, act in stages:
            x = act(conv(x))
        x = self.conv4(x)
        return x.view(-1, 64) # Patch Q

netD = Discriminator()  
target_netD = Discriminator() # same architecture, but two independent networks
netD = netD.to(device)
target_netD = target_netD.to(device) # move both to the selected device (GPU if available, else CPU)
hard_update(target_netD, netD) # copy netD's weights into target_netD

optimizerD = Adam(netD.parameters(), lr=3e-4, betas=(0.5, 0.999)) # Adam keeps a per-parameter adaptive step size, which keeps updates in a stable range after bias correction
#lr: base step size used when applying gradients
#betas: coefficients for the running averages of the gradient and its square


def cal_gradient_penalty(netD, real_data, fake_data, batch_size):
    """WGAN-GP gradient penalty (Gulrajani et al.).

    Samples random points on the line between real and fake samples,
    and penalizes the critic when its gradient norm there deviates from 1.

    Args:
        netD: the critic network.
        real_data, fake_data: tensors reshapeable to (batch_size, 6, dim, dim).
        batch_size: number of samples in the batch.

    Returns:
        Scalar tensor: LAMBDA * mean((||grad||_2 - 1)^2).
    """
    # One mixing coefficient per sample, drawn uniformly from [0, 1),
    # then broadcast (by explicit expansion) over every element of the sample.
    alpha = torch.rand(batch_size, 1)
    alpha = alpha.expand(batch_size, int(real_data.nelement() / batch_size)).contiguous()
    alpha = alpha.view(batch_size, 6, dim, dim)
    alpha = alpha.to(device)
    fake_data = fake_data.view(batch_size, 6, dim, dim)
    # Random interpolation between real and fake. detach() cuts any upstream
    # graph; requires_grad_ makes the interpolates a fresh leaf we can
    # differentiate the critic output against. (Replaces the deprecated
    # torch.autograd.Variable / .data idiom.)
    interpolates = (alpha * real_data.detach() + (1 - alpha) * fake_data.detach()).requires_grad_(True)
    disc_interpolates = netD(interpolates)
    # grad_outputs of ones sums the (vector-valued) critic output; create_graph
    # keeps the graph so the penalty itself can be backpropagated.
    gradients = autograd.grad(disc_interpolates, interpolates,
                              grad_outputs=torch.ones_like(disc_interpolates),
                              create_graph=True, retain_graph=True)[0]
    gradients = gradients.view(gradients.size(0), -1)
    # Penalize deviation of each sample's gradient 2-norm from 1.
    gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean() * LAMBDA
    return gradient_penalty
详解 pytorch 中的 autograd.grad() 函数

autograd.grad(outputs, inputs, grad_outputs=None, retain_graph=None, create_graph=False, only_inputs=True, allow_unused=False)

  • outputs: 求导的因变量(需要求导的函数)
  • inputs: 求导的自变量
  • grad_outputs: 如果 outputs为标量,则grad_outputs=None,也就是说,可以不用写; 如果outputs 是向量,则此参数必须写
  • retain_graph: True 则保留计算图, False则释放计算图
  • create_graph: 若要计算高阶导数,则必须选为True
  • allow_unused: 允许输入变量不进入计算
def cal_reward(fake_data, real_data):
    """Score a (real, fake) pair with the target critic.

    The two 3-channel images are concatenated along the channel axis
    (dim 1) to form the 6-channel input the critic expects.
    """
    pair = torch.cat([real_data, fake_data], 1)
    return target_netD(pair)

def save_gan(path):
    """Checkpoint the discriminator weights to '<path>/wgan.pkl'."""
    checkpoint = '{}/wgan.pkl'.format(path)
    netD.cpu()  # move to CPU so the checkpoint is device-independent
    torch.save(netD.state_dict(), checkpoint)
    netD.to(device)  # restore the network to the training device

def load_gan(path):
    """Restore discriminator weights from '<path>/wgan.pkl'."""
    state = torch.load('{}/wgan.pkl'.format(path))
    netD.load_state_dict(state)

def update(fake_data, real_data):
    """Run one WGAN-GP training step for the discriminator.

    Returns (mean fake score, mean real score, gradient penalty).
    """
    # This step trains only the critic, so cut gradients back into
    # whatever produced the images.
    fake_data = fake_data.detach()
    real_data = real_data.detach()
    # The critic scores (target, candidate) pairs stacked on the channel axis.
    pair_fake = torch.cat([real_data, fake_data], 1)
    pair_real = torch.cat([real_data, real_data], 1)
    score_real = netD(pair_real)
    score_fake = netD(pair_fake)
    penalty = cal_gradient_penalty(netD, pair_real, pair_fake, pair_real.shape[0])
    optimizerD.zero_grad()  # clear stale gradients before backprop
    # Wasserstein critic loss: push fake scores down, real scores up,
    # while keeping gradient norms near 1 via the penalty.
    loss = score_fake.mean() - score_real.mean() + penalty
    loss.backward()
    optimizerD.step()  # apply the update to all critic parameters
    # Let the reward network slowly track the trained critic.
    soft_update(target_netD, netD, 0.001)
    return score_fake.mean(), score_real.mean(), penalty
  2. evaluator.py
import numpy as np
from utils.util import *

class Evaluator(object):
    """Runs validation episodes with a fixed policy and reports the
    final episode's cumulative reward and distance metric."""

    def __init__(self, args, writer): # args is the parsed argument namespace
        self.validate_episodes = args.validate_episodes  # episodes per evaluation call
        self.max_step = args.max_step      # steps per episode (0 means loop forever)
        self.env_batch = args.env_batch    # number of parallel environments
        self.writer = writer               # log writer (unused directly here)
        self.log = 0                       # monotonically increasing log counter

    def __call__(self, env, policy, debug=False):
        observation = None
        for episode in range(self.validate_episodes):
            # reset at the start of episode
            observation = env.reset(test=True, episode=episode)
            assert observation is not None  # catches a broken env.reset early
            episode_steps = 0
            # One reward accumulator per parallel environment.
            # (Removed the dead `episode_reward = 0.` that was immediately
            # overwritten by this array.)
            episode_reward = np.zeros(self.env_batch)
            # Run until max_step is reached; max_step == 0 means unbounded.
            while (episode_steps < self.max_step or not self.max_step):
                action = policy(observation)
                observation, reward, done, (step_num) = env.step(action)
                episode_reward += reward
                episode_steps += 1
                env.save_image(self.log, episode_steps)
            dist = env.get_dist()
            self.log += 1
        # NOTE(review): only the LAST episode's results are returned, and
        # 'dist' is unbound if validate_episodes == 0 — preserved behavior.
        return episode_reward, dist
  3. multi.py
import cv2
import torch
import numpy as np
from env import Paint
from utils.util import *
from DRL.ddpg import decode
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class fastenv():
    """Wrapper around the batched Paint environment that adds image and
    scalar logging to a tensorboard-style writer."""

    def __init__(self,
                 max_episode_length=10, env_batch=64, \
                 writer=None):
        self.max_episode_length = max_episode_length
        self.env_batch = env_batch
        self.env = Paint(self.env_batch, self.max_episode_length)
        self.env.load_data()
        self.observation_space = self.env.observation_space
        self.action_space = self.env.action_space
        self.writer = writer
        self.test = False          # toggled by reset(); suppresses train logging
        self.log = 0               # log step counter for scalar logging

    def _rgb(self, img):
        # CHW tensor -> HWC numpy array, converted BGR -> RGB for the writer.
        return cv2.cvtColor(to_numpy(img.permute(1, 2, 0)), cv2.COLOR_BGR2RGB)

    def save_image(self, log, step):
        """Log intermediate canvases, plus target/canvas pairs at episode end."""
        for idx in range(self.env_batch):
            img_id = self.env.imgid[idx]
            if img_id <= 10:
                tag = '{}/canvas_{}.png'.format(str(img_id), str(step))
                self.writer.add_image(tag, self._rgb(self.env.canvas[idx]), log)
        if step == self.max_episode_length:
            for idx in range(self.env_batch):
                img_id = self.env.imgid[idx]
                if img_id < 50:
                    self.writer.add_image(str(img_id) + '/_target.png', self._rgb(self.env.gt[idx]), log)
                    self.writer.add_image(str(img_id) + '/_canvas.png', self._rgb(self.env.canvas[idx]), log)

    def step(self, action):
        """Forward an action batch to the env; log distances when an episode ends."""
        with torch.no_grad():
            ob, r, d, _ = self.env.step(torch.tensor(action).to(device))
        # d[0] signals the whole batch finished (synchronized episodes).
        if d[0] and not self.test:
            self.dist = self.get_dist()
            for idx in range(self.env_batch):
                self.writer.add_scalar('train/dist', self.dist[idx], self.log)
                self.log += 1
        return ob, r, d, _

    def get_dist(self):
        """Per-image MSE between target and canvas, with pixels scaled to [0, 1]."""
        diff = (self.env.gt.float() - self.env.canvas.float()) / 255
        return to_numpy((diff ** 2).mean(1).mean(1).mean(1))

    def reset(self, test=False, episode=0):
        """Reset the env; offsets the image index by episode * env_batch."""
        self.test = test
        return self.env.reset(self.test, episode * self.env_batch)
  4. rpm.py
import numpy as np
import random
import torch
import pickle as pickle

class rpm(object):
    """Replay memory: a bounded ring buffer of transitions, so each stored
    sample can be reused across multiple training updates."""

    def __init__(self, buffer_size):
        self.buffer_size = buffer_size  # maximum number of stored transitions
        self.buffer = []                # stored transitions
        self.index = 0                  # next slot to overwrite once full

    def append(self, obj):
        """Store one transition, overwriting the oldest slot when full."""
        if self.size() > self.buffer_size:
            # Buffer exceeds capacity (e.g. restored from disk with a smaller
            # limit): keep only the newest buffer_size entries and reset the
            # ring index. Falling through (instead of the old elif) fixes a
            # bug where 'obj' was silently dropped on this path.
            print('buffer size larger than set value, trimming...')
            self.buffer = self.buffer[(self.size() - self.buffer_size):]
            self.index = 0
        if self.size() == self.buffer_size:
            # Full: overwrite the oldest entry and advance the ring index.
            self.buffer[self.index] = obj
            self.index += 1
            self.index %= self.buffer_size
        else:
            self.buffer.append(obj)

    def size(self):
        """Number of transitions currently stored."""
        return len(self.buffer)

    def sample_batch(self, batch_size, device, only_state=False):
        """Sample up to batch_size transitions uniformly without replacement.

        Each stored transition is expected to be a 5-tuple of tensors;
        item[3] (presumably the next state) is returned alone when
        only_state is True. Tensors are stacked along a new batch dim
        and moved to `device`.
        """
        count = min(self.size(), batch_size)
        batch = random.sample(self.buffer, count)

        if only_state:
            res = torch.stack(tuple(item[3] for item in batch), dim=0)
            return res.to(device)
        # Stack each of the 5 transition fields across the sampled batch.
        res = [torch.stack(tuple(item[i] for item in batch), dim=0).to(device)
               for i in range(5)]
        return res[0], res[1], res[2], res[3], res[4]

你可能感兴趣的:(绘画智能体代码分析,pytorch,深度学习,神经网络)