GitHub source code
import torch
import torch.nn as nn
import numpy as np
from torch.optim import Adam, SGD
from torch import autograd
from torch.autograd import Variable
import torch.nn.functional as F
from torch.autograd import grad as torch_grad
import torch.nn.utils.weight_norm as weightNorm
from utils.util import *
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dim = 128
LAMBDA = 10 # Gradient penalty lambda hyperparameter
class TReLU(nn.Module):
    def __init__(self):
        super(TReLU, self).__init__()  # the subclass inherits the parent's attributes and methods; super().__init__() lets the parent initialize them
        # nn.Parameter wraps a plain (non-trainable) tensor as a trainable parameter and registers
        # it on the module, so self.alpha becomes part of the model and is updated during training
        self.alpha = nn.Parameter(torch.FloatTensor(1), requires_grad=True)
        self.alpha.data.fill_(0)  # start at 0, so TReLU initially behaves like a plain ReLU

    def forward(self, x):
        x = F.relu(x - self.alpha) + self.alpha  # ReLU with a learnable threshold: clamp at alpha instead of 0
        return x
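A minimal sketch of what the learnable threshold does; setting alpha to 0.5 by hand is purely illustrative:

# Hypothetical illustration: TReLU with alpha manually set to 0.5
t = TReLU()
t.alpha.data.fill_(0.5)
x = torch.tensor([-1.0, 0.0, 0.25, 1.0])
print(t(x))  # tensor([0.5000, 0.5000, 0.5000, 1.0000]) -- values below alpha are clamped to alpha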
class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()
        # Conv2d(in_channels, out_channels, kernel_size=5, stride=2, padding=2):
        # each stride-2 layer halves the spatial resolution
        self.conv0 = weightNorm(nn.Conv2d(6, 16, 5, 2, 2))
        self.conv1 = weightNorm(nn.Conv2d(16, 32, 5, 2, 2))
        self.conv2 = weightNorm(nn.Conv2d(32, 64, 5, 2, 2))
        self.conv3 = weightNorm(nn.Conv2d(64, 128, 5, 2, 2))
        self.conv4 = weightNorm(nn.Conv2d(128, 1, 1, 1, 0))
        self.relu0 = TReLU()
        self.relu1 = TReLU()
        self.relu2 = TReLU()
        self.relu3 = TReLU()

    def forward(self, x):
        x = self.conv0(x)
        x = self.relu0(x)
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.conv3(x)
        x = self.relu3(x)
        x = self.conv4(x)
        x = x.view(-1, 64)  # Patch Q: a 128x128 input is downsampled to an 8x8 map of per-patch scores, flattened to 64
        return x
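A quick shape check of the forward pass (a batch of 4 six-channel 128x128 inputs, random data standing in for real pairs):

# Sanity check of the shapes: four stride-2 convs take 128 -> 64 -> 32 -> 16 -> 8
d = Discriminator()
out = d(torch.randn(4, 6, dim, dim))
print(out.shape)  # torch.Size([4, 64]) -- one score per patch of the 8x8 map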
netD = Discriminator()
target_netD = Discriminator()  # same architecture, but two independent networks
netD = netD.to(device)
target_netD = target_netD.to(device)  # move to GPU if available, else CPU
hard_update(target_netD, netD)  # copy netD's weights into target_netD
optimizerD = Adam(netD.parameters(), lr=3e-4, betas=(0.5, 0.999))
# Adam adapts the learning rate per parameter; after bias correction each update
# has a bounded effective step size, which keeps the parameters stable.
# lr: learning rate, i.e. the step size used when applying gradients
# betas: coefficients for the running averages of the gradient and its square
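hard_update and soft_update come from utils.util and are not shown here. A plausible sketch of the usual DDPG-style definitions (an assumption, not the repository's exact code):

# Assumed implementations (standard DDPG-style target-network updates):
def hard_update_sketch(target, source):
    # copy every parameter of source into target
    for t, s in zip(target.parameters(), source.parameters()):
        t.data.copy_(s.data)

def soft_update_sketch(target, source, tau):
    # Polyak averaging: target <- (1 - tau) * target + tau * source
    for t, s in zip(target.parameters(), source.parameters()):
        t.data.copy_(t.data * (1.0 - tau) + s.data * tau)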
def cal_gradient_penalty(netD, real_data, fake_data, batch_size):
    # torch.rand draws from the uniform distribution on [0, 1); shape (batch_size, 1)
    alpha = torch.rand(batch_size, 1)
    # expand repeats the size-1 dimension to real_data.nelement() / batch_size entries
    # (a broadcasted view, no copy); contiguous() then returns a tensor whose data is
    # laid out contiguously in memory (a no-op if it already is)
    alpha = alpha.expand(batch_size, int(real_data.nelement()/batch_size)).contiguous()
    alpha = alpha.view(batch_size, 6, dim, dim)  # reshape to (batch_size, 6, dim, dim); view is akin to numpy's reshape
    alpha = alpha.to(device)
    fake_data = fake_data.view(batch_size, 6, dim, dim)
    # a random point on the line between each real and fake sample; requires_grad=True
    # so the critic's output can be differentiated with respect to it
    interpolates = Variable(alpha * real_data.data + ((1 - alpha) * fake_data.data), requires_grad=True)
    disc_interpolates = netD(interpolates)
    # autograd.grad computes d(disc_interpolates)/d(interpolates); create_graph=True keeps the
    # graph so the penalty itself can be backpropagated through. (Variable is deprecated since
    # PyTorch 0.4; a plain tensor with requires_grad=True suffices in modern code.)
    gradients = autograd.grad(disc_interpolates, interpolates,
                              grad_outputs=torch.ones(disc_interpolates.size()).to(device),
                              create_graph=True, retain_graph=True)[0]
    gradients = gradients.view(gradients.size(0), -1)
    # WGAN-GP: penalize the squared distance of the gradient norm from 1; .mean() averages over the batch
    gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean() * LAMBDA
    return gradient_penalty
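A hedged smoke test of the function (random tensors stand in for real batches of (target, canvas) pairs):

# Hypothetical check with placeholder data:
real = torch.randn(4, 6, dim, dim).to(device)
fake = torch.randn(4, 6, dim, dim).to(device)
gp = cal_gradient_penalty(netD, real, fake, 4)
print(gp)  # scalar tensor: LAMBDA * E[(||grad||_2 - 1)^2]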
# For reference, the full signature:
# autograd.grad(outputs, inputs, grad_outputs=None, retain_graph=None, create_graph=False, only_inputs=True, allow_unused=False)
def cal_reward(fake_data, real_data):
    # torch.cat concatenates along dim 1 (the channel dimension), giving the 6-channel
    # (target, canvas) pair the discriminator expects; the target network scores it
    return target_netD(torch.cat([real_data, fake_data], 1))
def save_gan(path):
    netD.cpu()
    torch.save(netD.state_dict(), '{}/wgan.pkl'.format(path))
    netD.to(device)

def load_gan(path):
    netD.load_state_dict(torch.load('{}/wgan.pkl'.format(path)))
def update(fake_data, real_data):
    # detach() returns a new tensor that shares the same storage but is cut from the
    # computation graph (requires_grad=False), so no gradients flow back through it
    fake_data = fake_data.detach()
    real_data = real_data.detach()
    fake = torch.cat([real_data, fake_data], 1)  # (target, canvas) pair
    real = torch.cat([real_data, real_data], 1)  # (target, target) pair
    D_real = netD(real)
    D_fake = netD(fake)
    gradient_penalty = cal_gradient_penalty(netD, real, fake, real.shape[0])
    optimizerD.zero_grad()  # reset accumulated parameter gradients to zero
    D_cost = D_fake.mean() - D_real.mean() + gradient_penalty  # WGAN critic loss plus penalty
    D_cost.backward()  # backward() runs autodiff; since D_cost is a scalar, no gradient argument is needed
    optimizerD.step()  # apply the update to all parameters
    soft_update(target_netD, netD, 0.001)  # slowly track netD in the target network
    return D_fake.mean(), D_real.mean(), gradient_penalty
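A sketch of one critic update step; the random tensors are placeholders for a batch of rendered canvases and ground-truth images:

# Hypothetical single training step with placeholder data:
canvas_batch = torch.randn(8, 3, dim, dim).to(device)  # fake: rendered canvases
target_batch = torch.randn(8, 3, dim, dim).to(device)  # real: ground-truth images
d_fake, d_real, gp = update(canvas_batch, target_batch)
print(d_fake.item(), d_real.item(), gp.item())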
Evaluator code (a separate module):
import numpy as np
from utils.util import *
class Evaluator(object):
    def __init__(self, args, writer):  # args holds the parsed command-line arguments
        self.validate_episodes = args.validate_episodes
        self.max_step = args.max_step
        self.env_batch = args.env_batch
        self.writer = writer
        self.log = 0

    def __call__(self, env, policy, debug=False):
        observation = None
        for episode in range(self.validate_episodes):
            # reset at the start of episode
            observation = env.reset(test=True, episode=episode)
            episode_steps = 0
            episode_reward = 0.
            assert observation is not None  # fail fast if reset() returned nothing
            # start episode
            episode_reward = np.zeros(self.env_batch)
            # loop until max_step is reached; if max_step is falsy (0/None), loop forever
            while (episode_steps < self.max_step or not self.max_step):
                action = policy(observation)  # the policy maps the observation to an action
                observation, reward, done, (step_num) = env.step(action)
                episode_reward += reward
                episode_steps += 1
                env.save_image(self.log, episode_steps)
            dist = env.get_dist()
            self.log += 1
        return episode_reward, dist
Environment wrapper code (a separate module):
import cv2
import torch
import numpy as np
from env import Paint
from utils.util import *
from DRL.ddpg import decode
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
class fastenv():
    def __init__(self,
                 max_episode_length=10, env_batch=64,
                 writer=None):
        self.max_episode_length = max_episode_length
        self.env_batch = env_batch
        self.env = Paint(self.env_batch, self.max_episode_length)
        self.env.load_data()
        self.observation_space = self.env.observation_space
        self.action_space = self.env.action_space
        self.writer = writer
        self.test = False
        self.log = 0

    def save_image(self, log, step):
        for i in range(self.env_batch):
            if self.env.imgid[i] <= 10:
                # permute CHW -> HWC, convert to numpy, swap BGR -> RGB for logging
                canvas = cv2.cvtColor((to_numpy(self.env.canvas[i].permute(1, 2, 0))), cv2.COLOR_BGR2RGB)
                self.writer.add_image('{}/canvas_{}.png'.format(str(self.env.imgid[i]), str(step)), canvas, log)
        if step == self.max_episode_length:
            for i in range(self.env_batch):
                if self.env.imgid[i] < 50:
                    gt = cv2.cvtColor((to_numpy(self.env.gt[i].permute(1, 2, 0))), cv2.COLOR_BGR2RGB)
                    canvas = cv2.cvtColor((to_numpy(self.env.canvas[i].permute(1, 2, 0))), cv2.COLOR_BGR2RGB)
                    self.writer.add_image(str(self.env.imgid[i]) + '/_target.png', gt, log)
                    self.writer.add_image(str(self.env.imgid[i]) + '/_canvas.png', canvas, log)
    def step(self, action):
        with torch.no_grad():
            ob, r, d, _ = self.env.step(torch.tensor(action).to(device))
        if d[0]:
            if not self.test:
                self.dist = self.get_dist()
                for i in range(self.env_batch):
                    self.writer.add_scalar('train/dist', self.dist[i], self.log)
                    self.log += 1
        return ob, r, d, _

    def get_dist(self):
        # per-image MSE between ground truth and canvas, with pixel values scaled to [0, 1]
        return to_numpy((((self.env.gt.float() - self.env.canvas.float()) / 255) ** 2).mean(1).mean(1).mean(1))

    def reset(self, test=False, episode=0):
        self.test = test
        ob = self.env.reset(self.test, episode * self.env_batch)
        return ob
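The distance reported by get_dist above is the per-image mean squared error after scaling pixels to [0, 1]; for example, an all-white target against an all-black canvas gives exactly 1.0:

# Hypothetical check with a single 3-channel 2x2 "batch" (pixel values in 0..255):
gt = torch.full((1, 3, 2, 2), 255.0)
canvas = torch.zeros((1, 3, 2, 2))
# ((255 - 0) / 255)^2 = 1 for every pixel, so the per-image distance is 1.0
print((((gt - canvas) / 255) ** 2).mean(1).mean(1).mean(1))  # tensor([1.])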
Replay memory code (a separate module):
import numpy as np
import random
import torch
import pickle
class rpm(object):
    # replay memory: stores recent transitions so the same sample can be
    # used for training multiple times
    def __init__(self, buffer_size):
        self.buffer_size = buffer_size
        self.buffer = []
        self.index = 0

    def append(self, obj):
        if self.size() > self.buffer_size:
            print('buffer size larger than set value, trimming...')  # trim when over capacity
            self.buffer = self.buffer[(self.size() - self.buffer_size):]
        elif self.size() == self.buffer_size:
            # buffer full: overwrite the oldest entries in ring-buffer fashion
            self.buffer[self.index] = obj
            self.index += 1
            self.index %= self.buffer_size
        else:
            self.buffer.append(obj)

    def size(self):
        return len(self.buffer)

    def sample_batch(self, batch_size, device, only_state=False):
        # random.sample draws distinct items from the buffer in random order,
        # returned as a list
        if self.size() < batch_size:
            batch = random.sample(self.buffer, self.size())
        else:
            batch = random.sample(self.buffer, batch_size)
        if only_state:
            # torch.stack joins tensors of identical shape along a new dim 0;
            # item[3] is the next-state field of each transition tuple
            res = torch.stack(tuple(item[3] for item in batch), dim=0)
            return res.to(device)
        else:
            item_count = 5  # each transition tuple has five tensor fields
            res = []
            for i in range(item_count):
                k = torch.stack(tuple(item[i] for item in batch), dim=0)
                res.append(k.to(device))
            return res[0], res[1], res[2], res[3], res[4]
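A hedged round-trip sketch of the buffer; the five-field tuple layout and its (state, action, reward, next_state, done) ordering are assumptions read off sample_batch, and the dummy tensors are placeholders:

# Hypothetical usage with dummy 5-field transitions:
memory = rpm(buffer_size=1000)
for _ in range(64):
    transition = tuple(torch.randn(4) for _ in range(5))  # assumed order: (state, action, reward, next_state, done)
    memory.append(transition)
s, a, r, s2, d = memory.sample_batch(32, device)
print(s.shape)  # torch.Size([32, 4])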