NAS with RL: Code

Contents

Original code repository:

Modified code:

Code walkthrough:


Original code repository:

https://github.com/Longcodedao/NAS-With-RL

Modified code:

After downloading the code from the repository above, it runs with the modifications marked inline below (total_layer is now sampled in __init__, and the unsqueeze/squeeze dimension in the action-sampling code was changed from 1 to 0):

import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn


class Params:
    NUM_EPOCHS = 50
    ALPHA = 0.005
    BATCH_SIZE = 64
    HIDDEN_SIZE = 64  # Number of Hidden Units in Controller
    BETA = 0.1  # The entropy bonus multiplier
    INPUT_SIZE = 3
    ACTION_SPACE = 2
    NUM_STEPS = 4
    GAMMA = 0.99


transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5,), (0.5,))]
)

trainset = torchvision.datasets.MNIST(root='./data', train=True,
                                      download=True, transform=transform)
testset = torchvision.datasets.MNIST(root='./data', train=False,
                                     download=True, transform=transform)

trainloader = torch.utils.data.DataLoader(trainset, batch_size=64,
                                          shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(testset, batch_size=64,
                                         shuffle=False, num_workers=2)


class Controller(nn.Module):
    def __init__(self, search_space,
                 hidden_size=64, max_layer=4, device=''):

        super(Controller, self).__init__()

        self.search_space = search_space
        self.DEVICE = device
        self.hidden_size = hidden_size
        self.length_search = len(search_space)  # num_steps = max_layer * length_search_space
        self.list_length = [len(space) for space in search_space.values()]
        self.max_layer = max_layer
        self.total_layer = torch.randint(1, self.max_layer, (1,)).item()  # -------------- added this line
        self.lstm = nn.ModuleList()
        self.fc = nn.ModuleList()

        self.lstm.append(nn.LSTMCell(self.list_length[-1], self.hidden_size).to(self.DEVICE))

        for i in range(1, self.length_search):
            self.lstm.append(nn.LSTMCell(self.list_length[i - 1], self.hidden_size).to(self.DEVICE))

        for i in range(0, self.length_search):
            self.fc.append(nn.Linear(self.hidden_size, self.list_length[i]).to(self.DEVICE))

    def init_hidden(self):
        h_t = torch.zeros(1, self.hidden_size, dtype=torch.float, device=self.DEVICE)
        c_t = torch.zeros(1, self.hidden_size, dtype=torch.float, device=self.DEVICE)

        return (h_t, c_t)

    def forward(self, input):
        # self.total_layer = torch.randint(1, self.max_layer, (1,)).item()
        outputs = {}

        self.hidden = [self.init_hidden() for _ in range(self.length_search)]

        for num_layer in range(self.max_layer):

            for i, (key, val) in enumerate(self.search_space.items()):
                h_t, c_t = self.hidden[i]
                h_t, c_t = self.lstm[i](input, (h_t, c_t))
                self.hidden[i] = (h_t, c_t)
                output = self.fc[i](h_t)
                # print(output)
                input = output

                if key not in outputs.keys():
                    outputs[key] = [output]
                else:
                    outputs[key].extend([output])

        # print(outputs)

        # for _ in range(self.length_search):
        #     h_t, c_t = self.hidden[i]
        #     h_t.detach_()
        #     c_t.detach_()
        #     self.hidden[i] = (h_t, c_t)

        for i, (key, val) in enumerate(outputs.items()):
            outputs[key] = torch.stack(outputs[key]).squeeze(1)

        return outputs


# 0: nn.ReLU, 1: nn.Tanh, 2: nn.Sigmoid

search_space = {
    "hidden_units": [8, 16, 32, 64],
    "activation": [0, 1, 2]
}

device = 'cuda' if torch.cuda.is_available() else 'cpu'

controller = Controller(search_space, max_layer=4, device=device)
print(f"Total Layer: {controller.total_layer}")
print(f"List Length: {controller.list_length}")
print(controller)
input = torch.tensor([[1.0, 2.0, 3.0]]).to(device)
outputs = controller(input)


# print(outputs)


class NASModel(nn.Module):
    def __init__(self, architectures, input_size, output_size):
        super(NASModel, self).__init__()
        self.architectures = architectures
        self.length_layers = len(self.architectures['hidden_units'])
        self.output_size = output_size

        layers = []

        for layer in range(self.length_layers):
            hidden_units = self.architectures['hidden_units'][layer].item()
            activation = self.architectures['activation'][layer].item()
            # print(activation)

            if (activation == 0):
                activation = nn.ReLU()
            elif (activation == 1):
                activation = nn.Tanh()
            elif (activation == 2):
                activation = nn.Sigmoid()

            if layer == 0:
                layers.append(nn.Linear(input_size, hidden_units))
                layers.append(activation)

            else:
                layers.append(nn.Linear(self.architectures['hidden_units'][layer - 1].item(),
                                        hidden_units))
                layers.append(activation)

        layers.append(nn.Linear(self.architectures['hidden_units'][self.length_layers - 1].item(), self.output_size))
        layers.append(nn.Softmax(dim=1))

        # print(layers)
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        return self.model(x)


from torch.distributions import Categorical
from torch.nn.functional import one_hot, log_softmax, softmax, normalize

architecture = {}
episode_total_log_probs = {}
controller = Controller(search_space, max_layer=4, device=device)
episode_logits = controller(input)

print(f"Number of layers is: {controller.total_layer}")
for key, space in search_space.items():
    logits = episode_logits[key]

    action_index = Categorical(logits=logits).sample().unsqueeze(0)  # ------------ changed unsqueeze(1) to unsqueeze(0)
    # print(action_index)
    actions_space = torch.tensor([space] * controller.total_layer).to(device)
    action = torch.gather(actions_space, 1, action_index).to(device)
    architecture[key] = action.squeeze(0)  # changed squeeze(1) to squeeze(0)

    # print(action_index.int().squeeze(1))

    mask = one_hot(action_index, num_classes=len(space))
    episode_log_probs = torch.sum(mask.float() * log_softmax(logits, dim=1), dim=1)
    episode_total_log_probs[key] = episode_log_probs

print(architecture)

print(episode_total_log_probs)

model = NASModel(architecture, 784, 10)
print(model)

from torch.distributions import Categorical
from torch.nn.functional import one_hot, log_softmax, softmax, normalize
import torch.optim as optim
import tqdm


def play_episode(controller):
    architecture = {}
    episode_total_log_probs = {}

    input = torch.tensor([[1.0, 2.0, 3.0]]).to(device)

    # print(controller)
    episode_logits = controller(input)

    for key, space in search_space.items():
        logits = episode_logits[key]

        action_index = Categorical(logits=logits).sample().unsqueeze(0)     # changed unsqueeze(1) to unsqueeze(0)
        actions_space = torch.tensor([space] * controller.total_layer).to(device)
        action = torch.gather(actions_space, 1, action_index).to(device)
        architecture[key] = action.squeeze(0)   # changed squeeze(1) to squeeze(0)

        # print(action_index.int().squeeze(1))

        mask = one_hot(action_index, num_classes=len(space))
        episode_log_probs = torch.sum(mask.float() * log_softmax(logits, dim=1), dim=1)
        episode_total_log_probs[key] = episode_log_probs

    model = NASModel(architecture, 784, 10).to(device)
    print(f'{model}\n')

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.005, momentum=0.9)

    for epoch in range(10):
        model.train()
        running_loss = 0.0
        for i, data in enumerate(trainloader):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data

            inputs, labels = inputs.to(device), labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            inputs = inputs.view(-1, 784)

            outputs = model(inputs)

            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()

        running_loss /= len(trainloader)
        print(f"Epoch {epoch + 1}: Loss = {running_loss}")

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = model(images.view(-1, 784))
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    acc = 100 * correct / total
    print('Accuracy of the network on the 10000 test images: {}'.format(acc))

    # compute the reward
    reward = acc

    reward = torch.tensor(reward, device=device).detach()

    sum_weighted_log_probs = {}

    sum_weighted_log_probs['hidden_units'] = torch.sum(-episode_total_log_probs['hidden_units'] * reward).unsqueeze(0)
    sum_weighted_log_probs['activation'] = torch.sum(-episode_total_log_probs['activation'] * reward).unsqueeze(0)

    sum_weighted_loss = sum_weighted_log_probs['hidden_units'] + \
                        sum_weighted_log_probs['activation']

    return sum_weighted_loss, episode_total_log_probs, reward



controller = Controller(search_space, max_layer=4, device=device)
print(controller)
optimizer = optim.Adam(controller.parameters(), lr=0.001)
total_rewards = []

controller.train()
for epoch in range(10):

    optimizer.zero_grad()
    epoch_log_probs = torch.empty((0,), device=device)

    for i in range(3):
        (sum_weighted_loss, episode_logits,
         reward) = play_episode(controller)
        print(sum_weighted_loss)
        epoch_log_probs = torch.cat((epoch_log_probs, sum_weighted_loss))

    loss = torch.mean(epoch_log_probs)

    loss.backward()
    optimizer.step()

    # for name, param in controller.named_parameters():
    #     print(name, param.grad)

    print(f"Loss in {epoch} is: {loss}")


Code walkthrough (modified version, annotated):

# Date: 2024/1/23 21:34
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
from torch.distributions import Categorical
from torch.nn.functional import one_hot, log_softmax, softmax, normalize
import torch.optim as optim
import tqdm


class Params:
    NUM_EPOCHS = 50
    ALPHA = 0.005
    BATCH_SIZE = 64
    HIDDEN_SIZE = 64  # Number of Hidden Units in Controller
    BETA = 0.1  # The entropy bonus multiplier
    INPUT_SIZE = 3
    ACTION_SPACE = 2
    NUM_STEPS = 4
    GAMMA = 0.99


# Data transform pipeline
# Compose() chains several transforms; its argument is a list of transform objects
# ToTensor() converts the input image to a tensor (calling the transform invokes its __call__ method)
# Normalize() takes a mean and std per channel: ``output[channel] = (input[channel] - mean[channel]) / std[channel]``
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5,), (0.5,))]
)

# Prepare the datasets
# train=True returns the training split, train=False the test split; download=True downloads the data if it is not already cached
train_set = torchvision.datasets.MNIST(root='./data', train=True,
                                       download=True, transform=transform)
test_set = torchvision.datasets.MNIST(root='./data', train=False,
                                      download=True, transform=transform)

# Load the datasets with DataLoader
# dataset is the dataset to load; batch_size groups that many samples per batch; shuffle=True reshuffles every epoch, shuffle=False keeps a fixed order
# drop_last (default False) drops the final incomplete batch when True
# num_workers is the number of worker subprocesses used for loading; 0 (the default) loads in the main process
train_loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=64, shuffle=False)
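
# (Added sanity check, not in the original repo) One batch from train_loader should contain
# 64 MNIST images of shape 1x28x28 and 64 integer labels:
sample_images, sample_labels = next(iter(train_loader))
print(f"batch images: {tuple(sample_images.shape)}, labels: {tuple(sample_labels.shape)}")
# expected: batch images: (64, 1, 28, 28), labels: (64,)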


# The controller: an LSTM-based network that emits the architecture decisions
class Controller(nn.Module):
    def __init__(self, search_space,
                 hidden_size=64, max_layer=4, device=''):

        super(Controller, self).__init__()

        self.search_space = search_space  # search space: candidate hidden-unit counts and activation types
        self.DEVICE = device  # run on GPU or CPU
        self.hidden_size = hidden_size  # number of hidden units inside the controller
        # number of search-space keys: 2 here; num_steps = max_layer * length_search_space = 4 * 2
        self.length_search = len(search_space)
        self.list_length = [len(space) for space in search_space.values()]  # choices per key: [hidden-unit options, activation options] = [4, 3]
        self.max_layer = max_layer  # maximum number of layers: 4
        # total number of layers: a random integer in [1, max_layer), i.e. 4 excluded, drawn as a 1-element tensor and read out with .item() as an int
        self.total_layer = torch.randint(1, self.max_layer, (1,)).item()
        # Keep the sub-modules in ModuleLists
        self.lstm = nn.ModuleList()
        self.fc = nn.ModuleList()

        # First LSTMCell: input_size is the number of expected input features (here the 3 activation options), hidden_size is the size of the hidden state h
        self.lstm.append(nn.LSTMCell(self.list_length[-1], self.hidden_size).to(self.DEVICE))
        # Add the remaining LSTMCells; the final list holds one cell per key, the first taking the number of activation options as input size and the second the number of hidden-unit options
        for i in range(1, self.length_search):
            self.lstm.append(nn.LSTMCell(self.list_length[i - 1], self.hidden_size).to(self.DEVICE))

        # Add self.length_search (here 2) fully connected heads
        for i in range(0, self.length_search):
            # nn.Linear(in_features, out_features) applies a linear transformation to the incoming data
            self.fc.append(nn.Linear(self.hidden_size, self.list_length[i]).to(self.DEVICE))

    # Initialize the hidden state (h_t) and cell state (c_t) of the LSTM.
    # These internal states carry the sequence information; both have shape (1, self.hidden_size) and start as all zeros.
    def init_hidden(self):
        h_t = torch.zeros(1, self.hidden_size, dtype=torch.float, device=self.DEVICE)
        c_t = torch.zeros(1, self.hidden_size, dtype=torch.float, device=self.DEVICE)

        return h_t, c_t

    # Forward pass
    def forward(self, input):
        # outputs collects, for every search-space key, the logits produced at each step; each key maps to the
        # sequence of outputs from its LSTM cell and fully connected head.
        outputs = {}
        # Initialize self.length_search hidden states.
        self.hidden = [self.init_hidden() for _ in range(self.length_search)]

        # For every layer to be generated
        for num_layer in range(self.max_layer):
            # Iterate over the search-space keys in order as (key, val); key i uses the hidden state (h_t, c_t) stored at index i.
            for i, (key, val) in enumerate(self.search_space.items()):
                h_t, c_t = self.hidden[i]
                # The LSTMCell takes the current input and the hidden state (h_t, c_t) and returns the new hidden
                # and cell states, which are written back to index i.
                h_t, c_t = self.lstm[i](input, (h_t, c_t))
                self.hidden[i] = (h_t, c_t)
                # The new hidden state h_t goes through the fully connected head self.fc[i]; its output becomes the next input.
                output = self.fc[i](h_t)
                # print(output)
                input = output

                # Store the head output under its key: create the list on first use, extend it afterwards.
                if key not in outputs.keys():
                    outputs[key] = [output]
                else:
                    outputs[key].extend([output])

        # print(outputs)

        # for _ in range(self.length_search):
        #     h_t, c_t = self.hidden[i]
        #     h_t.detach_()
        #     c_t.detach_()
        #     self.hidden[i] = (h_t, c_t)

        # Tidy up outputs: for each key, stack the list of tensors and squeeze out the singleton batch dimension,
        # giving one tensor of shape (max_layer, num_choices) per key.
        for i, (key, val) in enumerate(outputs.items()):
            outputs[key] = torch.stack(outputs[key]).squeeze(1)

        return outputs


# The architecture-construction part: builds and returns the searched DNN model
class NASModel(nn.Module):
    def __init__(self, architectures, input_size, output_size):
        super(NASModel, self).__init__()
        self.architectures = architectures  # the actions chosen for each search-space key, e.g.
        # {'hidden_units': tensor([32, 64, 64,  8], device='cuda:0'),
        #  'activation': tensor([1, 2, 0, 1], device='cuda:0')}
        self.length_layers = len(self.architectures['hidden_units'])    # number of hidden layers
        self.output_size = output_size  # here the input size is 784 and the output size is 10

        layers = []
        # For each hidden layer, look up its number of units and its activation function
        for layer in range(self.length_layers):
            hidden_units = self.architectures['hidden_units'][layer].item()
            activation = self.architectures['activation'][layer].item()
            # print(activation)

            if activation == 0:
                activation = nn.ReLU()
            elif activation == 1:
                activation = nn.Tanh()
            elif activation == 2:
                activation = nn.Sigmoid()
            # The first layer maps from input_size to the first layer's hidden units
            if layer == 0:
                layers.append(nn.Linear(input_size, hidden_units))
                layers.append(activation)
            # Every other layer maps from the previous layer's hidden units to the current layer's
            else:
                layers.append(nn.Linear(self.architectures['hidden_units'][layer - 1].item(),
                                        hidden_units))
                layers.append(activation)
        # Final linear layer: from the last hidden layer's units to the output size
        layers.append(nn.Linear(self.architectures['hidden_units'][self.length_layers - 1].item(), self.output_size))
        # Add a Softmax layer, which maps the multi-class outputs to a probability distribution: values in [0, 1] that sum to 1
        layers.append(nn.Softmax(dim=1))
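        # (Added note) nn.CrossEntropyLoss, used later to train this model, already applies
        # log-softmax internally, so an explicit Softmax output layer is usually omitted with that loss.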

        # print(layers)
        # Assemble the layers with nn.Sequential
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        # Run the input through the assembled model and return the output
        return self.model(x)
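
# (Added illustration, not part of the original repo) A hand-written architecture dict shows the kind
# of network NASModel assembles; the two layer sizes and activations below are arbitrary examples.
_demo_arch = {"hidden_units": torch.tensor([32, 16]), "activation": torch.tensor([0, 2])}
print(NASModel(_demo_arch, 784, 10))
# expected: Linear(784, 32) -> ReLU -> Linear(32, 16) -> Sigmoid -> Linear(16, 10) -> Softmax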


def play_episode(controller):
    # Dictionaries for the search results: the chosen hidden-unit counts and activation types
    architecture = {}  # the actions taken for each key, e.g. {'hidden_units': tensor([64,  8,  8, 64]), 'activation': tensor([0, 0, 1, 1])}
    # The log-probability of every action is computed and stored in episode_total_log_probs, e.g.
    episode_total_log_probs = {}
    # {'hidden_units': tensor([[-2.5390,  0.0000,  0.0000, -3.0257]], grad_fn=...),
    #  'activation': tensor([[-2.1069, -2.2470,  0.0000]], grad_fn=...)}
    input = torch.tensor([[1.0, 2.0, 3.0]]).to(device)

    # The controller output is a dict: each key maps to the logits produced by its LSTM cell and fully connected head,
    # i.e. the (unnormalized) distribution over that key's choices, computed from the dummy input.
    episode_logits = controller(input)
    # episode_logits: {'hidden_units': tensor([[ 0.0253, -0.0500, -0.0533, -0.0923],
    #         [-0.0050, -0.0620, -0.0510, -0.0730],
    #         [-0.0128, -0.0644, -0.0466, -0.0669],
    #         [-0.0160, -0.0668, -0.0416, -0.0650]], grad_fn=...),
    #         'activation': tensor([[-0.0373,  0.0469, -0.0961],
    #         [-0.0274,  0.0363, -0.0952],
    #         [-0.0214,  0.0308, -0.0930],
    #         [-0.0175,  0.0282, -0.0909]], grad_fn=...)}

    # For each search-space key:
    # the number of hidden units, then the activation type
    for key, space in search_space.items():
        logits = episode_logits[key]

        # Sample one action per layer from a categorical distribution parameterized by the logits
        # (Categorical accepts either probs or logits, but not both).
        action_index = Categorical(logits=logits).sample().unsqueeze(0)
        # action_index: tensor([[0, 0, 3, 0]])
        # Build the action space for this key: one row of candidate values per layer
        actions_space = torch.tensor([space] * controller.total_layer).to(device)
        # actions_space: tensor([[ 8, 16, 32, 64],
        #         [ 8, 16, 32, 64],
        #         [ 8, 16, 32, 64]])
        # Gather the values selected by action_index: first the hidden-unit counts, then the activation ids
        action = torch.gather(actions_space, 1, action_index).to(device)
        # action: tensor([[ 8,  8, 64,  8]]) on the first pass, tensor([[0, 0, 0, 2]]) on the second

        # Store the chosen actions in the architecture dict
        architecture[key] = action.squeeze(0)

        # print(action_index.int().squeeze(1))
        # one_hot and log_softmax give the one-hot mask of each action and the log-probabilities of all choices.
        mask = one_hot(action_index, num_classes=len(space))
        episode_log_probs = torch.sum(mask.float() * log_softmax(logits, dim=1), dim=1)
        # episode_log_probs: tensor([[-3.3254,  0.0000, -1.1439]], grad_fn=...)
        # The log-probabilities of this key's actions are stored in episode_total_log_probs, e.g.
        episode_total_log_probs[key] = episode_log_probs
        # {'hidden_units': tensor([[-4.1869,  0.0000,  0.0000, -1.3777]], grad_fn=...),
        #  'activation': tensor([[-3.3254,  0.0000, -1.1439]], grad_fn=...)}
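        # (Added note) Summed over all of its entries, this [1, num_choices] tensor equals the total
        # log-probability of the sampled actions for this key; that total is what the REINFORCE loss
        # at the end of this function weights by the reward.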

    # Build the sampled DNN
    model = NASModel(architecture, 784, 10).to(device)
    print(f'{model}\n')

    # Loss function: CrossEntropyLoss
    criterion = nn.CrossEntropyLoss()
    # An optimizer object holds the current optimization state and updates the parameters from their computed gradients.
    # It is constructed from an iterable of the parameters to optimize, plus optimizer-specific options such as learning rate and weight decay.
    # Optimizer: SGD with momentum
    optimizer = optim.SGD(model.parameters(), lr=0.005, momentum=0.9)

    # Train the sampled DNN
    for epoch in range(10):
        model.train()
        running_loss = 0.0
        for i, data in enumerate(train_loader):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data

            inputs, labels = inputs.to(device), labels.to(device)

            # Zero the gradients of all optimized parameters before each update
            optimizer.zero_grad()

            # forward + backward + optimize
            inputs = inputs.view(-1, 784)

            outputs = model(inputs)

            loss = criterion(outputs, labels)   # compute the loss
            loss.backward()     # backpropagate
            optimizer.step()    # update the parameters with the optimizer

            # print statistics
            running_loss += loss.item()

        running_loss /= len(train_loader)
        print(f"Epoch {epoch + 1}: Loss = {running_loss}")

    model.eval()    # switch to evaluation mode for the test step
    # Judge how well the model trained by its loss or accuracy on the test set.
    # (A validation set is not the same as a test set: the validation set is used during training, e.g. to catch
    # overfitting and to tune hyperparameters, while the test set is only used after training is completely finished.)
    correct = 0     # number of correct predictions on the test set
    total = 0   # number of test samples
    # Only inference is needed here, so gradient tracking is disabled below
    with torch.no_grad():
        for data in test_loader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = model(images.view(-1, 784))
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    acc = 100 * correct / total
    print('Accuracy of the network on the 10000 test images: {}'.format(acc))

    # compute the reward
    reward = acc

    reward = torch.tensor(reward, device=device).detach()

    sum_weighted_log_probs = {}

    sum_weighted_log_probs['hidden_units'] = torch.sum(-episode_total_log_probs['hidden_units'] * reward).unsqueeze(0)
    sum_weighted_log_probs['activation'] = torch.sum(-episode_total_log_probs['activation'] * reward).unsqueeze(0)

    sum_weighted_loss = sum_weighted_log_probs['hidden_units'] + \
                        sum_weighted_log_probs['activation']

    return sum_weighted_loss, episode_total_log_probs, reward
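
# (Added note) sum_weighted_loss implements the REINFORCE objective for one episode:
# loss = -(sum of log-probabilities of the sampled architecture) * reward, where the reward is the
# test accuracy of the trained child network, so gradient descent on this loss pushes the controller
# towards architectures that scored well.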


# 0: nn.ReLU, 1: nn.Tanh, 2: nn.Sigmoid
search_space = {
    "hidden_units": [8, 16, 32, 64],
    "activation": [0, 1, 2]
}

device = 'cuda' if torch.cuda.is_available() else 'cpu'
controller = Controller(search_space, max_layer=4, device=device)
input = torch.tensor([[1.0, 2.0, 3.0]]).to(device)
outputs = controller(input)
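
# (Added sanity check, not in the original repo) Each search-space key maps to a
# (max_layer, num_choices) tensor of logits: hidden_units -> (4, 4), activation -> (4, 3).
for key in outputs:
    print(key, tuple(outputs[key].shape))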

optimizer = optim.Adam(controller.parameters(), lr=0.001)
total_rewards = []

controller.train()
for epoch in range(10):

    optimizer.zero_grad()
    epoch_log_probs = torch.empty((0,), device=device)

    for i in range(3):
        (sum_weighted_loss, episode_logits,
         reward) = play_episode(controller)
        print(sum_weighted_loss)
        epoch_log_probs = torch.cat((epoch_log_probs, sum_weighted_loss))

    loss = torch.mean(epoch_log_probs)

    loss.backward()
    optimizer.step()

    # for name, param in controller.named_parameters():
    #   print(name, param.grad)

    print(f"Loss in {epoch} is: {loss}")
