Torch7 Study (7): Neural-Style Code Walkthrough

Torch7 Study (1): Tensor
Torch7 Study (2): Torch vs. Matlab Syntax Comparison
Torch7 Study (3): The Neural Network Package (1)
Torch7 Study (4): The Neural Network Package (2)
Torch7 Study (5): The Neural Network Package (3)
Torch7 Study (6): The Neural Network Package (4) - Training with optim
Torch7 Study (7): Neural-Style Code Walkthrough

Neural-style is probably one of the most beginner-friendly codebases out there: it is short, clearly written, and uses only the simplest way of building a network, which is also the most fundamental and important one. Let's go straight to the neural-style code. Extensive comments are included below, and some analysis follows at the end.
Note that the latest version of this code already supports multiple GPUs: different layers can be placed on different GPUs during training, mainly by adding support for the code from Controlling Perceptual Factors in Neural Style Transfer. The version below is simpler; I suggest understanding it first and then reading the latest version: https://github.com/jcjohnson/neural-style

Code

require 'torch'
require 'nn'
require 'image'
require 'optim'

require 'loadcaffe'

--------------------------------------------------------------------------------

local cmd = torch.CmdLine()

-- Basic options
cmd:option('-style_image', 'examples/inputs/starry_night.jpg',
           'Style target image')
cmd:option('-style_blend_weights', 'nil')
cmd:option('-content_image', 'examples/inputs/5.jpg',
           'Content target image')
cmd:option('-image_size', 512, 'Maximum height / width of generated image')
cmd:option('-gpu', 0, 'Zero-indexed ID of the GPU to use; for CPU mode set -gpu = -1')

-- Optimization options
cmd:option('-content_weight', 5e0)
cmd:option('-style_weight', 1e2)
cmd:option('-tv_weight', 1e-3)
cmd:option('-num_iterations', 1000)
cmd:option('-normalize_gradients', false)
cmd:option('-init', 'random', 'random|image')
cmd:option('-optimizer', 'lbfgs', 'lbfgs|adam')
cmd:option('-learning_rate', 1e1)

-- Output options
cmd:option('-print_iter', 50)
cmd:option('-save_iter', 100)
cmd:option('-output_image', 'out.png')

-- Other options
cmd:option('-style_scale', 1.0)
cmd:option('-pooling', 'max', 'max|avg')
cmd:option('-proto_file', 'models/VGG_ILSVRC_19_layers_deploy.prototxt')
cmd:option('-model_file', 'models/VGG_ILSVRC_19_layers.caffemodel')
cmd:option('-backend', 'nn', 'nn|cudnn|clnn')
cmd:option('-cudnn_autotune', false)
cmd:option('-seed', -1)

cmd:option('-content_layers', 'relu4_2', 'layers for content')
cmd:option('-style_layers', 'relu1_1,relu2_1,relu3_1,relu4_1,relu5_1', 'layers for style')

local function main(params)
  if params.gpu >= 0 then
    if params.backend ~= 'clnn' then
      require 'cutorch'
      require 'cunn'
      cutorch.setDevice(params.gpu + 1)
    else
      require 'clnn'
      require 'cltorch'
      cltorch.setDevice(params.gpu + 1)
    end
  else
    params.backend = 'nn'
  end

  if params.backend == 'cudnn' then
    require 'cudnn'
    if params.cudnn_autotune then
      cudnn.benchmark = true
    end
    cudnn.SpatialConvolution.accGradParameters = nn.SpatialConvolutionMM.accGradParameters -- ie: nop
  end

  local loadcaffe_backend = params.backend
  if params.backend == 'clnn' then loadcaffe_backend = 'nn' end
  local cnn = loadcaffe.load(params.proto_file, params.model_file, loadcaffe_backend):float()
  if params.gpu >= 0 then
    if params.backend ~= 'clnn' then
      cnn:cuda()
    else
      cnn:cl()
    end
  end

  local content_image = image.load(params.content_image, 3)
  content_image = image.scale(content_image, params.image_size, 'bilinear')
  local content_image_caffe = preprocess(content_image):float()

  local style_size = math.ceil(params.style_scale * params.image_size)
  local style_image_list = params.style_image:split(',')
  local style_images_caffe = {}
  for _, img_path in ipairs(style_image_list) do
    local img = image.load(img_path, 3)
    img = image.scale(img, style_size, 'bilinear')
    local img_caffe = preprocess(img):float()
    table.insert(style_images_caffe, img_caffe)
  end

  -- Handle style blending weights for multiple style inputs
  local style_blend_weights = nil
  if params.style_blend_weights == 'nil' then
    -- Style blending not specified, so use equal weighting
    style_blend_weights = {}
    -- # is Lua's length operator
    for i = 1, #style_image_list do
      table.insert(style_blend_weights, 1.0)
    end
  else
    style_blend_weights = params.style_blend_weights:split(',')
    assert(#style_blend_weights == #style_image_list,
      '-style_blend_weights and -style_images must have the same number of elements')
  end
  -- Normalize the style blending weights so they sum to 1
  local style_blend_sum = 0
  for i = 1, #style_blend_weights do
    style_blend_weights[i] = tonumber(style_blend_weights[i])
    style_blend_sum = style_blend_sum + style_blend_weights[i]
  end
  for i = 1, #style_blend_weights do
    style_blend_weights[i] = style_blend_weights[i] / style_blend_sum
  end


  if params.gpu >= 0 then
    if params.backend ~= 'clnn' then
      content_image_caffe = content_image_caffe:cuda()
      for i = 1, #style_images_caffe do
        style_images_caffe[i] = style_images_caffe[i]:cuda()
      end
    else
      content_image_caffe = content_image_caffe:cl()
      for i = 1, #style_images_caffe do
        style_images_caffe[i] = style_images_caffe[i]:cl()
      end
    end
  end

  local content_layers = params.content_layers:split(",")
  local style_layers = params.style_layers:split(",")

  -- Set up the network, inserting style and content loss modules
  local content_losses, style_losses = {}, {}
  local next_content_idx, next_style_idx = 1, 1
  local net = nn.Sequential()
  if params.tv_weight > 0 then
    local tv_mod = nn.TVLoss(params.tv_weight):float()
    if params.gpu >= 0 then
      if params.backend ~= 'clnn' then
        tv_mod:cuda()
      else
        tv_mod:cl()
      end
    end
    net:add(tv_mod)
  end
  for i = 1, #cnn do
    if next_content_idx <= #content_layers or next_style_idx <= #style_layers then
      local layer = cnn:get(i)
      local name = layer.name
      local layer_type = torch.type(layer)
      local is_pooling = (layer_type == 'cudnn.SpatialMaxPooling' or layer_type == 'nn.SpatialMaxPooling')
      if is_pooling and params.pooling == 'avg' then
        assert(layer.padW == 0 and layer.padH == 0)
        local kW, kH = layer.kW, layer.kH
        local dW, dH = layer.dW, layer.dH
        local avg_pool_layer = nn.SpatialAveragePooling(kW, kH, dW, dH):float()
        if params.gpu >= 0 then
          if params.backend ~= 'clnn' then
            avg_pool_layer:cuda()
          else
            avg_pool_layer:cl()
          end
        end
        local msg = 'Replacing max pooling at layer %d with average pooling'
        print(string.format(msg, i))
        net:add(avg_pool_layer)
      else
        -- If this is not a pooling layer, add it unchanged
        net:add(layer)
      end
      if name == content_layers[next_content_idx] then
        print("Setting up content layer", i, ":", layer.name)
        -- If this layer is a content layer, insert a loss module, which is initialized with content_weight, target, and norm.
        -- target is the output of the content image after passing through all layers added so far,
        -- i.e. target = net:forward(content_image_caffe):clone().
        local target = net:forward(content_image_caffe):clone()
        local norm = params.normalize_gradients
        local loss_module = nn.ContentLoss(params.content_weight, target, norm):float()
        if params.gpu >= 0 then
          if params.backend ~= 'clnn' then
            loss_module:cuda()
          else
            loss_module:cl()
          end
        end
        net:add(loss_module)
        table.insert(content_losses, loss_module)
        next_content_idx = next_content_idx + 1
      end

      if name == style_layers[next_style_idx] then
        print("Setting up style layer  ", i, ":", layer.name)
        local gram = GramMatrix():float()
        if params.gpu >= 0 then
          if params.backend ~= 'clnn' then
            gram = gram:cuda()
          else
            gram = gram:cl()
          end
        end
        local target = nil
        -- style_images_caffe is a table of tensors, one per style image.
        -- Feed each style_images_caffe[i] through net, pass each output through gram
        -- to get its Gram matrix, and sum them weighted by each style image's blend weight to form target.
        for i = 1, #style_images_caffe do
          local target_features = net:forward(style_images_caffe[i]):clone()
          local target_i = gram:forward(target_features):clone()
          target_i:div(target_features:nElement())
          target_i:mul(style_blend_weights[i])
          if i == 1 then
            target = target_i
          else
            target:add(target_i)
          end
        end

        local norm = params.normalize_gradients
        local loss_module = nn.StyleLoss(params.style_weight, target, norm):float()
        if params.gpu >= 0 then
          if params.backend ~= 'clnn' then
            loss_module:cuda()
          else
            loss_module:cl()
          end
        end
        net:add(loss_module)
        table.insert(style_losses, loss_module)
        next_style_idx = next_style_idx + 1
      end
    end
  end

  -- We don't need the base CNN anymore, so clean it up to save memory.
  cnn = nil
  for i=1,#net.modules do
    local module = net.modules[i]
    if torch.type(module) == 'nn.SpatialConvolutionMM' then
        -- remove these, not used, but uses gpu memory
        module.gradWeight = nil
        module.gradBias = nil
    end
  end
  collectgarbage()

  -- Initialize the image
  if params.seed >= 0 then
    torch.manualSeed(params.seed)
  end
  local img = nil
  if params.init == 'random' then
    img = torch.randn(content_image:size()):float():mul(0.001)
  elseif params.init == 'image' then
    img = content_image_caffe:clone():float()
  else
    error('Invalid init type')
  end
  if params.gpu >= 0 then
    if params.backend ~= 'clnn' then
      img = img:cuda()
    else
      img = img:cl()
    end
  end

  -- Run it through the network once to get the proper size for the gradient
  -- All the gradients will come from the extra loss modules, so we just pass
  -- zeros into the top of the net on the backward pass.
  local y = net:forward(img)
  local dy = img.new(#y):zero()

  -- Declaring this here lets us access it in maybe_print
  local optim_state = nil
  if params.optimizer == 'lbfgs' then
    optim_state = {
      maxIter = params.num_iterations,
      verbose=true,
    }
  elseif params.optimizer == 'adam' then
    optim_state = {
      learningRate = params.learning_rate,
    }
  else
    error(string.format('Unrecognized optimizer "%s"', params.optimizer))
  end

  local function maybe_print(t, loss)
    local verbose = (params.print_iter > 0 and t % params.print_iter == 0)
    if verbose then
      print(string.format('Iteration %d / %d', t, params.num_iterations))
      for i, loss_module in ipairs(content_losses) do
        print(string.format('  Content %d loss: %f', i, loss_module.loss))
      end
      for i, loss_module in ipairs(style_losses) do
        print(string.format('  Style %d loss: %f', i, loss_module.loss))
      end
      print(string.format('  Total loss: %f', loss))
    end
  end

  local function maybe_save(t)
    local should_save = params.save_iter > 0 and t % params.save_iter == 0
    should_save = should_save or t == params.num_iterations
    if should_save then
      local disp = deprocess(img:double())
      disp = image.minmax{tensor=disp, min=0, max=1}
      local filename = build_filename(params.output_image, t)
      if t == params.num_iterations then
        filename = params.output_image
      end
      image.save(filename, disp)
    end
  end

  -- Function to evaluate loss and gradient. We run the net forward and
  -- backward to get the gradient, and sum up losses from the loss modules.
  -- optim.lbfgs internally handles iteration and calls this function many
  -- times, so we manually count the number of iterations to handle printing
  -- and saving intermediate results.
  local num_calls = 0
  local function feval(x)
    num_calls = num_calls + 1
    -- net:forward(x) makes each ContentLoss/StyleLoss module compute its loss
    net:forward(x)
    -- There is no ground-truth image, so dy is just an all-zero tensor
    local grad = net:updateGradInput(x, dy)
    -- The total loss is the sum of the content losses and style losses; minimizing this sum is the goal.
    local loss = 0
    for _, mod in ipairs(content_losses) do
      loss = loss + mod.loss
    end
    for _, mod in ipairs(style_losses) do
      loss = loss + mod.loss
    end
    maybe_print(num_calls, loss)
    maybe_save(num_calls)

    collectgarbage()
    -- The first return value is the loss to minimize; the argument passed to feval is the variable being optimized.
    -- optim.lbfgs expects a vector for gradients
    return loss, grad:view(grad:nElement())
  end

  -- Run optimization.
  if params.optimizer == 'lbfgs' then
    print('Running optimization with L-BFGS')
    local x, losses = optim.lbfgs(feval, img, optim_state)
  elseif params.optimizer == 'adam' then
    print('Running optimization with ADAM')
    for t = 1, params.num_iterations do
      local x, losses = optim.adam(feval, img, optim_state)
    end
  end
end


-- Helper functions used above



function build_filename(output_image, iteration)
  local ext = paths.extname(output_image)
  local basename = paths.basename(output_image, ext)
  local directory = paths.dirname(output_image)
  return string.format('%s/%s_%d.%s',directory, basename, iteration, ext)
end


-- Preprocess an image before passing it to a Caffe model.
-- We need to rescale from [0, 1] to [0, 255], convert from RGB to BGR,
-- and subtract the mean pixel.
function preprocess(img)
  local mean_pixel = torch.DoubleTensor({103.939, 116.779, 123.68})
  local perm = torch.LongTensor{3, 2, 1}
  img = img:index(1, perm):mul(256.0)
  mean_pixel = mean_pixel:view(3, 1, 1):expandAs(img)
  img:add(-1, mean_pixel)
  return img
end


-- Undo the above preprocessing.
function deprocess(img)
  local mean_pixel = torch.DoubleTensor({103.939, 116.779, 123.68})
  mean_pixel = mean_pixel:view(3, 1, 1):expandAs(img)
  img = img + mean_pixel
  local perm = torch.LongTensor{3, 2, 1}
  img = img:index(1, perm):div(256.0)
  return img
end


-- Define an nn Module to compute content loss in-place
local ContentLoss, parent = torch.class('nn.ContentLoss', 'nn.Module')

function ContentLoss:__init(strength, target, normalize)
  parent.__init(self)
  self.strength = strength
  self.target = target
  self.normalize = normalize or false
  self.loss = 0
  self.crit = nn.MSECriterion()
end


-- The standard pattern:

function ContentLoss:updateOutput(input)
  if input:nElement() == self.target:nElement() then
  -- The ContentLoss layer sits right after a layer such as relu1_1 and only computes a loss. Its target is
  -- the output of relu1_1, i.e. net:forward(content_image_caffe):clone(), where net only extends up to relu1_1.

  -- A custom layer typically overrides the two functions updateOutput and updateGradInput.
  -- Note: only the input/output update functions are overridden here; there is no weight update!
  -- updateOutput only needs the input, while updateGradInput needs both input and dLoss/dOutput in order to backpropagate.

  -- Since this is the content loss layer, updateOutput computes self.loss = self.crit:forward(input, self.target) * self.strength
    self.loss = self.crit:forward(input, self.target) * self.strength
  else
    print('WARNING: Skipping content loss')
  end
  -- This layer still has to produce an output, but it does not change it, so self.output = input
  self.output = input
  return self.output
end

-- Here is how the custom updateGradInput works: it receives input and gradOutput, as expected.
-- Its job is to compute dLoss/dInput; since this is the content loss layer, that is self.crit:backward(input, self.target),
-- after which the returned gradInput is adjusted further.
function ContentLoss:updateGradInput(input, gradOutput)
  if input:nElement() == self.target:nElement() then
    self.gradInput = self.crit:backward(input, self.target)
  end
  if self.normalize then
    self.gradInput:div(torch.norm(self.gradInput, 1) + 1e-8)
  end
  self.gradInput:mul(self.strength)
  self.gradInput:add(gradOutput)
  return self.gradInput
end

-- Returns a network that computes the CxC Gram matrix from inputs
-- of size C x H x W
function GramMatrix()
  local net = nn.Sequential()
  net:add(nn.View(-1):setNumInputDims(2))
  local concat = nn.ConcatTable()
  concat:add(nn.Identity())
  concat:add(nn.Identity())
  net:add(concat)
  net:add(nn.MM(false, true))
  return net
end


-- Define an nn Module to compute style loss in-place
local StyleLoss, parent = torch.class('nn.StyleLoss', 'nn.Module')

function StyleLoss:__init(strength, target, normalize)
  parent.__init(self)
  self.normalize = normalize or false
  self.strength = strength
  self.target = target
  self.loss = 0

  self.gram = GramMatrix()
  self.G = nil
  self.crit = nn.MSECriterion()
end

function StyleLoss:updateOutput(input)
  self.G = self.gram:forward(input)
  self.G:div(input:nElement())
  self.loss = self.crit:forward(self.G, self.target)
  self.loss = self.loss * self.strength
  self.output = input
  return self.output
end

function StyleLoss:updateGradInput(input, gradOutput)
  local dG = self.crit:backward(self.G, self.target)
  dG:div(input:nElement())
  self.gradInput = self.gram:backward(input, dG)
  if self.normalize then
    self.gradInput:div(torch.norm(self.gradInput, 1) + 1e-8)
  end
  self.gradInput:mul(self.strength)
  self.gradInput:add(gradOutput)
  return self.gradInput
end


local TVLoss, parent = torch.class('nn.TVLoss', 'nn.Module')

function TVLoss:__init(strength)
  parent.__init(self)
  self.strength = strength
  self.x_diff = torch.Tensor()
  self.y_diff = torch.Tensor()
end

function TVLoss:updateOutput(input)
  self.output = input
  return self.output
end

-- TV loss backward pass inspired by kaishengtai/neuralart
function TVLoss:updateGradInput(input, gradOutput)
  self.gradInput:resizeAs(input):zero()
  local C, H, W = input:size(1), input:size(2), input:size(3)
  self.x_diff:resize(3, H - 1, W - 1)
  self.y_diff:resize(3, H - 1, W - 1)
  self.x_diff:copy(input[{{}, {1, -2}, {1, -2}}])
  self.x_diff:add(-1, input[{{}, {1, -2}, {2, -1}}])
  self.y_diff:copy(input[{{}, {1, -2}, {1, -2}}])
  self.y_diff:add(-1, input[{{}, {2, -1}, {1, -2}}])
  self.gradInput[{{}, {1, -2}, {1, -2}}]:add(self.x_diff):add(self.y_diff)
  self.gradInput[{{}, {1, -2}, {2, -1}}]:add(-1, self.x_diff)
  self.gradInput[{{}, {2, -1}, {1, -2}}]:add(-1, self.y_diff)
  self.gradInput:mul(self.strength)
  self.gradInput:add(gradOutput)
  return self.gradInput
end


local params = cmd:parse(arg)
main(params)

Code Analysis

  • Which layers does the net actually contain?
    The final net does not take all 19 layers of VGG; it truncates the model. For example, given
cmd:option('-content_layers', 'relu4_2', 'layers for content')
cmd:option('-style_layers', 'relu1_1,relu2_1,relu3_1,relu4_1,relu5_1', 'layers for style')

the last layer of net is relu5_1. Strictly speaking that is a simplification: the code copies the VGG model layer by layer. An ordinary layer is added directly; a max-pooling layer is replaced with average pooling when -pooling is 'avg'; after each content layer a ContentLoss loss_module is inserted; and after each style layer a StyleLoss loss_module is inserted. Each StyleLoss module is built around a Gram-matrix sub-network (gramNet).

  • Custom layers
    Special classes such as the ContentLoss layer must be defined by hand. A custom layer overrides at least three methods: __init, updateOutput, and updateGradInput (a minimal sketch follows this list).

    • updateOutput defines what the layer outputs for a given input. For the custom content loss layer here, it computes self.loss = self.crit:forward(input, self.target) * self.strength, and then updates self.output.
    • updateGradInput(input, gradOutput) computes dLoss/dInput; for the content loss layer this is self.crit:backward(input, self.target), followed by custom adjustments to the returned gradInput.
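Below is a minimal sketch of this pattern (the module name nn.ScaleBy is hypothetical, for illustration only): a parameter-free layer that multiplies its input by a constant and scales incoming gradients the same way.

require 'nn'

-- Hypothetical minimal custom module: output = input * s.
local ScaleBy, parent = torch.class('nn.ScaleBy', 'nn.Module')

function ScaleBy:__init(s)
  parent.__init(self)
  self.s = s
end

function ScaleBy:updateOutput(input)
  -- What this layer outputs for a given input.
  self.output = input * self.s
  return self.output
end

function ScaleBy:updateGradInput(input, gradOutput)
  -- dLoss/dInput by the chain rule; no weights, so accGradParameters is not needed.
  self.gradInput = gradOutput * self.s
  return self.gradInput
end

local m = nn.ScaleBy(2)
print(m:forward(torch.ones(3)))                 -- 2 2 2
print(m:backward(torch.ones(3), torch.ones(3))) -- 2 2 2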
  • The Gram matrix
    GramMatrix builds a small network: a View layer flattens the input so a ConcatTable can duplicate it into two branches, each holding an nn.Identity(). After the concat is added, nn.MM multiplies the two outputs together, transposing one of them, and the resulting gramNet is returned. It helps to treat this gramNet as a single module. A quick shape check follows the code below.

function GramMatrix()
  local net = nn.Sequential()
  net:add(nn.View(-1):setNumInputDims(2))
  local concat = nn.ConcatTable()
  concat:add(nn.Identity())
  concat:add(nn.Identity())
  net:add(concat)
  net:add(nn.MM(false, true))
  return net
end
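A quick shape check (a standalone snippet, assuming GramMatrix is defined as above): the View flattens a C x H x W input to C x (H*W), the ConcatTable duplicates it, and nn.MM(false, true) computes F * F^T, a C x C matrix.

require 'nn'

local g = GramMatrix()
local features = torch.randn(3, 4, 5)  -- C=3, H=4, W=5
print(g:forward(features):size())      -- 3 x 3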
  • Generating $G^l_{ij}$ in $L_{style}$
    Let net1, ..., net5 denote the front portions of the network, running from the first layer up to relu1_1, ..., relu5_1 respectively. We want $L_{style} = \sum_{l=0}^{L} w_l E_l$, where $E_l = \frac{1}{4 N_l^2 M_l^2} \sum_{i,j} \left( G^l_{ij} - A^l_{ij} \right)^2$. Each $G^l_{ij}$ is obtained by feeding the style images through net1, ..., net5, taking the Gram matrix of each output, and summing with the blend weights; the resulting $G^{net_x}_{ij}$ is the target attached to layer net_x:
        local target = nil
        for i = 1, #style_images_caffe do
          local target_features = net:forward(style_images_caffe[i]):clone()
          local target_i = gram:forward(target_features):clone()
          target_i:div(target_features:nElement())
          target_i:mul(style_blend_weights[i])
          if i == 1 then
            target = target_i
          else
            target:add(target_i)
          end
        end

A StyleLoss loss_module is then appended after net_x, initialized as

local norm = params.normalize_gradients
local loss_module = nn.StyleLoss(params.style_weight, target, norm):float()

In StyleLoss's __init we have:

...
self.gram = GramMatrix()
self.G = nil
self.crit = nn.MSECriterion()

updateOutput is fairly easy to follow:

function StyleLoss:updateOutput(input)
  self.G = self.gram:forward(input)
  self.G:div(input:nElement())
  self.loss = self.crit:forward(self.G, self.target)
  self.loss = self.loss * self.strength
  self.output = input
  return self.output
end

In updateGradInput, self.G is by now the actual Gram output of net_x (whose input is the random-noise image), while the desired output is self.target (the style images' response at this layer). So self.crit:backward(self.G, self.target) yields dLoss/dG, and self.gram:backward(input, dG) then yields gradInput.

function StyleLoss:updateGradInput(input, gradOutput)
-- In updateOutput, self.G became the output of the gramNet, so dG is given by the line below.
  local dG = self.crit:backward(self.G, self.target)
  dG:div(input:nElement())
  self.gradInput = self.gram:backward(input, dG)
  if self.normalize then
    self.gradInput:div(torch.norm(self.gradInput, 1) + 1e-8)
  end
  self.gradInput:mul(self.strength)
  -- Why is gradOutput added here?? (answered in the last bullet below)
  self.gradInput:add(gradOutput)
  return self.gradInput
end
  • Generating $A^l_{ij}$ in $L_{style}$
    $A^l_{ij}$ is produced when the network input passes through each StyleLoss module, which happens inside feval via net:forward(x). forward calls updateOutput(input), while backward calls [gradInput] updateGradInput(input, gradOutput) and accGradParameters(input, gradOutput). So after net:forward(x), every custom layer (StyleLoss and ContentLoss, through their overridden updateOutput) has computed its loss; summing them all gives the total loss.
 local y = net:forward(img)
 local dy = img.new(#y):zero()
 local num_calls = 0
 local function feval(x)
   num_calls = num_calls + 1
   -- net:forward(x) makes each ContentLoss/StyleLoss module compute its loss
   net:forward(x)
   -- There is no ground-truth image, so dy is just an all-zero tensor
   local grad = net:updateGradInput(x, dy)
   -- The total loss is the sum of the content losses and style losses; minimizing this sum is the goal.
   local loss = 0
   for _, mod in ipairs(content_losses) do
     loss = loss + mod.loss
   end
   for _, mod in ipairs(style_losses) do
     loss = loss + mod.loss
   end
   maybe_print(num_calls, loss)
   maybe_save(num_calls)

   collectgarbage()
   -- The first return value is the loss to minimize; the argument passed to feval is the variable being optimized.
   -- optim.lbfgs expects a vector for gradients
   return loss, grad:view(grad:nElement())
 end

 -- Run optimization.
 if params.optimizer == 'lbfgs' then
   print('Running optimization with L-BFGS')
   local x, losses = optim.lbfgs(feval, img, optim_state)
 elseif params.optimizer == 'adam' then
   print('Running optimization with ADAM')
   for t = 1, params.num_iterations do
     local x, losses = optim.adam(feval, img, optim_state)
   end
 end
  • The feval function
    The argument x of feval is the variable being optimized; here it is the network input, whereas a typical network optimizes its weights w. feval must return exactly two values: the first is the loss, the objective to minimize, and the second is dloss/dx, the gradient of that objective with respect to the optimized variable. A toy illustration follows.
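As a toy illustration of this contract (a standalone sketch, not part of neural_style.lua, minimizing (x - 3)^2 with adam):

require 'torch'
require 'optim'

-- feval must return (loss, dloss/dx) for the current value of x.
local function feval(x)
  local loss = (x[1] - 3)^2
  local grad = torch.Tensor{2 * (x[1] - 3)}
  return loss, grad
end

local x = torch.zeros(1)
local state = {learningRate = 0.1}
for t = 1, 200 do
  optim.adam(feval, x, state)  -- updates x in place
end
print(x[1])  -- close to 3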
  • Once more: why does the custom updateGradInput call self.gradInput:add(gradOutput)?
    One of the code's authors answered this question:
When we have multiple style/content loss modules, we would need to make sure that the gradients generated by one module are passed back all the way to the input layer. Hence if we don't add the gradient that is coming from the modules ahead of the current layer in the network, we would essentially use gradients generated by a loss module only until another loss module occurs and then they would be ignored after that, hence they would not be propagated all the way till the input layer as we expect them to do so.
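
The point can be verified with a tiny standalone sketch (the module name nn.PassThroughLoss is hypothetical): two transparent loss layers in sequence, each contributing its own gradient on the backward pass. Only because each layer adds gradOutput do both contributions reach the input.

require 'nn'

-- Hypothetical transparent loss layer: passes the input through unchanged, and on
-- the backward pass adds its own gradient (all ones here) to the incoming gradOutput.
local PassThroughLoss, parent = torch.class('nn.PassThroughLoss', 'nn.Module')

function PassThroughLoss:updateOutput(input)
  self.output = input
  return self.output
end

function PassThroughLoss:updateGradInput(input, gradOutput)
  self.gradInput = torch.ones(input:size())  -- this module's own gradient
  self.gradInput:add(gradOutput)             -- pass through gradients from layers above
  return self.gradInput
end

local net = nn.Sequential():add(nn.PassThroughLoss()):add(nn.PassThroughLoss())
local x = torch.zeros(2)
net:forward(x)
print(net:updateGradInput(x, torch.zeros(2)))  -- each element is 2: both gradients arrived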

More elaborate code walkthroughs may follow, mainly on the usage of ConcatTable and modules such as nn.Replicate and nn.CDivTable. Many operations can be wrapped as layers, so built-in modules replace hand-written tensor manipulation, which is a very good thing.
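For instance, a standalone sketch (illustrative only) that combines ConcatTable with CDivTable to divide one branch of the input by another:

require 'nn'

-- Duplicate the input with ConcatTable, scale one branch, then divide elementwise:
-- output = input / (2 * input) = 0.5 everywhere (for nonzero input).
local net = nn.Sequential()
local branches = nn.ConcatTable()
branches:add(nn.Identity())      -- first branch: the input itself
branches:add(nn.MulConstant(2))  -- second branch: input * 2
net:add(branches)                -- tensor -> {tensor, tensor}
net:add(nn.CDivTable())          -- {a, b} -> elementwise a / b
print(net:forward(torch.ones(2, 2)))  -- all 0.5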
