本系列采用GitHub上的tensorflow实现的ENAS。
github:https://github.com/melodyguan/enas
首先是操作集文件common_ops.py。
import numpy as np
import tensorflow as tf
def lstm(x, prev_c, prev_h, w):
    """Run a single LSTM step and return the new cell and hidden states.

    ``x`` and ``prev_h`` are concatenated along axis 1 and multiplied by
    ``w``; the product is split into four equal parts along axis 1 that
    serve as the input gate, forget gate, output gate and candidate values.

    Args:
        x: Input tensor for this step.
        prev_c: Cell state from the previous step.
        prev_h: Hidden state from the previous step.
        w: Weight tensor applied to the concatenated ``[x, prev_h]``.
            NOTE(review): no bias term is added here -- confirm intended.

    Returns:
        A ``(next_c, next_h)`` tuple with the updated cell and hidden states.
    """
    ifog = tf.matmul(tf.concat([x, prev_h], axis=1), w)
    i, f, o, g = tf.split(ifog, 4, axis=1)
    i = tf.sigmoid(i)
    f = tf.sigmoid(f)
    o = tf.sigmoid(o)
    g = tf.tanh(g)
    # New cell state: gated candidate plus the retained part of the old cell.
    next_c = i * g + f * prev_c
    next_h = o * tf.tanh(next_c)
    return next_c, next_h
def stack_lstm(x, prev_c, prev_h, w):
    """Run one step through a stack of LSTM layers.

    Layer 0 receives ``x``; every later layer receives the hidden state
    produced by the layer directly below it.

    Args:
        x: Input to the first layer.
        prev_c: List of previous cell states, one per layer.
        prev_h: List of previous hidden states, one per layer.
        w: List of weight tensors, one per layer.

    Returns:
        A ``(next_c, next_h)`` tuple of lists holding each layer's new cell
        state and hidden state. Both lists are empty when no layers are given.
    """
    next_c, next_h = [], []
    # zip stops at the shortest list, so only layers that have all three of
    # (cell state, hidden state, weight) are processed.
    for layer_id, (_c, _h, _w) in enumerate(zip(prev_c, prev_h, w)):
        inputs = x if layer_id == 0 else next_h[-1]
        curr_c, curr_h = lstm(inputs, _c, _h, _w)
        next_c.append(curr_c)
        next_h.append(curr_h)
    return next_c, next_h
def create_weight(name, shape, initializer=None, trainable=True, seed=None):
    """Create (or fetch) a weight variable via ``tf.get_variable``.

    Args:
        name: Variable name passed to ``tf.get_variable``.
        shape: Shape of the variable.
        initializer: Initializer to use. When ``None``, a He-normal
            initializer is built with ``seed``.
        trainable: Forwarded to ``tf.get_variable``.
        seed: Seed for the default He-normal initializer; ignored when an
            explicit ``initializer`` is supplied.

    Returns:
        The variable returned by ``tf.get_variable``.
    """
    if initializer is None:
        initializer = tf.contrib.keras.initializers.he_normal(seed=seed)
    return tf.get_variable(
        name, shape, initializer=initializer, trainable=trainable)
def create_bias(name, shape, initializer=None):
    """Create (or fetch) a bias variable via ``tf.get_variable``.

    Args:
        name: Variable name passed to ``tf.get_variable``.
        shape: Shape of the variable.
        initializer: Initializer to use. When ``None``, a constant
            initializer of ``0.0`` (``tf.float32``) is used.

    Returns:
        The variable returned by ``tf.get_variable``.
    """
    if initializer is None:
        initializer = tf.constant_initializer(0.0, dtype=tf.float32)
    return tf.get_variable(name, shape, initializer=initializer)
定义一段基本的LSTM程序。
def lstm(x, prev_c, prev_h, w):
    """Compute one LSTM step.

    The concatenation of ``x`` and ``prev_h`` (along axis 1) is multiplied
    by ``w`` and the result is split along axis 1 into four equal chunks:
    input gate ``i``, forget gate ``f``, output gate ``o`` and candidate ``g``.

    Args:
        x: Input tensor for this step.
        prev_c: Previous cell state.
        prev_h: Previous hidden state.
        w: Weight tensor applied to the concatenated ``[x, prev_h]``.

    Returns:
        ``(next_c, next_h)``: the updated cell state and hidden state.
    """
    ifog = tf.matmul(tf.concat([x, prev_h], axis=1), w)
    i, f, o, g = tf.split(ifog, 4, axis=1)
    i = tf.sigmoid(i)
    f = tf.sigmoid(f)
    o = tf.sigmoid(o)
    g = tf.tanh(g)
    # Keep part of the old cell state (f) and add the gated candidate (i * g).
    next_c = i * g + f * prev_c
    next_h = o * tf.tanh(next_c)
    return next_c, next_h
定义层叠的LSTM,即把多个LSTM接起来。这里的prev_c,prev_h,w都是List。
具体来说,就是先建立了空的LSTM中间状态和输出列表。然后对每一层执行如下:
如果是第0层,那么输入设置为x;否则输入为上一层的输出h。next_h[-1]是列表的最后一个元素,也就是上一层刚刚追加进去的输出。
用刚才的LSTM函数求出当前的中间状态c和输出h。
用新建的next_c和next_h的List收集curr_c和curr_h。
def stack_lstm(x, prev_c, prev_h, w):
    """Apply a stack of LSTM layers for a single step.

    The first layer consumes ``x``; each following layer consumes the hidden
    state just produced by the previous layer.

    Args:
        x: Input to the first layer.
        prev_c: List of previous cell states, one per layer.
        prev_h: List of previous hidden states, one per layer.
        w: List of weight tensors, one per layer.

    Returns:
        ``(next_c, next_h)``: two lists with the new cell state and hidden
        state of every layer, in layer order.
    """
    next_c, next_h = [], []
    # enumerate turns an iterable into (index, item) pairs.
    # zip walks several iterables in lockstep, yielding one tuple of
    # corresponding items per step and stopping at the shortest input.
    for layer_id, (_c, _h, _w) in enumerate(zip(prev_c, prev_h, w)):
        inputs = x if layer_id == 0 else next_h[-1]
        curr_c, curr_h = lstm(inputs, _c, _h, _w)
        next_c.append(curr_c)
        next_h.append(curr_h)
    return next_c, next_h
新建权重。
在没有initializer的情况下,给出he normal的initializer。其中he normal参考https://www.cv-foundation.org/openaccess/content_iccv_2015/html/He_Delving_Deep_into_ICCV_2015_paper.html。
返回一个形状为shape的变量(由tf.get_variable创建或复用),其维度由shape决定,不一定是二维矩阵。
def create_weight(name, shape, initializer=None, trainable=True, seed=None):
    """Return a weight variable obtained from ``tf.get_variable``.

    Args:
        name: Variable name passed to ``tf.get_variable``.
        shape: Shape of the variable.
        initializer: Optional initializer. Defaults to a He-normal
            initializer constructed with ``seed``.
        trainable: Forwarded to ``tf.get_variable``.
        seed: Seed used only for the default He-normal initializer.

    Returns:
        The variable returned by ``tf.get_variable``.
    """
    if initializer is None:
        initializer = tf.contrib.keras.initializers.he_normal(seed=seed)
    return tf.get_variable(
        name, shape, initializer=initializer, trainable=trainable)
新建偏置。
细节和上面类似:未指定initializer时,默认使用值为0.0的常量初始化器(dtype为tf.float32)。
def create_bias(name, shape, initializer=None):
    """Return a bias variable obtained from ``tf.get_variable``.

    Args:
        name: Variable name passed to ``tf.get_variable``.
        shape: Shape of the variable.
        initializer: Optional initializer. Defaults to a constant ``0.0``
            initializer with dtype ``tf.float32``.

    Returns:
        The variable returned by ``tf.get_variable``.
    """
    if initializer is None:
        initializer = tf.constant_initializer(0.0, dtype=tf.float32)
    return tf.get_variable(name, shape, initializer=initializer)