- On the question about the `build` function: `build` is called once before `__call__`. If it has already run, it is not called again; the flag that records this is `self.built`. When `self.built` is `True`, the next `__call__` skips `build`, so when we use the official layers we never need an extra `build` of our own. A quick check of this flag is sketched right below.
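A minimal sketch of the `built` flag (assuming `tf.keras` with eager execution; the layer size and input shape here are made up):

```python
import tensorflow as tf

layer = tf.keras.layers.Dense(4)
print(layer.built)             # False: build() has not run yet

_ = layer(tf.zeros((2, 16)))   # __call__ triggers build() once, then call()
print(layer.built)             # True: later calls skip build()
```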
- Only a custom layer's `__init__` needs to call `super`; in `build` you only need to set `self.built = True` manually (calling `super().build(input_shape)` at the end, as the example does, sets that flag for you). Below is the latest official example.
```python
from keras import backend as K
from keras.layers import Layer


class MyLayer(Layer):

    def __init__(self, output_dim, **kwargs):
        self.output_dim = output_dim
        super(MyLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        assert isinstance(input_shape, list)
        # Create a trainable weight variable for this layer.
        self.kernel = self.add_weight(name='kernel',
                                      shape=(input_shape[0][1], self.output_dim),
                                      initializer='uniform',
                                      trainable=True)
        super(MyLayer, self).build(input_shape)  # Be sure to call this at the end

    def call(self, x):
        assert isinstance(x, list)
        a, b = x
        return [K.dot(a, self.kernel) + b, K.mean(b, axis=-1)]

    def compute_output_shape(self, input_shape):
        assert isinstance(input_shape, list)
        shape_a, shape_b = input_shape
        return [(shape_a[0], self.output_dim), shape_b[:-1]]
```
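For reference, a hedged usage sketch of `MyLayer` with the functional API (the input sizes are made up; the second input must have `output_dim` features so that the `+ b` in `call` lines up):

```python
from keras.layers import Input
from keras.models import Model

a = Input(shape=(8,))
b = Input(shape=(5,))
# build() runs once on the first __call__, before call() itself.
out_sum, out_mean = MyLayer(5)([a, b])
model = Model(inputs=[a, b], outputs=[out_sum, out_mean])
```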
- Here is another example, taken from the TensorFlow source: you do not need to `build` a layer yourself every time you call one, and you do not need to call `super` inside `build` either (setting `self.built = True` at the end is enough):
```python
@tf_export('keras.layers.Dense')
class Dense(Layer):
  """Just your regular densely-connected NN layer.

  `Dense` implements the operation:
  `output = activation(dot(input, kernel) + bias)`
  where `activation` is the element-wise activation function
  passed as the `activation` argument, `kernel` is a weights matrix
  created by the layer, and `bias` is a bias vector created by the layer
  (only applicable if `use_bias` is `True`).

  Note: if the input to the layer has a rank greater than 2, then
  it is flattened prior to the initial dot product with `kernel`.

  Example:

      # as first layer in a sequential model:
      model = Sequential()
      model.add(Dense(32, input_shape=(16,)))
      # now the model will take as input arrays of shape (*, 16)
      # and output arrays of shape (*, 32)

      # after the first layer, you don't need to specify
      # the size of the input anymore:
      model.add(Dense(32))

  Arguments:
    units: Positive integer, dimensionality of the output space.
    activation: Activation function to use.
      If you don't specify anything, no activation is applied
      (ie. "linear" activation: `a(x) = x`).
    use_bias: Boolean, whether the layer uses a bias vector.
    kernel_initializer: Initializer for the `kernel` weights matrix.
    bias_initializer: Initializer for the bias vector.
    kernel_regularizer: Regularizer function applied to
      the `kernel` weights matrix.
    bias_regularizer: Regularizer function applied to the bias vector.
    activity_regularizer: Regularizer function applied to
      the output of the layer (its "activation").
    kernel_constraint: Constraint function applied to
      the `kernel` weights matrix.
    bias_constraint: Constraint function applied to the bias vector.

  Input shape:
    nD tensor with shape: `(batch_size, ..., input_dim)`.
    The most common situation would be
    a 2D input with shape `(batch_size, input_dim)`.

  Output shape:
    nD tensor with shape: `(batch_size, ..., units)`.
    For instance, for a 2D input with shape `(batch_size, input_dim)`,
    the output would have shape `(batch_size, units)`.
  """

  def __init__(self,
               units,
               activation=None,
               use_bias=True,
               kernel_initializer='glorot_uniform',
               bias_initializer='zeros',
               kernel_regularizer=None,
               bias_regularizer=None,
               activity_regularizer=None,
               kernel_constraint=None,
               bias_constraint=None,
               **kwargs):
    if 'input_shape' not in kwargs and 'input_dim' in kwargs:
      kwargs['input_shape'] = (kwargs.pop('input_dim'),)
    super(Dense, self).__init__(
        activity_regularizer=regularizers.get(activity_regularizer), **kwargs)
    self.units = int(units)
    self.activation = activations.get(activation)
    self.use_bias = use_bias
    self.kernel_initializer = initializers.get(kernel_initializer)
    self.bias_initializer = initializers.get(bias_initializer)
    self.kernel_regularizer = regularizers.get(kernel_regularizer)
    self.bias_regularizer = regularizers.get(bias_regularizer)
    self.kernel_constraint = constraints.get(kernel_constraint)
    self.bias_constraint = constraints.get(bias_constraint)
    self.supports_masking = True
    self.input_spec = InputSpec(min_ndim=2)

  def build(self, input_shape):
    input_shape = tensor_shape.TensorShape(input_shape)
    if input_shape[-1].value is None:
      raise ValueError('The last dimension of the inputs to `Dense` '
                       'should be defined. Found `None`.')
    self.input_spec = InputSpec(min_ndim=2,
                                axes={-1: input_shape[-1].value})
    self.kernel = self.add_weight(
        'kernel',
        shape=[input_shape[-1].value, self.units],
        initializer=self.kernel_initializer,
        regularizer=self.kernel_regularizer,
        constraint=self.kernel_constraint,
        dtype=self.dtype,
        trainable=True)
    if self.use_bias:
      self.bias = self.add_weight(
          'bias',
          shape=[self.units,],
          initializer=self.bias_initializer,
          regularizer=self.bias_regularizer,
          constraint=self.bias_constraint,
          dtype=self.dtype,
          trainable=True)
    else:
      self.bias = None
    self.built = True

  def call(self, inputs):
    inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
    rank = common_shapes.rank(inputs)
    if rank > 2:
      # Broadcasting is required for the inputs.
      outputs = standard_ops.tensordot(inputs, self.kernel, [[rank - 1], [0]])
      # Reshape the output back to the original ndim of the input.
      if not context.executing_eagerly():
        shape = inputs.get_shape().as_list()
        output_shape = shape[:-1] + [self.units]
        outputs.set_shape(output_shape)
    else:
      outputs = gen_math_ops.mat_mul(inputs, self.kernel)
    if self.use_bias:
      outputs = nn.bias_add(outputs, self.bias)
    if self.activation is not None:
      return self.activation(outputs)  # pylint: disable=not-callable
    return outputs

  def compute_output_shape(self, input_shape):
    input_shape = tensor_shape.TensorShape(input_shape)
    input_shape = input_shape.with_rank_at_least(2)
    if input_shape[-1].value is None:
      raise ValueError(
          'The innermost dimension of input_shape must be defined, but saw: %s'
          % input_shape)
    return input_shape[:-1].concatenate(self.units)

  def get_config(self):
    config = {
        'units': self.units,
        'activation': activations.serialize(self.activation),
        'use_bias': self.use_bias,
        'kernel_initializer': initializers.serialize(self.kernel_initializer),
        'bias_initializer': initializers.serialize(self.bias_initializer),
        'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),
        'bias_regularizer': regularizers.serialize(self.bias_regularizer),
        'activity_regularizer':
            regularizers.serialize(self.activity_regularizer),
        'kernel_constraint': constraints.serialize(self.kernel_constraint),
        'bias_constraint': constraints.serialize(self.bias_constraint)
    }
    base_config = super(Dense, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))
```
```python
import tensorflow as tf
K = tf.keras  # assuming the same alias used in the SoftmaxLoss example below

class Actionselect(object):
  """Plain wrapper around a built-in layer: no explicit build() call needed."""

  def __init__(self, action_class, **kwargs):
    # The inner Dense layer is initialized here and builds itself on first call.
    self.multiclass_dense_layer = K.layers.Dense(action_class)

  def __call__(self, input_data):
    return self.multiclass_dense_layer(input_data)
```
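A hypothetical call for this wrapper (the input shape is made up); the inner `Dense` builds itself the first time it is invoked:

```python
selector = Actionselect(action_class=10)
logits = selector(tf.zeros((2, 32)))   # Dense.build() runs here automatically
```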
- Basically a layer needs to be initialized in two places, once in `__init__` and once in `build`, provided it does not contain other layers. If it does contain one, then constructing that inner layer in `__init__` is itself only an initialization, so you additionally have to call `self.multiclass_dense_layer.build(input_shape)` inside `build`, as in the `SoftmaxLoss` example below.
- `super(SoftmaxLoss, self).__init__(**kwargs)` initializes the layer itself, while the inner layer added with `self.multiclass_dense_layer = K.layers.Dense(self.vocab_size)` needs no explicit initialization of its own, because handing it `self.vocab_size` is already its initialization.
- If you need several inputs, concatenate them before they go into the network; several outputs are even simpler: just return multiple values from `call` (see the short multi-output sketch after the `SoftmaxLoss` example below).
```python
import tensorflow as tf

K = tf.keras


class SoftmaxLoss(K.layers.Layer):
  """Softmax xentropy loss with candidate sampling."""

  def __init__(self,
               vocab_size,
               num_candidate_samples=-1,
               vocab_freqs=None,
               **kwargs):
    self.vocab_size = vocab_size
    self.num_candidate_samples = num_candidate_samples
    self.vocab_freqs = vocab_freqs
    super(SoftmaxLoss, self).__init__(**kwargs)
    self.multiclass_dense_layer = K.layers.Dense(self.vocab_size)

  def build(self, input_shape):
    input_shape = input_shape[0]
    with tf.device('/cpu:0'):
      self.lin_w = self.add_weight(
          shape=(input_shape[-1], self.vocab_size),
          name='lm_lin_w',
          initializer=K.initializers.glorot_uniform())
      self.lin_b = self.add_weight(
          shape=(self.vocab_size,),
          name='lm_lin_b',
          initializer=K.initializers.glorot_uniform())
      self.multiclass_dense_layer.build(input_shape)

    super(SoftmaxLoss, self).build(input_shape)

  def call(self, inputs):
    x, labels, weights = inputs
    if self.num_candidate_samples > -1:
      assert self.vocab_freqs is not None
      labels_reshaped = tf.reshape(labels, [-1])
      labels_reshaped = tf.expand_dims(labels_reshaped, -1)
      sampled = tf.nn.fixed_unigram_candidate_sampler(
          true_classes=labels_reshaped,
          num_true=1,
          num_sampled=self.num_candidate_samples,
          unique=True,
          range_max=self.vocab_size,
          unigrams=self.vocab_freqs)
      inputs_reshaped = tf.reshape(x, [-1, int(x.get_shape()[2])])

      lm_loss = tf.nn.sampled_softmax_loss(
          weights=tf.transpose(self.lin_w),
          biases=self.lin_b,
          labels=labels_reshaped,
          inputs=inputs_reshaped,
          num_sampled=self.num_candidate_samples,
          num_classes=self.vocab_size,
          sampled_values=sampled)
      lm_loss = tf.reshape(
          lm_loss,
          [int(x.get_shape()[0]), int(x.get_shape()[1])])
    else:
      logits = self.multiclass_dense_layer(x)
      lm_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
          logits=logits, labels=labels)

    # _num_labels is a helper defined elsewhere in the original source
    # (roughly, the sum of the weights, used to average the loss).
    lm_loss = tf.identity(
        tf.reduce_sum(lm_loss * weights) / _num_labels(weights),
        name='lm_xentropy_loss')
    return lm_loss
```
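To illustrate the multi-output point above, here is a minimal hedged sketch (the layer name `SplitStats` and both outputs are made up; it reuses the `tf` / `K = tf.keras` aliases from the example above):

```python
class SplitStats(K.layers.Layer):
  """Toy layer with two outputs: call() simply returns a list."""

  def call(self, inputs):
    # Two outputs computed from the same input tensor.
    return [tf.reduce_mean(inputs, axis=-1), tf.reduce_max(inputs, axis=-1)]

  def compute_output_shape(self, input_shape):
    # One shape per output, mirroring the list returned by call().
    return [input_shape[:-1], input_shape[:-1]]
```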