import numpy as np
import tensorflow as tf
class MAML:
def __init__(self, d, c, nway, meta_lr=1e-3, train_lr=1e-2):
"""
定义了图片大小、通道、样本类way、学习率
:param d:图片大小
:param c:通道
:param nway:类数
:param meta_lr:
:param train_lr:
"""
self.d = d
self.c = c
self.nway = nway
self.meta_lr = meta_lr
self.train_lr = train_lr
print('img shape:', self.d, self.d, self.c, 'meta-lr:', meta_lr, 'train-lr:', train_lr)
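# e.g. MAML(84, 3, 5) builds a model for 5-way classification on 84x84 RGB
# images (the setting used in __main__ below).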
def build(self, support_xb, support_yb, query_xb, query_yb, K, meta_batchsz, mode='train'):
"""
输入训练数据,调用conv_weights()函数获取权重,然后在子函数meta_task()函数中调用forward()函数获取logits计算交叉熵损失,进而计算出梯度,然后进行第一次梯度更新,同时使用更新后的权重计算元测试集上的损失;
然后在多个任务上循环,分别计算每个任务的损失,进行第二次梯度下降,同时求出元测试集上的损失;
最后求出训练集、测试集上的平均损失和平均准确率。
:param support_xb: [b, setsz, 84*84*3]
:param support_yb: [b, setsz, n-way]
:param query_xb: [b, querysz, 84*84*3]
:param query_yb: [b, querysz, n-way]
:param K: train update steps
:param meta_batchsz:tasks number
:param mode: train/eval/test, for training, we build train&eval network meanwhile.
:return:
"""
# Create or reuse the network variables, excluding the batch_norm variables,
# which need a separate reuse mechanism (AUTO_REUSE inside conv_block).
self.weights = self.conv_weights()  # 4 conv kernels + 1 fc weight matrix, plus 5 biases
print('self.weights:',self.weights.values())
# TODO: meta-test is sort of test stage.
training = (mode == 'train')
def meta_task(inp):
"""
map_fn only support one parameters, so we need to unpack from tuple.
map_fn仅支持一个参数,所以我们需要从元组中解包
:param support_x: [setsz, 84*84*3]
:param support_y: [setsz, n-way]
:param query_x: [querysz, 84*84*3]
:param query_y: [querysz, n-way]
:param training: training or not, for batch_norm
:return:
"""
support_x, support_y, query_x, query_y = inp
print('input:', inp)
# record the query-set preds/losses/accs at each of the K update steps
query_preds, query_losses, query_accs = [], [], []
support_pred = self.forward(support_x, self.weights, training)
support_loss = tf.nn.softmax_cross_entropy_with_logits(logits=support_pred, labels=support_y)
support_acc = tf.contrib.metrics.accuracy(tf.argmax(tf.nn.softmax(support_pred, axis=1), axis=1),
                                          tf.argmax(support_y, axis=1))
# compute gradients of the support loss w.r.t. the initial weights
grads = tf.gradients(support_loss, list(self.weights.values()))
print('grads:', grads)
# grad and variable dict
# zip pairs each weight name with its gradient, so dict(zip(...)) gives a {name: gradient} mapping.
gvs = dict(zip(self.weights.keys(), grads))
print('gvs:',gvs)
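# e.g. with keys ['conv1', 'b1', ...] and matching grads [g0, g1, ...]:
# gvs == {'conv1': g0, 'b1': g1, ...}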
# theta_pi = theta - alpha * grads: the first inner-loop gradient update
fast_weights = dict(zip(self.weights.keys(),
[self.weights[key] - self.train_lr * gvs[key] for key in self.weights.keys()]))
print('fast_weights:',fast_weights)
# use theta_pi to forward meta-test
query_pred = self.forward(query_x, fast_weights, training)
# meta-test loss
query_loss = tf.nn.softmax_cross_entropy_with_logits(logits=query_pred, labels=query_y)
# record T0 pred and loss for meta-test
query_preds.append(query_pred)
query_losses.append(query_loss)
print('query_preds:', query_preds)
print('query_losses:', query_losses)
# continue to build T1-TK steps graph
for _ in range(1, K):
# T_k loss on meta-train
# we need meta-train loss to fine-tune the task and meta-test loss to update theta
loss = tf.nn.softmax_cross_entropy_with_logits(logits=self.forward(support_x, fast_weights, training),
labels=support_y)
# compute gradients
grads = tf.gradients(loss, list(fast_weights.values()))
# compose grad and variable dict
gvs = dict(zip(fast_weights.keys(), grads))
# update theta_pi: the k-th inner-loop gradient update (steps 2..K)
fast_weights = dict(zip(fast_weights.keys(),
[fast_weights[key] - self.train_lr * gvs[key] for key in fast_weights.keys()]))
# forward on theta_pi
query_pred = self.forward(query_x, fast_weights, training)
# we need accumulate all meta-test losses to update theta
query_loss = tf.nn.softmax_cross_entropy_with_logits(logits=query_pred, labels=query_y)
query_preds.append(query_pred)
query_losses.append(query_loss)
# compute every steps' accuracy on query set
for i in range(K):
query_accs.append(tf.contrib.metrics.accuracy(tf.argmax(tf.nn.softmax(query_preds[i], axis=1), axis=1),
                                              tf.argmax(query_y, axis=1)))
# we only use the first step's support ops (support_pred & support_loss) and ignore
# the support ops at steps 1..K-1;
# however, we return the query pred/loss/acc ops for every step.
result = [support_pred, support_loss, support_acc, query_preds, query_losses, query_accs]
return result
# return: [support_pred, support_loss, support_acc, query_preds, query_losses, query_accs]
out_dtype = [tf.float32, tf.float32, tf.float32, [tf.float32] * K, [tf.float32] * K, [tf.float32] * K]
# tf.map_fn unstacks each tensor in elems along dimension 0 and calls meta_task once per slice, passing the slices as one tuple.
result = tf.map_fn(meta_task, elems=(support_xb, support_yb, query_xb, query_yb),
dtype=out_dtype, parallel_iterations=meta_batchsz, name='map_fn')
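# e.g. with meta_batchsz = 4, support_xb [4, setsz, 84*84*3] is unstacked into 4
# slices of [setsz, 84*84*3]; meta_task is applied to each slice and the outputs
# are re-stacked along dimension 0 to match out_dtype.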
support_pred_tasks, support_loss_tasks, support_acc_tasks, \
query_preds_tasks, query_losses_tasks, query_accs_tasks = result
if mode == 'train':
# average loss
self.support_loss = support_loss = tf.reduce_sum(support_loss_tasks) / meta_batchsz
# [avgloss_t1, avgloss_t2, ..., avgloss_K]
self.query_losses = query_losses = [tf.reduce_sum(query_losses_tasks[j]) / meta_batchsz
for j in range(K)]
# average accuracy
self.support_acc = support_acc = tf.reduce_sum(support_acc_tasks) / meta_batchsz
# average accuracies
self.query_accs = query_accs = [tf.reduce_sum(query_accs_tasks[j]) / meta_batchsz
for j in range(K)]
# # add batch_norm ops before meta_op
# update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
# with tf.control_dependencies(update_ops):
# # TODO: the update_ops must be put before tf.train.AdamOptimizer,
# # otherwise it throws Not in same Frame Error.
# meta_loss = tf.identity(self.query_losses[-1])
# meta-train optimizer: Adam, an adaptive first-order method using bias-corrected
# estimates of the first and second moments of the gradients.
optimizer = tf.train.AdamOptimizer(self.meta_lr, name='meta_optim')
# meta-train gradients; query_losses[-1] is the last-step query loss, averaged over tasks
gvs = optimizer.compute_gradients(self.query_losses[-1])
# meta-train grads clipping
gvs = [(tf.clip_by_norm(grad, 10), var) for grad, var in gvs]
# update theta
self.meta_op = optimizer.apply_gradients(gvs)
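# Note: fast_weights are themselves functions of self.weights, so compute_gradients
# backpropagates through all K inner-loop updates; this is the full second-order
# MAML meta-gradient, not the first-order approximation.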
else: # test & eval
# average loss
self.test_support_loss = support_loss = tf.reduce_sum(support_loss_tasks) / meta_batchsz
# [avgloss_t1, avgloss_t2, ..., avgloss_K]
self.test_query_losses = query_losses = [tf.reduce_sum(query_losses_tasks[j]) / meta_batchsz
for j in range(K)]
# average accuracy
self.test_support_acc = support_acc = tf.reduce_sum(support_acc_tasks) / meta_batchsz
# average accuracies
self.test_query_accs = query_accs = [tf.reduce_sum(query_accs_tasks[j]) / meta_batchsz
for j in range(K)]
# NOTICE: every time we build the model, support_loss is added to the summary, but it's different each time.
# tf.summary.scalar logs scalar values for TensorBoard.
tf.summary.scalar(mode + ':support loss', support_loss)
tf.summary.scalar(mode + ':support acc', support_acc)
for j in range(K):
tf.summary.scalar(mode + ':query loss, step ' + str(j + 1), query_losses[j])
tf.summary.scalar(mode + ':query acc, step ' + str(j + 1), query_accs[j])
print('build done.')
def conv_weights(self):
'''
Create the conv-net variables: 4 conv kernels, 1 fc weight matrix, and their biases.
'''
weights = {}
conv_initializer = tf.contrib.layers.xavier_initializer_conv2d()
fc_initializer = tf.contrib.layers.xavier_initializer()
k = 3
'''
tf.AUTO_REUSE is convenient, but beware of unintended variable sharing:
if a variable with the same name already exists in this scope, get_variable
silently reuses it instead of creating a new one.
'''
with tf.variable_scope('MAML', reuse=tf.AUTO_REUSE):
weights['conv1'] = tf.get_variable('conv1w', [k, k, 3, 32], initializer=conv_initializer)
weights['b1'] = tf.get_variable('conv1b', initializer=tf.zeros([32]))
weights['conv2'] = tf.get_variable('conv2w', [k, k, 32, 32], initializer=conv_initializer)
weights['b2'] = tf.get_variable('conv2b', initializer=tf.zeros([32]))
weights['conv3'] = tf.get_variable('conv3w', [k, k, 32, 32], initializer=conv_initializer)
weights['b3'] = tf.get_variable('conv3b', initializer=tf.zeros([32]))
weights['conv4'] = tf.get_variable('conv4w', [k, k, 32, 32], initializer=conv_initializer)
weights['b4'] = tf.get_variable('conv4b', initializer=tf.zeros([32]))
# assumes 2x2 max pooling with VALID padding in each conv block (see the shape check below)
weights['w5'] = tf.get_variable('fc1w', [32 * 5 * 5, self.nway], initializer=fc_initializer)
weights['b5'] = tf.get_variable('fc1b', initializer=tf.zeros([self.nway]))
return weights
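# Shape check for the fc input above (d = 84): each conv uses SAME padding (keeps
# the spatial size) and each 2x2, stride-2 VALID max-pool maps n -> floor(n / 2),
# so 84 -> 42 -> 21 -> 10 -> 5, giving 32 * 5 * 5 = 800 features per image.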
def conv_block(self, x, weight, bias, scope, training):
"""
build a block with conv2d->batch_norm->pooling
建立一个卷积层到批量归一化到池化的块
输入数据、权重、偏移量,通过卷积、批量归一化、激活、池化得到新的数据,返回值用于forward中隐藏层。
:param x:
:param weight:
:param bias:
:param scope:
:param training:
:return:
"""
# conv
x = tf.nn.conv2d(x, weight, [1, 1, 1, 1], 'SAME', name=scope + '_conv2d') + bias
# batch norm, activation_fn=tf.nn.relu,
# NOTICE: must have tf.layers.batch_normalization
# x = tf.contrib.layers.batch_norm(x, activation_fn=tf.nn.relu)
with tf.variable_scope('MAML'):
# train is set to True ALWAYS, please refer to https://github.com/cbfinn/maml/issues/9
# when FLAGS.train=True, we still need to build evaluation network
x = tf.layers.batch_normalization(x, training=True, name=scope + '_bn', reuse=tf.AUTO_REUSE)
# relu
x = tf.nn.relu(x, name=scope + '_relu')
# pooling
x = tf.nn.max_pool(x, [1, 2, 2, 1], [1, 2, 2, 1], 'VALID', name=scope + '_pool')
return x
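# Shape example for one block, given a [b, 84, 84, 3] input and a 3x3x3x32 kernel:
# conv (SAME, stride 1) -> [b, 84, 84, 32]; batch_norm and relu keep the shape;
# 2x2 max-pool (VALID, stride 2) -> [b, 42, 42, 32].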
def forward(self, x, weights, training):
"""
输入训练集和权重,返回值用于交叉熵损失中,作为logits参数
:param x:
:param weights:
:param training:
:return:返回值用于交叉熵损失中logits参数
"""
# [b, 84, 84, 3]
x = tf.reshape(x, [-1, self.d, self.d, self.c], name='reshape1')
hidden1 = self.conv_block(x, weights['conv1'], weights['b1'], 'conv0', training)
hidden2 = self.conv_block(hidden1, weights['conv2'], weights['b2'], 'conv1', training)
hidden3 = self.conv_block(hidden2, weights['conv3'], weights['b3'], 'conv2', training)
hidden4 = self.conv_block(hidden3, weights['conv4'], weights['b4'], 'conv3', training)
# get_shape is static shape, (5, 5, 5, 32)
# print('flatten:', hidden4.get_shape())
# flatten layer
hidden4 = tf.reshape(hidden4, [-1, np.prod([int(dim) for dim in hidden4.get_shape()[1:]])], name='reshape2')
# multiply the 4th hidden layer by w5 and add b5 (the final fc layer)
output = tf.add(tf.matmul(hidden4, weights['w5']), weights['b5'], name='fc1')
return output
if __name__ == '__main__':
from data_generator import DataGenerator
kshot = 1
kquery = 15
nway = 5
meta_batchsz = 4
K = 5
db = DataGenerator(nway, kshot, kquery, meta_batchsz, 2000)
image_tensor, label_tensor = db.make_data_tensor(training=True)
print('image_tensor:', image_tensor, 'label_tensor:', label_tensor)
# split each task's examples into support and query sets (and likewise the labels)
support_x = tf.slice(image_tensor, [0, 0, 0], [-1, nway * kshot, -1], name='support_x')
query_x = tf.slice(image_tensor, [0, nway * kshot, 0], [-1, -1, -1], name='query_x')
support_y = tf.slice(label_tensor, [0, 0, 0], [-1, nway * kshot, -1], name='support_y')
query_y = tf.slice(label_tensor, [0, nway * kshot, 0], [-1, -1, -1], name='query_y')
model = MAML(84, 3, 5)
model.build(support_x, support_y, query_x, query_y, K, meta_batchsz, mode='train')
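# A minimal session-loop sketch (commented out, not part of the original script):
# it assumes the input pipeline built by DataGenerator.make_data_tensor uses
# queue runners that must be started before sess.run.
# with tf.Session() as sess:
#     sess.run(tf.global_variables_initializer())
#     coord = tf.train.Coordinator()
#     threads = tf.train.start_queue_runners(sess=sess, coord=coord)
#     for step in range(1000):
#         _, q_losses, q_accs = sess.run([model.meta_op, model.query_losses, model.query_accs])
#         if step % 100 == 0:
#             print(step, 'query loss (last step):', q_losses[-1], 'query acc:', q_accs[-1])
#     coord.request_stop()
#     coord.join(threads)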