论文:Segmentation-Based Deep-Learning Approach for Surface-Defect Detection
环境:python3.5, cuda10 ,cudnn7.6 , tensorflow1.13
参考:https://blog.csdn.net/jinxiaonian11/article/details/94316567
记录原因:
每个gpu中都存有模型,并共享所有的变量,分别在不同的gpu中计算不同batch的数据,得到loss_1,loss_2,…,loss_num_gpus,最后将所有loss放到cpu中进行mean,最后update。
def average_gradients(self, tower_grads):
    """Average gradients across all GPU towers.

    Args:
        tower_grads: list with one entry per GPU; each entry is the list of
            (gradient, variable) tuples returned by
            Optimizer.compute_gradients for that tower.

    Returns:
        A single list of (averaged_gradient, variable) tuples suitable for
        Optimizer.apply_gradients.
    """
    average_grads = []
    # zip(*tower_grads) groups the per-tower entries of the same variable:
    # each grad_and_vars is ((grad_gpu0, var), (grad_gpu1, var), ...).
    for grad_and_vars in zip(*tower_grads):
        # Fix: compute_gradients yields None for variables the loss does not
        # depend on; the original tf.expand_dims(g, 0) would crash on None.
        grads = [g for g, _ in grad_and_vars if g is not None]
        if not grads:
            continue
        # Stack per-tower gradients along a new axis 0 and take the mean
        # (tf.stack == expand_dims + concat of the original code).
        grad = tf.reduce_mean(tf.stack(grads, 0), 0)
        # All towers share variables, so take the variable from tower 0.
        v = grad_and_vars[0][1]
        average_grads.append((grad, v))
    return average_grads
def build_model(self):
    """Build the multi-GPU data-parallel training graph.

    Two sub-networks are built inside each GPU tower: SegmentNet (per-pixel
    defect segmentation) and DecisionNet (image-level defect classification
    on top of the segmentation features and mask).  Each tower slices its
    own sub-batch out of the shared placeholders, computes its losses and
    gradients, and the per-tower gradients are averaged with
    self.average_gradients before a single apply_gradients update.

    NOTE(review): relies on names defined outside this snippet (tf, slim,
    math_ops, IMAGE_SIZE, utils) and on instance attributes
    (self.__num_gpus, self.__mode, self.__batch_size, self.__learn_rate,
    self.is_training) -- confirm against the full class.
    """
    def SegmentNet(input, scope, is_training, reuse=None):
        # Segmentation stage: three 2x2 max-pools downsample the input by
        # 8x; conv11 predicts a one-channel logit map, sigmoid gives the mask.
        # NOTE(review): is_training is accepted but never forwarded to
        # slim.batch_norm, so batch norm always runs in its default mode.
        with tf.variable_scope(scope, reuse=reuse):
            with slim.arg_scope([slim.conv2d],
                                padding='SAME',
                                activation_fn=tf.nn.relu,
                                normalizer_fn=slim.batch_norm):
                net = slim.conv2d(input, 32, [5, 5],scope='conv1')
                net = slim.conv2d(net, 32, [5, 5], scope='conv2')
                net=slim.max_pool2d(net,[2,2],[2,2],scope='pool1')
                net = slim.conv2d(net, 64, [5, 5],scope='conv3')
                net = slim.conv2d(net, 64, [5, 5], scope='conv4')
                net = slim.conv2d(net, 64, [5, 5], scope='conv5')
                net=slim.max_pool2d(net,[2,2],[2,2],scope='pool2')
                net = slim.conv2d(net, 64, [5, 5],scope='conv6')
                net = slim.conv2d(net, 64, [5, 5], scope='conv7')
                net = slim.conv2d(net, 64, [5, 5],scope='conv8')
                net = slim.conv2d(net, 64, [5, 5], scope='conv9')
                net=slim.max_pool2d(net,[2,2],[2,2],scope='pool3')
                net = slim.conv2d(net, 1024, [15, 15], scope='conv10')
                # Wide 1024-channel feature map, reused by DecisionNet.
                features=net
                net = slim.conv2d(net, 1, [1, 1],activation_fn=None, scope='conv11')
                # Raw per-pixel logits (pre-sigmoid), used for the loss.
                logits_pixel=net
                net=tf.sigmoid(net, name=None)
                mask=net
                return features,logits_pixel,mask
    def DecisionNet(feature,mask, scope, is_training,num_classes=2, reuse=None):
        # Decision stage: classifies the whole image from the segmentation
        # features concatenated with the predicted mask.
        with tf.variable_scope(scope, reuse=reuse):
            with slim.arg_scope([slim.conv2d],
                                padding='SAME',
                                activation_fn=tf.nn.relu,
                                normalizer_fn=slim.batch_norm):
                net=tf.concat([feature,mask],axis=3)
                net = slim.max_pool2d(net, [2, 2], [2, 2], scope='pool1')
                net = slim.conv2d(net, 8, [5, 5], scope='conv1')
                net = slim.max_pool2d(net, [2, 2], [2, 2], scope='pool2')
                net = slim.conv2d(net, 16, [5, 5], scope='conv2')
                net = slim.max_pool2d(net, [2, 2], [2, 2], scope='pool3')
                net = slim.conv2d(net, 32, [5, 5], scope='conv3')
                # Global average/max pooling over the spatial axes [1, 2]
                # for both the conv features and the raw mask; keepdims=True
                # so the four vectors can be concatenated on the channel axis.
                vector1=math_ops.reduce_mean(net,[1,2],name='pool4', keepdims=True)
                vector2=math_ops.reduce_max(net,[1,2],name='pool5', keepdims=True)
                vector3=math_ops.reduce_mean(mask,[1,2],name='pool6', keepdims=True)
                vector4=math_ops.reduce_max(mask,[1,2],name='pool7', keepdims=True)
                vector=tf.concat([vector1,vector2,vector3,vector4],axis=3)
                # Drop the singleton spatial dims -> (batch, channels).
                vector=tf.squeeze(vector,axis=[1,2])
                logits = slim.fully_connected(vector, num_classes,activation_fn=None)
                output=tf.argmax(logits,axis=1,name='output')
                return logits,output
    # Per-tower gradient collections for the three training modes
    # (joint total loss / segmentation-only / decision-only).
    tower_grads=[]
    tower_grads_seg=[]
    tower_grads_dec=[]
    # Graph-wide input placeholders; each tower slices its sub-batch below.
    Image = tf.placeholder(tf.float32, shape=(None, IMAGE_SIZE[0],IMAGE_SIZE[1], 1), name='Image')
    # NOTE(review): IMAGE_SIZE[k]/8 is true division under Python 3; TF only
    # accepts the resulting float if it is integral, i.e. IMAGE_SIZE must be
    # divisible by 8 -- integer division // would be safer.
    PixelLabel=tf.placeholder(tf.float32,shape=(None, IMAGE_SIZE[0]/8,IMAGE_SIZE[1]/8, 1), name='PixelLabel')
    Label=tf.placeholder(tf.int32, shape=(None), name='Label')
    opt=tf.train.GradientDescentOptimizer(self.__learn_rate)
    with tf.variable_scope(tf.get_variable_scope()):
        # At test time only a single tower is built regardless of GPU count.
        itera=self.__num_gpus if self.__mode == 'training' else 1
        for i in range(itera):
            # Loss accumulators and per-tower output lists.
            # NOTE(review): as transcribed these are re-initialised on every
            # loop iteration, so after the loop they only reflect the last
            # tower; they probably belong before the for loop -- confirm
            # against the original code.
            loss_pixel_global=0
            loss_class_global=0
            loss_total_global=0
            mask_lst=[]
            logits_pixel_lst=[]
            # Build tower i on GPU i; variable ops are pinned to the CPU by
            # utils.assign_to_device so all towers share one variable copy.
            with tf.device(utils.assign_to_device('/gpu:{}'.format(i),ps_device='/cpu:0')):
                # Slice this tower's sub-batch out of the shared placeholders.
                _Image=Image[i*self.__batch_size:(i+1)*self.__batch_size]
                _PixelLabel=PixelLabel[i*self.__batch_size:(i+1)*self.__batch_size]
                _Label=Label[i*self.__batch_size:(i+1)*self.__batch_size]
                features, logits_pixel, mask=SegmentNet(_Image,'segment',self.is_training)
                logits_class,output_class=DecisionNet(features,mask, 'decision', self.is_training)
                # Flatten spatial dims so pixel logits/labels line up per image.
                logits_pixel=tf.reshape(logits_pixel,[self.__batch_size,-1])
                PixelLabel_reshape=tf.reshape(_PixelLabel,[self.__batch_size,-1])
                mask_lst.append(mask)
                logits_pixel_lst.append(logits_pixel)
                # Stage losses: per-pixel sigmoid cross-entropy for the
                # segmentation stage, sparse softmax CE for the decision stage.
                loss_pixel=tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits_pixel,labels=PixelLabel_reshape))
                loss_class = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_class,labels=_Label))
                loss_total=loss_pixel+loss_class
                loss_pixel_global+=loss_pixel
                loss_class_global+=loss_class
                loss_total_global+=loss_total
                # Variable lists: all / segmentation-scope / decision-scope.
                train_var_list = [v for v in tf.trainable_variables() ]
                train_segment_var_list = [v for v in tf.trainable_variables() if 'segment' in v.name ]
                train_decision_var_list = [v for v in tf.trainable_variables() if 'decision' in v.name]
                # Per-tower gradients for each training mode.
                grads_pixel=opt.compute_gradients(loss_pixel,train_segment_var_list)
                grads_class=opt.compute_gradients(loss_class,train_decision_var_list)
                grads_total=opt.compute_gradients(loss_total,train_var_list)
                # Collect this tower's gradients for cross-tower averaging.
                tower_grads.append(grads_total)
                tower_grads_dec.append(grads_class)
                tower_grads_seg.append(grads_pixel)
                # Reuse variables so subsequent towers share the same weights.
                tf.get_variable_scope().reuse_variables()
    # Average the per-tower gradients (placed on the CPU parameter server).
    grads=self.average_gradients(tower_grads)
    grads_seg=self.average_gradients(tower_grads_seg)
    grads_dec=self.average_gradients(tower_grads_dec)
    # One update op per training mode (joint / segmentation / decision).
    train_op=opt.apply_gradients(grads)
    train_op_seg=opt.apply_gradients(grads_seg)
    train_op_dec=opt.apply_gradients(grads_dec)
    init_op=tf.global_variables_initializer()
    # Tensors below reference the last tower built in the loop.
    self.Image=Image
    self.PixelLabel = PixelLabel
    self.Label = Label
    self.features = features
    self.logits_class=logits_class
    self.output_class=output_class
    # Accumulated loss/output collections (see NOTE above about the
    # per-iteration reset of these accumulators).
    self.loss_pixel = loss_pixel_global
    self.loss_class = loss_class_global
    self.loss_total = loss_total_global
    self.logits_pixel=logits_pixel_lst
    self.mask = mask_lst
    self.init_op=init_op
    self.train_op=train_op
    self.train_op_seg=train_op_seg
    self.train_op_dec=train_op_dec
utils.py中的assign_to_device方法
# Op types that create/hold variables; these must live on the parameter
# server device so all GPU towers share a single copy of the weights.
PS_OPS = ['Variable', 'VariableV2', 'AutoReloadVariable']

def assign_to_device(device, ps_device='/cpu:0'):
    """Return a device function for tf.device().

    The returned callable pins variable-holding ops to `ps_device` and every
    other op to `device`, so towers on different GPUs share one weight copy.
    """
    def _assign(op):
        # tf.device may pass either an Operation or a raw NodeDef.
        node_def = op if isinstance(op, tf.NodeDef) else op.node_def
        return ps_device if node_def.op in PS_OPS else device
    return _assign
compute_gradients用来计算梯度,apply_gradients用来更新对应的variable,两者配合使用,和optimizer.minimize作用相同。
def minimize(self, loss, global_step=None, var_list=None, name=None):
    """Compute gradients for `loss` and apply them in a single call.

    Equivalent to compute_gradients followed by apply_gradients, which the
    multi-GPU code above splits apart so gradients can be averaged between
    the two steps.

    Args:
        loss: tensor to minimize.
        global_step: optional counter incremented by apply_gradients.
        var_list: optional list of variables to differentiate against.
        name: optional name for the returned update operation.

    Returns:
        The operation produced by apply_gradients.

    Raises:
        ValueError: if no variable received a gradient.
    """
    # Output is [(gradient, variable), (gradient, variable), ...].
    # Fix: the optimizer method is compute_gradients (plural); the original
    # snippet's self.compute_gradient would raise AttributeError.
    grads_and_vars = self.compute_gradients(loss, var_list=var_list)
    vars_with_grad = [v for g, v in grads_and_vars if g is not None]
    if not vars_with_grad:
        # Fix: replaced the placeholder ValueError(...) with a real message.
        raise ValueError(
            "No gradients provided for any variable; check the graph for "
            "ops that do not support gradients.")
    return self.apply_gradients(grads_and_vars, global_step=global_step, name=name)
tower_grads的list长度为gpu个数
tower_grads[i]代表第i个gpu的所有变量和对应的梯度信息,是元组的list,每个元组的形式是(tf.Tensor (gradient for …), tf.Variable(…)),即每个variable以及对应的梯度。所以命名为“梯度塔”。
总的来说,相对于源码的改动就是写了个循环,然后将minimize拆分,最后计算出一个num_gpus*batchsize的数据量后,算一个mean,然后再利用apply_gradients更新参数的值。