我们都知道,计算 loss 时最终结果是一个标量(scalar):当 batch 大于 1 时,要先把每个 sample 的 loss 加和,再除以 batch 的大小。但是对于单个 sample,比如同时回归多个点时,默认的计算方法还会再除以回归点的个数。这是我阅读源码后得出的结论,如有错误,请多多指教。
下面是 TensorFlow 中几个 loss 函数的计算代码:
def absolute_difference(
        labels, predictions, weights=1.0, scope=None,
        loss_collection=ops.GraphKeys.LOSSES,
        reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
    """Adds an absolute-difference loss to the training procedure.

    The element-wise loss is `abs(predictions - labels)`, computed after both
    inputs have been converted to float. `weights` is a multiplier on that
    loss:

    * a scalar scales the whole loss by that value;
    * a `Tensor` of shape `[batch_size]` rescales the total loss of each
      sample by the matching entry;
    * a `Tensor` with the same shape as `predictions` scales every measurable
      element of `predictions` individually.

    Args:
      labels: The ground truth output tensor, same dimensions as
        `predictions`.
      predictions: The predicted outputs.
      weights: Optional `Tensor` whose rank is either 0, or the same rank as
        `labels`, and which must be broadcastable to `labels` (every dimension
        is either `1` or equal to the corresponding `losses` dimension).
      scope: The scope for the operations performed in computing the loss.
      loss_collection: Collection to which this loss will be added.
      reduction: Type of reduction to apply to the loss.

    Returns:
      Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the
      same shape as `labels`; otherwise, it is scalar.

    Raises:
      ValueError: If the shape of `predictions` doesn't match that of
        `labels`, if the shape of `weights` is invalid, or if `labels` or
        `predictions` is None.

    @compatibility(eager)
    The `loss_collection` argument is ignored when executing eagerly. Consider
    holding on to the return value or collecting losses via a
    `tf.keras.Model`.
    @end_compatibility
    """
    if labels is None:
        raise ValueError("labels must not be None.")
    if predictions is None:
        raise ValueError("predictions must not be None.")

    scope_inputs = (predictions, labels, weights)
    with ops.name_scope(
            scope, "absolute_difference", scope_inputs) as loss_scope:
        float_predictions = math_ops.to_float(predictions)
        float_labels = math_ops.to_float(labels)
        # Reject statically incompatible shapes before building the loss ops.
        float_predictions.get_shape().assert_is_compatible_with(
            float_labels.get_shape())

        difference = math_ops.subtract(float_predictions, float_labels)
        elementwise_loss = math_ops.abs(difference)

        # Weighting and reduction are delegated; the call stays inside the
        # name scope so the reduction ops are created under it.
        return compute_weighted_loss(
            elementwise_loss, weights, loss_scope, loss_collection,
            reduction=reduction)
@tf_export(v1=["losses.softmax_cross_entropy"])
def softmax_cross_entropy(
        onehot_labels, logits, weights=1.0, label_smoothing=0, scope=None,
        loss_collection=ops.GraphKeys.LOSSES,
        reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
    """Creates a cross-entropy loss using tf.nn.softmax_cross_entropy_with_logits_v2.

    `weights` acts as a coefficient for the loss. If a scalar is provided,
    then the loss is simply scaled by the given value. If `weights` is a
    tensor of shape `[batch_size]`, then the loss weights apply to each
    corresponding sample.

    If `label_smoothing` is nonzero, smooth the labels towards 1/num_classes:

        new_onehot_labels = onehot_labels * (1 - label_smoothing)
                            + label_smoothing / num_classes

    where `num_classes` is the size of the last axis of `onehot_labels`.

    Note that `onehot_labels` and `logits` must have the same shape,
    e.g. `[batch_size, num_classes]`. The shape of `weights` must be
    broadcastable to loss, whose shape is decided by the shape of `logits`.
    In case the shape of `logits` is `[batch_size, num_classes]`, loss is
    a `Tensor` of shape `[batch_size]`.

    Args:
      onehot_labels: One-hot-encoded labels.
      logits: Logits outputs of the network.
      weights: Optional `Tensor` that is broadcastable to loss.
      label_smoothing: If greater than 0 then smooth the labels.
      scope: The scope for the operations performed in computing the loss.
      loss_collection: Collection to which the loss will be added.
      reduction: Type of reduction to apply to loss.

    Returns:
      Weighted loss `Tensor` of the same type as `logits`. If `reduction` is
      `NONE`, this has shape `[batch_size]`; otherwise, it is scalar.

    Raises:
      ValueError: If the shape of `logits` doesn't match that of
        `onehot_labels`, if the shape of `weights` is invalid, or if
        `weights` is None. Also if `onehot_labels` or `logits` is None.

    @compatibility(eager)
    The `loss_collection` argument is ignored when executing eagerly. Consider
    holding on to the return value or collecting losses via a
    `tf.keras.Model`.
    @end_compatibility
    """
    if onehot_labels is None:
        raise ValueError("onehot_labels must not be None.")
    if logits is None:
        raise ValueError("logits must not be None.")
    with ops.name_scope(scope, "softmax_cross_entropy_loss",
                        (logits, onehot_labels, weights)) as scope:
        logits = ops.convert_to_tensor(logits)
        # Labels take the dtype of the logits so the two can be combined.
        onehot_labels = math_ops.cast(onehot_labels, logits.dtype)
        logits.get_shape().assert_is_compatible_with(
            onehot_labels.get_shape())

        if label_smoothing > 0:
            # Read the class count from the LAST axis, which is the class
            # axis `softmax_cross_entropy_with_logits_v2` uses by default.
            # Indexing axis 1 is only correct for rank-2 input; for
            # `[batch, d1, ..., num_classes]` it would pick up `d1` instead.
            num_classes = math_ops.cast(
                array_ops.shape(onehot_labels)[-1], logits.dtype)
            smooth_positives = 1.0 - label_smoothing
            smooth_negatives = label_smoothing / num_classes
            onehot_labels = (
                onehot_labels * smooth_positives + smooth_negatives)

        # Gradients must not flow back into the labels.
        onehot_labels = array_ops.stop_gradient(
            onehot_labels, name="labels_stop_gradient")
        losses = nn.softmax_cross_entropy_with_logits_v2(
            labels=onehot_labels, logits=logits, name="xentropy")

        return compute_weighted_loss(
            losses, weights, scope, loss_collection, reduction=reduction)
它们返回的都是 compute_weighted_loss 函数的返回值。该函数的代码如下:
@tf_export(v1=["losses.compute_weighted_loss"])
def compute_weighted_loss(
        losses, weights=1.0, scope=None, loss_collection=ops.GraphKeys.LOSSES,
        reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
    """Computes the weighted loss.

    `losses` and `weights` are converted to float and multiplied
    element-wise. What happens next depends on `reduction`:

    * `NONE`: the weighted element-wise losses are returned unreduced.
    * `MEAN`: their sum is divided by the sum of `weights` broadcast over
      `losses`.
    * `SUM_BY_NONZERO_WEIGHTS` / `SUM_OVER_NONZERO_WEIGHTS`: their sum is
      divided by the count returned by `_num_present(losses, weights)`.
    * `SUM_OVER_BATCH_SIZE`: their sum is divided by
      `_num_elements(losses)`.
    * any other value accepted by `Reduction.validate`: the plain sum.

    All divisions go through `_safe_mean`. The result is cast back to the
    dtype of `losses` and registered via `util.add_loss`.

    Args:
      losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
      weights: Optional `Tensor` whose rank is either 0, or the same rank as
        `losses`, and which must be broadcastable to `losses` (every dimension
        is either `1` or equal to the corresponding `losses` dimension).
      scope: The scope for the operations performed in computing the loss.
      loss_collection: The loss will be added to these collections.
      reduction: Type of reduction to apply to loss.

    Returns:
      Weighted loss `Tensor` of the same type as `losses`. If `reduction` is
      `NONE`, this has the same shape as `losses`; otherwise, it is scalar.

    Raises:
      ValueError: If `weights` is `None` or the shape is not compatible with
        `losses`, or if the number of dimensions (rank) of either `losses` or
        `weights` is missing.

    Note:
      When calculating the gradient of a weighted loss, contributions from
      both `losses` and `weights` are considered. If your `weights` depend
      on some model parameters but you do not want this to affect the loss
      gradient, apply `tf.stop_gradient` to `weights` before passing them to
      `compute_weighted_loss`.

    @compatibility(eager)
    The `loss_collection` argument is ignored when executing eagerly. Consider
    holding on to the return value or collecting losses via a
    `tf.keras.Model`.
    @end_compatibility
    """
    Reduction.validate(reduction)
    with ops.name_scope(scope, "weighted_loss", (losses, weights)):
        # Remember the `reduction` argument on the graph so the loss can be
        # normalized when distributing to multiple replicas.
        # TODO(josh11b): Associate it with the returned op for more precision.
        ops.get_default_graph()._last_loss_reduction = reduction  # pylint: disable=protected-access

        broadcast_check = weights_broadcast_ops.assert_broadcastable(
            weights, losses)
        with ops.control_dependencies((broadcast_check,)):
            losses = ops.convert_to_tensor(losses)
            original_dtype = losses.dtype
            float_losses = math_ops.to_float(losses)
            float_weights = math_ops.to_float(weights)
            weighted = math_ops.multiply(float_losses, float_weights)

            if reduction == Reduction.NONE:
                result = weighted
            else:
                total = math_ops.reduce_sum(weighted)
                if reduction == Reduction.MEAN:
                    # Denominator: sum of the weights broadcast over losses.
                    weight_sum = math_ops.reduce_sum(
                        array_ops.ones_like(float_losses) * float_weights)
                    result = _safe_mean(total, weight_sum)
                elif (reduction == Reduction.SUM_BY_NONZERO_WEIGHTS or
                      reduction == Reduction.SUM_OVER_NONZERO_WEIGHTS):
                    present_count = _num_present(float_losses, float_weights)
                    result = _safe_mean(total, present_count)
                elif reduction == Reduction.SUM_OVER_BATCH_SIZE:
                    element_count = _num_elements(float_losses)
                    result = _safe_mean(total, element_count)
                else:
                    # Remaining reductions keep the plain sum.
                    result = total

            # Convert the result back to the input type.
            result = math_ops.cast(result, original_dtype)
            util.add_loss(result, loss_collection)
            return result
def _num_present(losses, weights, per_batch=False):
    """Counts the elements of `losses` that carry a non-zero weight.

    `weights` is turned into a 0/1 mask (0 where the weight equals zero, 1
    elsewhere), the mask is broadcast to `losses` with
    `weights_broadcast_ops.broadcast_weights`, and the mask is summed.

    Shortcut: when `weights` is a non-zero Python float, or (in eager mode) a
    rank-0 value that is not equal to zero, every element is present and
    `_num_elements(losses)` is returned directly.

    Args:
      losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
      weights: `Tensor` of shape `[]`, `[batch_size]` or
        `[batch_size, d1, ... dK]`, where K < N.
      per_batch: Whether to return the number of elements per batch or as a
        sum total.

    Returns:
      The number of present (non-zero) elements in the losses tensor. If
      `per_batch` is `True`, the mask is summed over every axis except the
      first with `keepdims=True`, so the reduced axes remain with size 1.
      Otherwise a single scalar tensor is returned. The shortcut path returns
      `_num_elements(losses)` regardless of `per_batch`.
    """
    # A non-zero Python float weight: every element counts.
    if isinstance(weights, float) and weights != 0.0:
        return _num_elements(losses)
    # Same shortcut for a non-zero rank-0 weight when running eagerly.
    if (context.executing_eagerly() and weights._rank() == 0  # pylint: disable=protected-access
            and not math_ops.equal(weights, 0.0)):
        return _num_elements(losses)

    with ops.name_scope(None, "num_present", (losses, weights)) as op_name:
        float_weights = math_ops.to_float(weights)
        is_zero = math_ops.equal(float_weights, 0.0)
        zeros = array_ops.zeros_like(float_weights)
        ones = array_ops.ones_like(float_weights)
        # 0 where the weight is zero, 1 everywhere else.
        mask = array_ops.where(is_zero, zeros, ones)
        mask = weights_broadcast_ops.broadcast_weights(mask, losses)

        if not per_batch:
            return math_ops.reduce_sum(mask, name=op_name)

        # Sum over all axes but the first, keeping them as size-1 axes.
        non_batch_axes = math_ops.range(1, array_ops.rank(mask))
        return math_ops.reduce_sum(
            mask, axis=non_batch_axes, keepdims=True, name=op_name)
所以在默认的 reduction(SUM_BY_NONZERO_WEIGHTS)下,返回值等于加权后 losses 的总和,除以 weights 广播到与 losses 相同 shape 之后其中非零元素的个数。
另外,reduction 为 SUM_OVER_BATCH_SIZE 时更好理解:加权后 losses 的总和直接除以 losses 的元素总个数。