近日做道路识别,由于样本占比较小,考虑每个batch计算交叉熵时针对性地增加所占权重,因此需要对label进行计数,计算出道路占比多少。
如果是两分类的话(0/1),直接使用tf.count_nonzero()函数即可;
如果是多分类,那么就需要使用tf.bincount()或者tf.unique_with_counts()。
TIPS:
1. tf.bincount()和tf.unique_with_counts()这两个函数都慢于Counter()以及np.bincount()函数。
2. tf.bincount()类似于np.bincount(),返回一个数组;tf.unique_with_counts()返回三个变量(y, idx, count)。建议使用前者。
3. 使用tf.bincount()时,对于n分类(标签为0到n-1),如果当前batch中最大的标签n-1没有出现(count为0),那么返回的数组长度为max(arr)+1,小于n(与np.bincount()的行为一致),后续操作时有可能因形状不一致而报错;可以使用"minlength=n"参数保证输出长度至少为n。
4. 除了这几个函数以外,好像tf.unsorted_segment_sum()也可以,待研究。
源代码介绍如下:
def bincount(arr,
             weights=None,
             minlength=None,
             maxlength=None,
             dtype=dtypes.int32):
    """Count how often each non-negative integer value occurs in `arr`.

    Without `minlength` or `maxlength`, the result has length
    `tf.reduce_max(arr) + 1` for a non-empty `arr` and length 0 for an empty
    one. When `weights` is supplied, bin `i` holds the sum of the weights found
    at the positions where `arr` equals `i`, instead of a plain count.

    Args:
      arr: An int32 tensor of non-negative values.
      weights: Optional. Must have the same shape as `arr`; each value in `arr`
        adds its corresponding weight to the bin rather than 1.
      minlength: Optional. The output is at least this long, zero-padded at
        the end when needed.
      maxlength: Optional. The output is at most this long; values in `arr`
        that are equal to or greater than `maxlength` are skipped.
      dtype: Type of the output bins when `weights` is None.

    Returns:
      A vector of bin values, typed like `weights` when given, otherwise
      `dtype`.
    """
    values = ops.convert_to_tensor(arr, name="arr", dtype=dtypes.int32)

    # Multiplying by a 0/1 flag collapses the size to 0 for an empty input,
    # where "max + 1" alone would not yield the documented length of 0.
    has_elements = reduce_prod(array_ops.shape(values)) > 0
    nonempty_flag = cast(has_elements, dtypes.int32)
    num_bins = nonempty_flag * (reduce_max(values) + 1)

    # Apply the lower bound first, then the upper bound, so `maxlength` wins
    # if the two conflict.
    if minlength is not None:
        lower_bound = ops.convert_to_tensor(
            minlength, name="minlength", dtype=dtypes.int32)
        num_bins = gen_math_ops.maximum(lower_bound, num_bins)
    if maxlength is not None:
        upper_bound = ops.convert_to_tensor(
            maxlength, name="maxlength", dtype=dtypes.int32)
        num_bins = gen_math_ops.minimum(upper_bound, num_bins)

    if weights is None:
        # Unweighted counting: an empty tensor of the requested `dtype` is
        # passed in place of real weights (presumably the op takes its output
        # type from it -- confirm against the op definition).
        no_weights = constant_op.constant([], dtype)
        return gen_math_ops.bincount(values, num_bins, no_weights)

    # Weighted counting is a segment sum keyed by the values themselves.
    # NOTE(review): values >= `maxlength` are not filtered here before the
    # segment sum -- confirm the op tolerates out-of-range segment ids.
    weight_tensor = ops.convert_to_tensor(weights, name="weights")
    return gen_math_ops.unsorted_segment_sum(weight_tensor, values, num_bins)
def unique_with_counts(x, out_idx=_dtypes.int32, name=None):
    """Find the unique elements of a 1-D tensor along with their counts.

    Three tensors are returned:

    * `y` holds the distinct elements of `x`, in order of first occurrence.
    * `idx` has the same size as `x`; `idx[i]` is the position in `y` of the
      value `x[i]`, so `y[idx[i]] == x[i]` for every valid index `i` of `x`.
    * `count` holds, for each element of `y`, how many times it occurs in `x`.

    For example:

    ```
    # tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8]
    y, idx, count = unique_with_counts(x)
    y ==> [1, 2, 4, 7, 8]
    idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4]
    count ==> [2, 1, 3, 1, 2]
    ```

    Args:
      x: A `Tensor`. 1-D.
      out_idx: An optional `tf.DType` from: `tf.int32, tf.int64`. Defaults to
        `tf.int32`; passing None selects the default as well.
      name: A name for the operation (optional).

    Returns:
      A tuple of `Tensor` objects (y, idx, count).

      y: A `Tensor`. Has the same type as `x`. 1-D.
      idx: A `Tensor` of type `out_idx`. 1-D.
      count: A `Tensor` of type `out_idx`. 1-D.
    """
    index_type = _dtypes.int32 if out_idx is None else out_idx
    index_type = _execute.make_type(index_type, "out_idx")

    ctx = _context.context()
    if not ctx.in_graph_mode():
        # Eager mode: resolve the element type from the input, then run the
        # op immediately; it produces three outputs.
        matched_type, (eager_x,) = _execute.args_to_matching_eager([x], ctx)
        type_enum = matched_type.as_datatype_enum
        flat_inputs = [eager_x]
        op_attrs = ("T", type_enum, "out_idx", index_type)
        outputs = _execute.execute(
            b"UniqueWithCounts", 3, inputs=flat_inputs, attrs=op_attrs,
            ctx=ctx, name=name)
    else:
        # Graph mode: add the op to the graph and read inputs, outputs and
        # attributes back from the created op.
        _, _, graph_op = _op_def_lib._apply_op_helper(
            "UniqueWithCounts", x=x, out_idx=index_type, name=name)
        outputs = graph_op.outputs[:]
        flat_inputs = graph_op.inputs
        op_attrs = (
            "T", graph_op.get_attr("T"),
            "out_idx", graph_op.get_attr("out_idx"),
        )

    # Both modes record the op for gradient bookkeeping before the raw
    # outputs are wrapped in the named result tuple.
    _execute.record_gradient(
        "UniqueWithCounts", flat_inputs, op_attrs, outputs, name)
    return _UniqueWithCountsOutput._make(outputs)