MP:行为预测骨架的归一化操作

[图 1:MP:行为预测骨架的归一化操作]

原始版本(边界框会按 video_resolution 裁剪到图像范围内)

import numpy as np

# TODO
def normalise_bounding_box(trajectory_coordinates, video_resolution):
    """Normalise every step of a trajectory relative to that step's own bounding box.

    Each row is read as flattened (x, y) pairs. Rows containing only zeros are left untouched. For
    every other row the bounding box is obtained from compute_bounding_box, and x is mapped to
    (x - left) / (right - left) and y to (y - top) / (bottom - top).

    Argument(s):
        trajectory_coordinates -- A numpy array whose first axis indexes the steps and whose rows hold
            the flattened x and y values of the keypoints. It is modified in place, so the normalised
            values are cast to the dtype of this array.
        video_resolution -- Forwarded unchanged to compute_bounding_box.

    Return(s):
        The same trajectory_coordinates array, after normalisation.
    """
    for step_idx in range(trajectory_coordinates.shape[0]):
        coordinates = trajectory_coordinates[step_idx, :]
        if not coordinates.any():
            # Every value is zero at this step: keep the row as it is.
            continue

        left, right, top, bottom = compute_bounding_box(coordinates, video_resolution=video_resolution)

        # The box edges come back rounded, so a tiny skeleton (e.g. a single visible joint) can give
        # right == left or bottom == top. Use a unit extent in that case instead of dividing by zero,
        # which would fill the row with inf/nan.
        box_width = right - left if right != left else 1
        box_height = bottom - top if bottom != top else 1

        points = coordinates.reshape(-1, 2)
        # A coordinate of exactly 0.0 is treated as missing (compute_bounding_box ignores it as well);
        # it is pinned to the left/top edge so that it normalises to 0.
        xs = np.where(points[:, 0] == 0.0, float(left), points[:, 0])
        ys = np.where(points[:, 1] == 0.0, float(top), points[:, 1])

        points[:, 0] = (xs - left) / box_width
        points[:, 1] = (ys - top) / box_height
        trajectory_coordinates[step_idx, :] = points.reshape(-1)

    return trajectory_coordinates



# TODO
def compute_bounding_box(keypoints, video_resolution, return_discrete_values=True):
    """Return a padded bounding box around a set of keypoints, kept inside the video frame.

    Coordinates equal to 0.0 are ignored when looking for the extremes. The tight box is enlarged on
    every side by 10% of its extent plus one, then clipped to [0, width - 1] horizontally and
    [0, height - 1] vertically.

    Argument(s):
        keypoints -- A numpy array, of shape (num_keypoints * 2,), containing the x and y values of each
            keypoint detected.
        video_resolution -- A pair (width, height) describing the size of the video frame.
        return_discrete_values -- If True (default), every edge is rounded and returned as an int;
            otherwise the unrounded values are returned.

    Return(s):
        A 4-tuple (left, right, top, bottom). If no non-zero x value or no non-zero y value is
        available, (0, 0, 0, 0) is returned.
    """
    frame_width, frame_height = video_resolution

    points = keypoints.reshape(-1, 2)
    valid_x = points[:, 0][points[:, 0] != 0.0]
    valid_y = points[:, 1][points[:, 1] != 0.0]
    if valid_x.size == 0 or valid_y.size == 0:
        # No usable coordinate on at least one axis: there is no box to compute.
        return 0, 0, 0, 0

    x_min, x_max = np.min(valid_x), np.max(valid_x)
    y_min, y_max = np.min(valid_y), np.max(valid_y)

    # Padding: 10% of the (inclusive) extent on each side.
    margin_x = 0.1 * (x_max - x_min + 1)
    margin_y = 0.1 * (y_max - y_min + 1)

    last_column, last_row = frame_width - 1, frame_height - 1
    box = (
        np.clip(x_min - margin_x, 0, last_column),
        np.clip(x_max + margin_x, 0, last_column),
        np.clip(y_min - margin_y, 0, last_row),
        np.clip(y_max + margin_y, 0, last_row),
    )

    if not return_discrete_values:
        return box
    return tuple(int(round(edge)) for edge in box)


# Demo: bounding box of 100 random keypoints whose coordinates lie in [0, 100).
keypoints = np.random.random([100, 2])*100
# video_resolution holds the frame width and height. compute_bounding_box pads the box it finds,
# so these two values are used as limits that keep the padded box inside the image.
video_resolution = np.array([300,600])
res = compute_bounding_box(keypoints,video_resolution)
print(res)

# Demo: normalise a random 3-step trajectory with 100 (x, y) keypoints per step.
trajectory_coordinates = np.random.random([3,100*2])*100
aaa = normalise_bounding_box(trajectory_coordinates,video_resolution)
print(aaa)

无需 video_resolution 的版本(边界框不再按图像范围裁剪)

import numpy as np

# TODO
def normalise_bounding_box(trajectory_coordinates):
    """Normalise every step of a trajectory relative to that step's own bounding box.

    Each row is read as flattened (x, y) pairs. Rows containing only zeros are left untouched. For
    every other row the bounding box is obtained from compute_bounding_box, and x is mapped to
    (x - left) / (right - left) and y to (y - top) / (bottom - top).

    Argument(s):
        trajectory_coordinates -- A numpy array whose first axis indexes the steps and whose rows hold
            the flattened x and y values of the keypoints. It is modified in place, so the normalised
            values are cast to the dtype of this array.

    Return(s):
        The same trajectory_coordinates array, after normalisation.
    """
    for step_idx in range(trajectory_coordinates.shape[0]):
        coordinates = trajectory_coordinates[step_idx, :]
        if not coordinates.any():
            # Every value is zero at this step: keep the row as it is.
            continue

        left, right, top, bottom = compute_bounding_box(coordinates)

        # The box edges come back rounded, so a tiny skeleton (e.g. a single visible joint) can give
        # right == left or bottom == top. Use a unit extent in that case instead of dividing by zero,
        # which would fill the row with inf/nan.
        box_width = right - left if right != left else 1
        box_height = bottom - top if bottom != top else 1

        points = coordinates.reshape(-1, 2)
        # A coordinate of exactly 0.0 is treated as missing (compute_bounding_box ignores it as well);
        # it is pinned to the left/top edge so that it normalises to 0.
        xs = np.where(points[:, 0] == 0.0, float(left), points[:, 0])
        ys = np.where(points[:, 1] == 0.0, float(top), points[:, 1])

        points[:, 0] = (xs - left) / box_width
        points[:, 1] = (ys - top) / box_height
        trajectory_coordinates[step_idx, :] = points.reshape(-1)

    return trajectory_coordinates



# TODO
def compute_bounding_box(keypoints, return_discrete_values=True):
    """Compute the padded bounding box of a set of keypoints, without clipping to a video frame.

    Coordinates equal to 0.0 are ignored when looking for the extremes. The tight box is enlarged on
    every side by 10% of its extent plus one; the result may therefore extend below zero.

    Argument(s):
        keypoints -- A numpy array, of shape (num_keypoints * 2,), containing the x and y values of each
            keypoint detected.
        return_discrete_values -- If True (default), every edge is rounded and returned as an int;
            otherwise the unrounded values are returned.

    Return(s):
        The bounding box of the keypoints represented by a 4-uple. The order of the corners is: left,
        right, top, bottom. If no non-zero x value or no non-zero y value is available, (0, 0, 0, 0)
        is returned.
    """
    # This variant takes no video_resolution, so nothing is unpacked from it here: the previous
    # `width, height = video_resolution` relied on an unrelated global and its values were unused.
    keypoints_reshaped = keypoints.reshape(-1, 2)
    x, y = keypoints_reshaped[:, 0], keypoints_reshaped[:, 1]
    x, y = x[x != 0.0], y[y != 0.0]
    if x.size == 0 or y.size == 0:
        # No usable coordinate on at least one axis (np.min/np.max would raise on an empty array).
        return 0, 0, 0, 0

    left, right, top, bottom = np.min(x), np.max(x), np.min(y), np.max(y)

    # Padding: 10% of the (inclusive) extent on each side.
    extra_width, extra_height = 0.1 * (right - left + 1), 0.1 * (bottom - top + 1)
    left, right = left - extra_width, right + extra_width
    top, bottom = top - extra_height, bottom + extra_height

    if return_discrete_values:
        return int(round(left)), int(round(right)), int(round(top)), int(round(bottom))
    return left, right, top, bottom


# Disabled demo of compute_bounding_box on its own:
# keypoints = np.random.random([100, 2])*100
# video_resolution = np.array([300,600])  # frame width/height, used as limits for the padded box
# res = compute_bounding_box(keypoints)
# print(res)

# Demo: normalise a random 300-step trajectory with 17 (x, y) keypoints per step and print its shape.
trajectory_coordinates = np.random.random([300,17*2])*100
aaa = normalise_bounding_box(trajectory_coordinates)
print(aaa.shape)

你可能感兴趣的:(深度学习,numpy,python)