图像处理入门100题(三)

图像处理入门100题(三)

本篇记录了自己针对GitHub上ImageProcessing 100 Wen(图像处理100问)第21-30题所写的答案,注释里包含了一些自己的感悟。为了方便,注释是用英文写的。

进一步感受到了图像处理的本质就是矩阵运算,所以线性代数的基础一定要牢靠。此外,Python的数据包也要用的熟练和巧妙,时刻有一个“矩阵运算”意识。

问题序号 题目
21 直方图归一化
22 直方图操作
23 直方图均衡化
24 伽马校正
25 最邻近插值
26 双线性插值
27 双立方插值
28 仿射变换:平移
29 仿射变换:缩放
30 仿射变换:旋转

Python答案如下,结尾包含了问题的验证。

import cv2.cv2
import numpy as np
import matplotlib.pyplot as plt


# Question 21
# Histogram Normalization
# Answer

def histogram_normalization(img, a=0, b=255):
    """Linearly stretch the pixel values of ``img`` onto the range ``[a, b]``.

    Each pixel is mapped with ``(b - a) / (d - c) * (pixel - c) + a`` where
    ``c`` and ``d`` are the minimum and maximum over the whole array (all
    channels together).

    Parameters
    ----------
    img : numpy.ndarray
        Input image, any shape.
    a, b : number
        Lower and upper bound of the target range.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array with the same shape as ``img``.

    Notes
    -----
    A flat image (``min == max``) has no range to stretch; every pixel is
    then set to ``a`` instead of dividing by zero.
    """
    c = np.min(img)  # smallest pixel value
    d = np.max(img)  # largest pixel value

    if d == c:
        # Nothing to stretch: avoid 0-division (which would produce NaN).
        out = np.full(np.shape(img), a, dtype=np.float64)
    else:
        # Work in float64 so that the subtraction can never wrap around for
        # narrow integer dtypes.
        scale = (b - a) / (float(d) - float(c))
        out = scale * (np.asarray(img, dtype=np.float64) - c) + a

    # Saturate before the cast: a float -> uint8 cast does not clamp.
    out = np.clip(out, 0, 255)

    return out.astype(np.uint8)


# Question 22
# to make the histogram of the image flatter: target mean = 128, target sigma = 52
# Answer

def histogram_mean(img, m0=128, sigma0=52):
    """Shift and scale ``img`` so its mean is ``m0`` and its std is ``sigma0``.

    Each pixel is mapped with ``sigma0 / sigma * (pixel - m) + m0`` where
    ``m`` and ``sigma`` are the mean and standard deviation over the whole
    array (all channels together).

    Parameters
    ----------
    img : numpy.ndarray
        Input image, any shape.
    m0 : number
        Target mean.
    sigma0 : number
        Target standard deviation.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array with the same shape as ``img``; values are clipped
        to ``[0, 255]`` before the cast.

    Notes
    -----
    A flat image (``sigma == 0``) cannot be rescaled; every pixel is then
    set to ``m0`` instead of dividing by zero.
    """
    m = np.mean(img)  # current mean
    sigma = np.std(img)  # current standard deviation

    if sigma == 0:
        # All pixels are equal to the mean: avoid 0-division (NaN output).
        out = np.full(np.shape(img), m0, dtype=np.float64)
    else:
        out = sigma0 / sigma * (np.asarray(img, dtype=np.float64) - m) + m0

    # Saturate before the cast: a float -> uint8 cast does not clamp.
    out = np.clip(out, 0, 255)

    return out.astype(np.uint8)


# Question 23
# histogram equalization
# out = Zmax / S * SUM(h), Zmax is the max value of the pixels
# S is the number of pixels, h is the distribution function
# Answer

def histogram_equalization(img, Zmax=255):
    """Equalize the histogram of ``img`` using its cumulative distribution.

    Every grey level ``i`` in ``0..255`` is replaced by
    ``Zmax / S * sum(h[0..i])`` where ``h`` is the histogram over all
    channels together and ``S`` is the total number of values.

    Parameters
    ----------
    img : numpy.ndarray
        2-D or 3-D image. Values are clipped to ``[0, 255]`` and truncated
        to integers before the histogram is built.
    Zmax : number
        Output value assigned to the brightest level.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array. A 2-D input is returned with a trailing channel
        axis of length 1, i.e. shape ``(H, W, 1)``.

    Notes
    -----
    All 256 levels take part in the cumulative sum, including 0 and 255.
    """
    if len(img.shape) != 3:
        img = np.expand_dims(img, axis=-1)

    S = img.size  # H * W * C
    if S == 0:
        return img.astype(np.uint8)

    # Integer grey levels usable as lookup-table indices.
    levels = np.clip(img, 0, 255).astype(np.uint8)

    # One histogram pass + cumulative sum replaces one full-image scan per
    # grey level.
    hist = np.bincount(levels.ravel(), minlength=256)
    cdf = np.cumsum(hist)

    # Lookup table: cumulative probability scaled to Zmax, saturated so the
    # uint8 cast cannot wrap when Zmax > 255.
    lut = np.clip(cdf / S * Zmax, 0, 255).astype(np.uint8)

    return lut[levels]


# Question 24
# Gamma correction
# Answer

def gamma_correction(img, c=1.0, g=2.2):
    """Apply gamma correction ``255 * (1 / c) * (img / 255) ** (1 / g)``.

    Parameters
    ----------
    img : array_like
        Input image with values expected in ``[0, 255]``.
    c : float
        Constant of the gamma model; the result is divided by it.
    g : float
        Gamma value; the normalized image is raised to ``1 / g``.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array with the same shape as ``img``. Values are clipped
        to ``[0, 255]`` before the cast, so ``c < 1`` saturates instead of
        wrapping around.
    """
    normalized = np.asarray(img, dtype=np.float64) / 255
    corrected = 1 / c * np.power(normalized, 1 / g)

    # Saturate before the cast: a float -> uint8 cast does not clamp.
    out = np.clip(corrected * 255, 0, 255)

    return out.astype(np.uint8)


# Question 25
# Nearest neighbor interpolation
# Answer

def nearest_neighbor_interpolation(img, a=1.5):
    """Resize ``img`` by the factor ``a`` with nearest-neighbour sampling.

    Output pixel ``(y, x)`` copies source pixel ``(int(y / a), int(x / a))``.

    Parameters
    ----------
    img : numpy.ndarray
        2-D ``(H, W)`` or 3-D ``(H, W, C)`` image.
    a : float
        Scale factor applied to both height and width.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(int(H * a), int(W * a), C)``; a 2-D
        input yields ``C == 1``.
    """
    if len(img.shape) != 3:
        # The expanded array must be kept, otherwise the shape unpacking
        # below fails for grayscale input.
        img = np.expand_dims(img, axis=-1)
    H, W, _ = img.shape

    Ha = int(H * a)
    Wa = int(W * a)

    # Source row / column for every output row / column. ``astype(int)``
    # truncates like ``int()``; the minimum guards against float round-off
    # pushing an index one past the last row / column.
    rows = np.minimum((np.arange(Ha) / a).astype(int), H - 1)
    cols = np.minimum((np.arange(Wa) / a).astype(int), W - 1)

    # Broadcasting (Ha, 1) against (1, Wa) gathers the whole output at once.
    out = img[rows[:, np.newaxis], cols[np.newaxis, :]]

    return out.astype(np.uint8)


# Question 26
# Bilinear Interpolation
# Answer

def bilinear_interpolation(img, a=1.5):
    """Resize ``img`` by the factor ``a`` with bilinear interpolation.

    Each output pixel ``(y, x)`` is mapped back to the source position
    ``(y / a, x / a)`` and computed as the weighted sum of the four
    surrounding source pixels.

    Parameters
    ----------
    img : numpy.ndarray
        2-D ``(H, W)`` or 3-D ``(H, W, C)`` image.
    a : float
        Scale factor applied to both height and width.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(int(a * H), int(a * W), C)``; a 2-D
        input yields ``C == 1``. Values are clipped to ``[0, 255]``.

    Notes
    -----
    The top-left neighbour index is capped at ``W - 2`` / ``H - 2`` so that
    the ``+ 1`` neighbour always exists. Source positions beyond the last
    pixel centre therefore get a weight larger than 1 (linear extrapolation
    from the last two pixels) before clipping.
    """
    if len(img.shape) != 3:
        img = np.expand_dims(img, axis=-1)
    H, W, _ = img.shape

    aH = int(a * H)
    aW = int(a * W)

    # Source coordinates of every output row / column. Kept as a column
    # vector (aH, 1) and a row vector (1, aW) so broadcasting builds the
    # full grid without materialising it.
    y = np.arange(aH).reshape(-1, 1) / a
    x = np.arange(aW).reshape(1, -1) / a

    # Top-left neighbour. The upper bound keeps index + 1 inside the image;
    # ``max(..., 0)`` keeps the bound valid for 1-pixel-wide/high images.
    ix = np.clip(np.floor(x).astype(int), 0, max(W - 2, 0))
    iy = np.clip(np.floor(y).astype(int), 0, max(H - 2, 0))

    # Bottom-right neighbour (equals index + 1 except for size-1 axes).
    ix1 = np.minimum(ix + 1, W - 1)
    iy1 = np.minimum(iy + 1, H - 1)

    # Fractional distances, with a trailing axis so the same weight is
    # applied to every channel.
    dx = (x - ix)[..., np.newaxis]
    dy = (y - iy)[..., np.newaxis]

    out = ((1 - dx) * (1 - dy) * img[iy, ix]
           + dx * (1 - dy) * img[iy, ix1]
           + (1 - dx) * dy * img[iy1, ix]
           + dx * dy * img[iy1, ix1])

    out = np.clip(out, 0, 255)

    return out.astype(np.uint8)


# Question 27
# bicubic interpolation

def bicubic_interpolation(img, a=1.5):
    """Resize ``img`` by the factor ``a`` with bicubic interpolation.

    Each output pixel ``(y, x)`` is mapped back to the source position
    ``(y / a, x / a)`` and computed from the surrounding 4x4 source pixels,
    weighted by a cubic kernel of the distance along each axis. The result
    is divided by the sum of the weights.

    Parameters
    ----------
    img : numpy.ndarray
        2-D ``(H, W)`` or 3-D ``(H, W, C)`` image.
    a : float
        Scale factor applied to both height and width.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(int(a * H), int(a * W), C)``; a 2-D
        input yields ``C == 1``. Values are clipped to ``[0, 255]``.
    """
    if len(img.shape) != 3:
        img = np.expand_dims(img, axis=-1)
    H, W, C = img.shape

    aH = int(a * H)
    aW = int(a * W)

    # Source coordinates of every output row / column, as a column vector
    # (aH, 1) and a row vector (1, aW) so broadcasting builds the grid.
    y = np.arange(aH).reshape(-1, 1) / a
    x = np.arange(aW).reshape(1, -1) / a

    # Reference pixel of the 4x4 window, capped at the second-to-last index
    # (and never negative for 1-pixel-wide/high images).
    iy = np.clip(np.floor(y).astype(int), 0, max(H - 2, 0))
    ix = np.clip(np.floor(x).astype(int), 0, max(W - 2, 0))

    def h(t):
        """Cubic kernel weight for distance ``t`` (kernel parameter -1)."""
        m = -1
        t_abs = np.abs(t)

        w = np.zeros_like(t_abs)

        near = t_abs <= 1
        w[near] = ((m + 2) * np.power(t_abs, 3)
                   - (m + 3) * np.power(t_abs, 2) + 1)[near]

        far = (t_abs > 1) & (t_abs <= 2)
        w[far] = (m * np.power(t_abs, 3) - 5 * m * np.power(t_abs, 2)
                  + 8 * m * t_abs - 4 * m)[far]

        return w

    # Window offsets relative to the reference pixel.
    offsets = (-1, 0, 1, 2)

    # The weights depend on one axis only, so compute the four per-axis
    # weight vectors once instead of inside the 4x4 loop. The trailing axis
    # applies the same weight to every channel.
    wx = [h(np.abs(x - ix - o))[..., np.newaxis] for o in offsets]
    wy = [h(np.abs(y - iy - o))[..., np.newaxis] for o in offsets]

    sum_h = np.zeros((aH, aW, C), dtype=np.float64)
    out = np.zeros((aH, aW, C), dtype=np.float64)

    for oy, w_y in zip(offsets, wy):
        # Clamp window rows / columns that fall outside the image.
        index_y = np.clip(iy + oy, 0, H - 1)
        for ox, w_x in zip(offsets, wx):
            index_x = np.clip(ix + ox, 0, W - 1)

            sum_h = sum_h + w_x * w_y
            out = out + img[index_y, index_x] * w_x * w_y

    out = out / sum_h
    out = np.clip(out, 0, 255)

    return out.astype(np.uint8)


# Question 28-29
# affine transformations (translation and scaling)
# Answer

def afine_image(img, a, b, c, d, tx=30, ty=-30):
    """Apply the affine map with matrix ``[[a, b], [c, d]]`` and shift ``(tx, ty)``.

    The output has size ``(round(H * d), round(W * a))``. For every output
    pixel the source position is found with the inverse matrix, the
    translation is subtracted, and the nearest source pixel is copied.
    Positions that fall outside the source image become 0 (black).

    Parameters
    ----------
    img : numpy.ndarray
        2-D ``(H, W)`` or 3-D ``(H, W, C)`` image.
    a, b, c, d : number
        Entries of the 2x2 matrix; ``a`` and ``d`` also set the output
        width and height.
    tx, ty : number
        Translation along x and y.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(round(H * d), round(W * a), C)``; a 2-D
        input yields ``C == 1``.

    Raises
    ------
    ValueError
        If the matrix is singular (``a * d - b * c == 0``), because the
        inverse mapping does not exist.
    """
    if len(img.shape) != 3:
        img = np.expand_dims(img, axis=-1)
    H, W, C = img.shape

    adbc = a * d - b * c
    if adbc == 0:
        raise ValueError(
            "affine matrix [[a, b], [c, d]] is singular (a * d - b * c == 0)")

    # Surround the image with a 1-pixel black border: out-of-range source
    # positions are clamped onto this border and therefore come out black.
    img_expand = np.zeros((H + 2, W + 2, C), dtype=np.float64)
    img_expand[1:H + 1, 1:W + 1] = img

    # Output size.
    H_new = int(np.round(H * d))
    W_new = int(np.round(W * a))

    # Output coordinates as a column vector (H_new, 1) and a row vector
    # (1, W_new); broadcasting builds the full grid.
    y_new = np.arange(H_new).reshape(-1, 1)
    x_new = np.arange(W_new).reshape(1, -1)

    # Inverse mapping; the "+ 1" accounts for the border of ``img_expand``.
    x = np.round((d * x_new - b * y_new) / adbc - tx).astype(int) + 1
    y = np.round((-c * x_new + a * y_new) / adbc - ty).astype(int) + 1

    # Clamp onto the padded image (indices 0 and W + 1 / H + 1 are border).
    x = np.clip(x, 0, W + 1)
    y = np.clip(y, 0, H + 1)

    out = img_expand[y, x]

    return out.astype(np.uint8)


# Question 30
# rotation
# Answer

def rotation_image(img, a, b, c, d, tx=30, ty=-30):
    """Apply the affine map ``[[a, b], [c, d]]`` and re-centre the result.

    The output has the same height and width as the input. For every output
    pixel the source position is found with the inverse matrix and the
    translation ``(tx, ty)`` is subtracted. The source positions are then
    shifted so that the midpoint of their range lines up with the image
    centre, and the nearest source pixel is copied. Positions that fall
    outside the source image become 0 (black).

    Parameters
    ----------
    img : numpy.ndarray
        2-D ``(H, W)`` or 3-D ``(H, W, C)`` image.
    a, b, c, d : number
        Entries of the 2x2 matrix (e.g. cos/sin terms for a rotation).
    tx, ty : number
        Translation along x and y, applied before the re-centring.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(H, W, C)``; a 2-D input yields
        ``C == 1``.

    Raises
    ------
    ValueError
        If the matrix is singular (``a * d - b * c == 0``), because the
        inverse mapping does not exist.
    """
    if len(img.shape) != 3:
        img = np.expand_dims(img, axis=-1)
    H, W, C = img.shape

    adbc = a * d - b * c
    if adbc == 0:
        raise ValueError(
            "affine matrix [[a, b], [c, d]] is singular (a * d - b * c == 0)")

    # Surround the image with a 1-pixel black border: out-of-range source
    # positions are clamped onto this border and therefore come out black.
    img_expand = np.zeros((H + 2, W + 2, C), dtype=np.float64)
    img_expand[1:H + 1, 1:W + 1] = img

    # Output coordinates as a column vector (H, 1) and a row vector (1, W);
    # broadcasting builds the full grid.
    y_new = np.arange(H).reshape(-1, 1)
    x_new = np.arange(W).reshape(1, -1)

    # Inverse mapping; the "+ 1" accounts for the border of ``img_expand``.
    x = np.round((d * x_new - b * y_new) / adbc - tx).astype(int) + 1
    y = np.round((-c * x_new + a * y_new) / adbc - ty).astype(int) + 1

    # Offset between the midpoint of the mapped range and the image centre.
    dcx = (x.max() + x.min()) // 2 - W // 2
    dcy = (y.max() + y.min()) // 2 - H // 2

    # Re-centre, then clamp onto the padded image.
    x = np.clip(x - dcx, 0, W + 1)
    y = np.clip(y - dcy, 0, H + 1)

    out = img_expand[y, x]

    return out.astype(np.uint8)


# Test

# Answer 21
# img = cv2.imread("imori_dark.jpg").astype(np.float)
# img21 = histogram_normalization(img)
#
# plt.hist(img21.ravel(), bins=255, range=(0, 255))
# plt.show()
#
# cv2.imshow("histogram normalization", np.hstack((img.astype(np.uint8), img21)))
# cv2.waitKey(0)
# cv2.destroyAllWindows()

# Answer 22
# img = cv2.imread("imori_dark.jpg").astype(np.float)
# img22 = histogram_mean(img)
#
# plt.hist(img22.ravel(), bins=255, range=(0, 255))
# plt.show()
#
# cv2.imshow("histogram normalization", np.hstack((img.astype(np.uint8), img22)))
# cv2.waitKey(0)
# cv2.destroyAllWindows()

# Answer 23
# img = cv2.imread("imori.jpg").astype(np.float)
# img23 = histogram_equalization(img)
#
# plt.hist(img23.ravel(), bins=255, range=(0, 255))
# plt.show()
#
# cv2.imshow("histogram equalization", np.hstack((img.astype(np.uint8), img23)))
# cv2.waitKey(0)
# cv2.destroyAllWindows()

# Answer 24
# img = cv2.imread("imori_gamma.jpg").astype(np.float)
# img24 = gamma_correction(img)
#
# cv2.imshow("gamma correction", np.hstack((img.astype(np.uint8), img24)))
# cv2.waitKey(0)
# cv2.destroyAllWindows()

# Answer 25
# img = cv2.imread("imori.jpg").astype(np.float)
# img25 = nearest_neighbor_interpolation(img)
#
# cv2.imshow("gamma correction", img25)
# cv2.waitKey(0)
# cv2.destroyAllWindows()

# Answer 26
# img = cv2.imread("imori.jpg").astype(np.float)
# img26 = bilinear_interpolation(img)
#
# cv2.imshow("bilinear interpolation", img26)
# cv2.waitKey(0)
# cv2.destroyAllWindows()

# Answer 27
# img = cv2.imread("imori.jpg").astype(np.float)
# img27 = bicubic_interpolation(img)
#
# cv2.imshow("bicubic interpolation", img27)
# cv2.waitKey(0)
# cv2.destroyAllWindows()

# Answer 28
# img = cv2.imread("imori.jpg").astype(np.float)
# img28 = afine_image(img, a=1, b=0, c=0, d=1, tx=30, ty=-30)
#
# cv2.imshow("afine interpolation", img28)
# cv2.waitKey(0)
# cv2.destroyAllWindows()

# Answer 29
# img = cv2.imread("imori.jpg").astype(np.float)
# img29 = afine_image(img, a=1.3, b=0, c=0, d=0.8, tx=30, ty=-30)
#
# cv2.imshow("afine interpolation", img29)
# cv2.waitKey(0)
# cv2.destroyAllWindows()

# Answer 30
# img = cv2.imread("imori.jpg").astype(np.float)
# img30 = rotation_image(img, a=np.cos(-np.pi / 6.), b=-np.sin(-np.pi / 6.), c=np.sin(-np.pi / 6.), d=np.cos(-np.pi / 6.),
#                     tx=0, ty=0)
#
# cv2.imshow("rotation interpolation", img30)
# cv2.waitKey(0)
# cv2.destroyAllWindows()

用到的图片
imori.jpg
图像处理入门100题(三)_第1张图片
imori_gamma.jpg
图像处理入门100题(三)_第2张图片
imori_dark.jpg
图像处理入门100题(三)_第3张图片

你可能感兴趣的:(图像处理)