本篇记录了自己写的GitHub上的ImageProcessing 100 Wen的问题21-30的答案,注释里包含了一些自己的感悟。为了方便,注释是用英文写的。
进一步感受到了图像处理的本质就是矩阵运算,所以线性代数的基础一定要牢靠。此外,Python的数据包也要用得熟练和巧妙,时刻有一个“矩阵运算”意识。
问题序号 | 题目 |
---|---|
21 | 直方图归一化 |
22 | 直方图操作 |
23 | 直方图均衡化 |
24 | 伽马校正 |
25 | 最邻近插值 |
26 | 双线性插值 |
27 | 双立方插值 |
28 | 仿射变换:平移 |
29 | 仿射变换:缩放 |
30 | 仿射变换:旋转 |
Python答案如下,结尾包含了问题的验证。
import cv2.cv2
import numpy as np
import matplotlib.pyplot as plt
# Question 21
# Histogram Normalization
# Answer
def histogram_normalization(img, a=0, b=255):
    """Linearly stretch the pixel values of ``img`` to the range [a, b].

    The minimum pixel value is mapped to ``a`` and the maximum to ``b``.
    The result is then clamped to [0, 255] and truncated to ``uint8``.

    Args:
        img: array of pixel values (any shape).
        a: lower bound of the target range.
        b: upper bound of the target range.

    Returns:
        ``uint8`` array with the same shape as ``img``.
    """
    img = np.asarray(img)
    c = np.min(img)  # smallest pixel value
    d = np.max(img)  # largest pixel value

    if d == c:
        # A flat image has no range to stretch; dividing by (d - c) would
        # produce NaN, so every pixel is mapped to the lower bound instead.
        out = np.full(img.shape, a, dtype=np.float64)
    else:
        out = (b - a) / (d - c) * (img - c) + a

    # Clamp before the cast: converting out-of-range floats to uint8 wraps.
    out = np.clip(out, 0, 255)
    return out.astype(np.uint8)
# Question 22
# Histogram operation: shift and scale the pixel values so that the image has mean = 128 and sigma = 52, which flattens the histogram
# Answer
def histogram_mean(img, m0=128, sigma0=52):
    """Shift and scale pixel values to a target mean and standard deviation.

    Each pixel is transformed as ``sigma0 / sigma * (x - m) + m0`` where
    ``m`` and ``sigma`` are the mean and standard deviation of ``img``.
    The result is clamped to [0, 255] and truncated to ``uint8``.

    Args:
        img: array of pixel values (any shape).
        m0: target mean.
        sigma0: target standard deviation.

    Returns:
        ``uint8`` array with the same shape as ``img``.
    """
    img = np.asarray(img)
    m = np.mean(img)      # current mean
    sigma = np.std(img)   # current standard deviation

    if sigma == 0:
        # Every pixel equals the mean, so there is no spread to rescale;
        # dividing by sigma would give NaN. Map everything to the target mean.
        out = np.full(img.shape, m0, dtype=np.float64)
    else:
        out = sigma0 / sigma * (img - m) + m0

    # Clamp before the cast: converting out-of-range floats to uint8 wraps.
    out = np.clip(out, 0, 255)
    return out.astype(np.uint8)
# Question 23
# histogram equalization
# out = Zmax / S * SUM(h), Zmax is the max value of the pixels
# S is the number of pixels, h is the distribution function
# Answer
def histogram_equalization(img, Zmax=255):
    """Equalize the histogram of ``img`` over all channels jointly.

    Every pixel with integer value ``i`` in 0..255 is replaced by
    ``Zmax * (number of pixels with value <= i) / S`` where ``S`` is the
    total number of elements. Pixels whose value is not an integer in
    0..255 are left unchanged before the final clamp and cast.

    Args:
        img: image array; an input that is not 3-D gets a trailing channel
            axis appended, and the output keeps that axis.
        Zmax: maximum output value.

    Returns:
        ``uint8`` array with the (possibly expanded) shape of ``img``.
    """
    img = np.asarray(img)
    if img.ndim != 3:
        img = np.expand_dims(img, axis=-1)

    S = img.size  # total number of elements (H * W * C)
    out = img.astype(np.float64)
    if S == 0:
        # Nothing to equalize; also avoids dividing by zero below.
        return out.astype(np.uint8)

    sum_h = 0  # running cumulative histogram
    # All 256 levels must be visited: skipping level 0 would leave its count
    # out of the cumulative sum and bias every other level downwards.
    for level in range(256):
        mask = img == level
        count = np.count_nonzero(mask)
        if count == 0:
            continue
        sum_h += count
        out[mask] = sum_h / S * Zmax

    out = np.clip(out, 0, 255)
    return out.astype(np.uint8)
# Question 24
# Gamma correction
# Answer
def gamma_correction(img, c=1.0, g=2.2):
    """Apply gamma correction ``255 * (1 / c) * (img / 255) ** (1 / g)``.

    Args:
        img: array of pixel values, scaled so that 255 is full intensity.
        c: constant divisor applied after the power function.
        g: gamma value; the exponent used is ``1 / g``.

    Returns:
        ``uint8`` array with the same shape as ``img``.
    """
    normalized = np.asarray(img) / 255
    out = 1 / c * np.power(normalized, 1 / g)
    out = out * 255
    # With c < 1 (or inputs above 255) the result exceeds 255; clamp so the
    # uint8 cast saturates instead of wrapping around.
    out = np.clip(out, 0, 255)
    return out.astype(np.uint8)
# Question 25
# Nearest neighbor interpolation
# Answer
def nearest_neighbor_interpolation(img, a=1.5):
    """Resize ``img`` by factor ``a`` by copying source pixels.

    Output pixel ``(y, x)`` takes the value of source pixel
    ``(int(y / a), int(x / a))``.

    Args:
        img: 2-D (H, W) or 3-D (H, W, C) image array. A 2-D input gets a
            trailing channel axis, which the output keeps.
        a: scale factor applied to both height and width.

    Returns:
        ``uint8`` array of shape ``(int(H * a), int(W * a), C)``.
    """
    img = np.asarray(img)
    if img.ndim != 3:
        # The result must be assigned: expand_dims returns a new array.
        img = np.expand_dims(img, axis=-1)
    H, W, C = img.shape

    Ha = int(H * a)
    Wa = int(W * a)

    # Source row / column for every output row / column. The minimum guards
    # against floating-point round-off pushing an index to H or W.
    src_y = np.minimum((np.arange(Ha) / a).astype(int), H - 1)
    src_x = np.minimum((np.arange(Wa) / a).astype(int), W - 1)

    # Broadcasting the two index vectors gathers the whole (Ha, Wa, C) result
    # in one step instead of looping over every pixel and channel.
    out = img[src_y[:, np.newaxis], src_x[np.newaxis, :]]
    return out.astype(np.uint8)
# Question 26
# Bilinear Interpolation
# Answer
def bilinear_interpolation(img, a=1.5):
    """Resize ``img`` by factor ``a`` using bilinear interpolation.

    Each output pixel is mapped back to the source position
    ``(y / a, x / a)`` and computed as the distance-weighted sum of the
    four surrounding source pixels.

    Args:
        img: 2-D (H, W) or 3-D (H, W, C) image array. A 2-D input gets a
            trailing channel axis, which the output keeps.
        a: scale factor applied to both height and width.

    Returns:
        ``uint8`` array of shape ``(int(a * H), int(a * W), C)``.
    """
    img = np.asarray(img)
    if img.ndim != 3:
        img = np.expand_dims(img, axis=-1)
    H, W, C = img.shape

    aH = int(a * H)
    aW = int(a * W)

    # Coordinate grids of the output image: y[i, j] == i and x[i, j] == j.
    y, x = np.meshgrid(np.arange(aH), np.arange(aW), indexing="ij")

    # Corresponding (fractional) positions in the source image.
    y = y / a
    x = x / a

    # Top-left neighbour. It is limited to W - 2 / H - 2 so that the
    # bottom-right neighbour (index + 1) stays inside the image; the lower
    # bound of 0 keeps 1-pixel-wide/tall images from producing index -1.
    ix = np.clip(np.floor(x).astype(int), 0, max(W - 2, 0))
    iy = np.clip(np.floor(y).astype(int), 0, max(H - 2, 0))
    ix1 = np.minimum(ix + 1, W - 1)
    iy1 = np.minimum(iy + 1, H - 1)

    # Distances from the top-left neighbour, with a trailing axis so that
    # they broadcast over the channel dimension.
    dx = (x - ix)[..., np.newaxis]
    dy = (y - iy)[..., np.newaxis]

    out = (
        (1 - dx) * (1 - dy) * img[iy, ix]
        + dx * (1 - dy) * img[iy, ix1]
        + (1 - dx) * dy * img[iy1, ix]
        + dx * dy * img[iy1, ix1]
    )

    out = np.clip(out, 0, 255)
    return out.astype(np.uint8)
# Question 27
# bicubic interpolation
def bicubic_interpolation(img, a=1.5):
    """Resize ``img`` by factor ``a`` using bicubic interpolation.

    Each output pixel is mapped back to the source position
    ``(y / a, x / a)`` and computed from the surrounding 4 x 4 source
    pixels, weighted by a cubic kernel of the distance and normalised by
    the sum of the weights.

    Args:
        img: 2-D (H, W) or 3-D (H, W, C) image array. A 2-D input gets a
            trailing channel axis, which the output keeps.
        a: scale factor applied to both height and width.

    Returns:
        ``uint8`` array of shape ``(int(a * H), int(a * W), C)``.
    """

    def _weight(t, m=-1):
        # Cubic kernel: one polynomial for |t| <= 1, another for
        # 1 < |t| <= 2, and zero beyond that.
        t_abs = np.abs(t)
        w = np.zeros_like(t_abs)
        near = t_abs <= 1
        far = (t_abs > 1) & (t_abs <= 2)
        w[near] = ((m + 2) * np.power(t_abs, 3) - (m + 3) * np.power(t_abs, 2) + 1)[near]
        w[far] = (m * np.power(t_abs, 3) - 5 * m * np.power(t_abs, 2) + 8 * m * t_abs - 4 * m)[far]
        return w

    img = np.asarray(img)
    if img.ndim != 3:
        img = np.expand_dims(img, axis=-1)
    H, W, C = img.shape

    aH = int(a * H)
    aW = int(a * W)

    # Coordinate grids of the output image and their source positions.
    y, x = np.meshgrid(np.arange(aH), np.arange(aW), indexing="ij")
    y = y / a
    x = x / a

    # Reference pixel, limited to H - 2 / W - 2 as in the bilinear case; the
    # lower bound of 0 protects 1-pixel-wide/tall images.
    iy = np.clip(np.floor(y).astype(int), 0, max(H - 2, 0))
    ix = np.clip(np.floor(x).astype(int), 0, max(W - 2, 0))

    # Neighbour offsets -1, 0, 1, 2 relative to the reference pixel.
    offsets = (-1, 0, 1, 2)

    # The weights depend only on the offset, so compute each one once
    # instead of once per (row offset, column offset) pair. The trailing
    # axis lets them broadcast over the channel dimension.
    wx = [_weight(np.abs(x - ix - k))[..., np.newaxis] for k in offsets]
    wy = [_weight(np.abs(y - iy - k))[..., np.newaxis] for k in offsets]

    weight_sum = np.zeros((aH, aW, 1), dtype=np.float64)
    out = np.zeros((aH, aW, C), dtype=np.float64)

    for j, ky in enumerate(offsets):
        # Clamp neighbour indices to the image so border pixels are reused.
        src_y = np.clip(iy + ky, 0, H - 1)
        for i, kx in enumerate(offsets):
            src_x = np.clip(ix + kx, 0, W - 1)
            weight_sum = weight_sum + wx[i] * wy[j]
            out = out + img[src_y, src_x] * wx[i] * wy[j]

    out = out / weight_sum
    out = np.clip(out, 0, 255)
    return out.astype(np.uint8)
# Question 28-29
# affine transformations (translation and scaling)
# Answer
def afine_image(img, a, b, c, d, tx=30, ty=-30):
    """Apply an affine transformation using inverse nearest-pixel mapping.

    For each output pixel ``(x', y')`` the source pixel is looked up at
    ``x = (d * x' - b * y') / (a * d - b * c) - tx`` and
    ``y = (-c * x' + a * y') / (a * d - b * c) - ty`` (rounded). Positions
    that fall outside the source image are black.

    Args:
        img: 2-D (H, W) or 3-D (H, W, C) image array. A 2-D input gets a
            trailing channel axis, which the output keeps.
        a, b, c, d: entries of the 2 x 2 matrix ``[[a, b], [c, d]]``.
        tx: horizontal translation.
        ty: vertical translation.

    Returns:
        ``uint8`` array of shape ``(round(H * d), round(W * a), C)``.

    Raises:
        ValueError: if ``a * d - b * c`` is zero (the matrix has no inverse).
    """
    img = np.asarray(img)
    if img.ndim != 3:
        img = np.expand_dims(img, axis=-1)
    H, W, C = img.shape

    adbc = a * d - b * c
    if adbc == 0:
        raise ValueError("affine matrix is singular (a * d - b * c == 0)")

    # One-pixel black border around the source: every out-of-range lookup is
    # clamped onto this border and therefore reads black.
    padded = np.zeros((H + 2, W + 2, C), dtype=np.float64)
    padded[1:H + 1, 1:W + 1] = img

    H_new = int(np.round(H * d))
    W_new = int(np.round(W * a))

    # Coordinate grids of the output image.
    y_new, x_new = np.meshgrid(np.arange(H_new), np.arange(W_new), indexing="ij")

    # Inverse mapping into the padded source; "+ 1" accounts for the border.
    x = np.round((d * x_new - b * y_new) / adbc - tx).astype(int) + 1
    y = np.round((-c * x_new + a * y_new) / adbc - ty).astype(int) + 1

    x = np.clip(x, 0, W + 1)
    y = np.clip(y, 0, H + 1)

    return padded[y, x].astype(np.uint8)
# Question 30
# rotation
# Answer
def rotation_image(img, a, b, c, d, tx=30, ty=-30):
    """Apply an affine transformation and re-centre the result.

    Uses the same inverse nearest-pixel mapping as a plain affine
    transform, then shifts the looked-up source coordinates so that the
    centre of their range coincides with the centre of the source image.
    The output has the same height and width as the input; positions that
    fall outside the source image are black.

    Args:
        img: 2-D (H, W) or 3-D (H, W, C) image array. A 2-D input gets a
            trailing channel axis, which the output keeps.
        a, b, c, d: entries of the 2 x 2 matrix ``[[a, b], [c, d]]``.
        tx: horizontal translation.
        ty: vertical translation.

    Returns:
        ``uint8`` array of shape ``(H, W, C)``.

    Raises:
        ValueError: if ``a * d - b * c`` is zero (the matrix has no inverse).
    """
    img = np.asarray(img)
    if img.ndim != 3:
        img = np.expand_dims(img, axis=-1)
    H, W, C = img.shape

    adbc = a * d - b * c
    if adbc == 0:
        raise ValueError("affine matrix is singular (a * d - b * c == 0)")

    # One-pixel black border around the source: every out-of-range lookup is
    # clamped onto this border and therefore reads black.
    padded = np.zeros((H + 2, W + 2, C), dtype=np.float64)
    padded[1:H + 1, 1:W + 1] = img

    # Coordinate grids of the output image (same size as the input).
    y_new, x_new = np.meshgrid(np.arange(H), np.arange(W), indexing="ij")

    # Inverse mapping into the padded source; "+ 1" accounts for the border.
    x = np.round((d * x_new - b * y_new) / adbc - tx).astype(int) + 1
    y = np.round((-c * x_new + a * y_new) / adbc - ty).astype(int) + 1

    # Re-centre. In padded coordinates the image occupies indices 1..W, so
    # its centre is (W + 1) // 2. Using W // 2 gives the same value for even
    # sizes but shifts odd-sized images by one pixel.
    dcx = (x.max() + x.min()) // 2 - (W + 1) // 2
    dcy = (y.max() + y.min()) // 2 - (H + 1) // 2
    x = x - dcx
    y = y - dcy

    x = np.clip(x, 0, W + 1)
    y = np.clip(y, 0, H + 1)

    return padded[y, x].astype(np.uint8)
# Test
# Answer 21
# img = cv2.imread("imori_dark.jpg").astype(np.float64)
# img21 = histogram_normalization(img)
#
# plt.hist(img21.ravel(), bins=255, range=(0, 255))
# plt.show()
#
# cv2.imshow("histogram normalization", np.hstack((img.astype(np.uint8), img21)))
# cv2.waitKey(0)
# cv2.destroyAllWindows()
# Answer 22
# img = cv2.imread("imori_dark.jpg").astype(np.float64)
# img22 = histogram_mean(img)
#
# plt.hist(img22.ravel(), bins=255, range=(0, 255))
# plt.show()
#
# cv2.imshow("histogram operation", np.hstack((img.astype(np.uint8), img22)))
# cv2.waitKey(0)
# cv2.destroyAllWindows()
# Answer 23
# img = cv2.imread("imori.jpg").astype(np.float64)
# img23 = histogram_equalization(img)
#
# plt.hist(img23.ravel(), bins=255, range=(0, 255))
# plt.show()
#
# cv2.imshow("histogram equalization", np.hstack((img.astype(np.uint8), img23)))
# cv2.waitKey(0)
# cv2.destroyAllWindows()
# Answer 24
# img = cv2.imread("imori_gamma.jpg").astype(np.float64)
# img24 = gamma_correction(img)
#
# cv2.imshow("gamma correction", np.hstack((img.astype(np.uint8), img24)))
# cv2.waitKey(0)
# cv2.destroyAllWindows()
# Answer 25
# img = cv2.imread("imori.jpg").astype(np.float64)
# img25 = nearest_neighbor_interpolation(img)
#
# cv2.imshow("nearest neighbor interpolation", img25)
# cv2.waitKey(0)
# cv2.destroyAllWindows()
# Answer 26
# img = cv2.imread("imori.jpg").astype(np.float64)
# img26 = bilinear_interpolation(img)
#
# cv2.imshow("bilinear interpolation", img26)
# cv2.waitKey(0)
# cv2.destroyAllWindows()
# Answer 27
# img = cv2.imread("imori.jpg").astype(np.float64)
# img27 = bicubic_interpolation(img)
#
# cv2.imshow("bicubic interpolation", img27)
# cv2.waitKey(0)
# cv2.destroyAllWindows()
# Answer 28
# img = cv2.imread("imori.jpg").astype(np.float64)
# img28 = afine_image(img, a=1, b=0, c=0, d=1, tx=30, ty=-30)
#
# cv2.imshow("afine interpolation", img28)
# cv2.waitKey(0)
# cv2.destroyAllWindows()
# Answer 29
# img = cv2.imread("imori.jpg").astype(np.float64)
# img29 = afine_image(img, a=1.3, b=0, c=0, d=0.8, tx=30, ty=-30)
#
# cv2.imshow("afine interpolation", img29)
# cv2.waitKey(0)
# cv2.destroyAllWindows()
# Answer 30
# img = cv2.imread("imori.jpg").astype(np.float64)
# img30 = rotation_image(img, a=np.cos(-np.pi / 6.), b=-np.sin(-np.pi / 6.), c=np.sin(-np.pi / 6.), d=np.cos(-np.pi / 6.),
# tx=0, ty=0)
#
# cv2.imshow("rotation interpolation", img30)
# cv2.waitKey(0)
# cv2.destroyAllWindows()