本篇记录了自己写的GitHub上的ImageProcessing 100 Wen的问题1-10的答案,注释里包含了一些自己的感悟。为了方便,注释是用英文写的。
问题序号 | 题目 |
---|---|
1 | 通道交换 |
2 | 灰度化 |
3 | 二值化 |
4 | 大津二值化算法(最佳阈值) |
5 | HSV与RGB的变换 |
6 | 减色处理 |
7 | 平均池化 |
8 | 最大池化 |
9 | 高斯滤波 |
10 | 中值滤波 |
Python答案如下,结尾包含了问题的验证。
import cv2.cv2
import numpy as np
# Question One
# change channels
# from RGB to BGR
# please note that the images are stored by BGR
# Answer
def RGB2BGR(img):
    """Return a copy of ``img`` with channels 0 and 2 exchanged.

    The input array is left untouched; channel 1 keeps its position, so an
    R, G, B layout becomes B, G, R and vice versa.
    """
    swapped = img.copy()
    # Indexing with a list builds a reordered temporary first, so the
    # assignment never reads values it has already overwritten.
    swapped[:, :] = img[:, :, [2, 1, 0]]
    return swapped
# Question Two
# gray scale of images
# Y = 0.2126 * R + 0.7152 * G + 0.0722 * B
# Answer
def RGB2GRAY(img):
    """Convert a B, G, R image to a single-channel grayscale image.

    Uses Y = 0.2126 * R + 0.7152 * G + 0.0722 * B, reading R from channel 2,
    G from channel 1 and B from channel 0.  The weighted sum is truncated
    (not rounded) when it is cast to ``uint8``.
    """
    blue = img[:, :, 0]
    green = img[:, :, 1]
    red = img[:, :, 2]
    luma = 0.2126 * red + 0.7152 * green + 0.0722 * blue
    return luma.astype(np.uint8)
# Question Three
# Binary image
# threshold: if y < t then y := 0 else y := 255
# Answer
def BINARY(img, th=128):
    """Binarize ``img``: grayscale values below ``th`` become 0, the rest 255.

    The image is first converted with ``RGB2GRAY``; the returned array is that
    grayscale image with every pixel replaced by 0 or 255.
    """
    gray = RGB2GRAY(img)
    # Comparing an array with a scalar yields a boolean mask of the same
    # shape, which can then be used to select the pixels to overwrite.
    dark = gray < th
    bright = gray >= th
    gray[dark] = 0
    gray[bright] = 255
    return gray
# Question Four
# find best threshold for binary image
# Otsu's method:
# there are two classes: class0 contains pixels below the threshold, class1 contains pixels greater than or equal to it
# w0: the rate of number of pixels of class0, the same as w1
# M0: the mean value of pixel value of class0, the same as M1
# objective function: make Sb^2 = w0 * w1 * (M0 - M1)^2 biggest
# Answer
def BESTTH(img):
    """Find a binarization threshold with Otsu's method.

    Every candidate threshold t in 1..254 splits the grayscale pixels into
    class 0 (value < t) and class 1 (value >= t).  The threshold maximising
    the between-class variance w0 * w1 * (M0 - M1)^2 is returned, where w is
    the fraction of pixels in a class and M the class mean.  Returns 0 when no
    candidate yields a positive variance.
    """
    pixel_count = img.shape[0] * img.shape[1]
    gray = RGB2GRAY(img)

    best_threshold = 0
    best_variance = 0
    for candidate in range(1, 255):
        below = gray[gray < candidate]
        above = gray[gray >= candidate]

        weight_below = below.size / pixel_count
        weight_above = above.size / pixel_count
        # An empty class has no mean; treat it as 0 (its weight is 0 anyway).
        mean_below = np.mean(below) if below.size > 0 else 0
        mean_above = np.mean(above) if above.size > 0 else 0

        variance = weight_below * weight_above * np.square(mean_below - mean_above)
        if variance > best_variance:
            best_threshold = candidate
            best_variance = variance
    return best_threshold
# Question Five
# HSV to RGB, RGB to HSV
# Answer
def RGB2HSV(img):
    """Convert a B, G, R image (values 0..255) to an H, S, V float image.

    Channels of the result:
        0: H in degrees, computed from whichever channel is the minimum;
           0 for achromatic pixels (max == min).
        1: S = max - min, with the input first scaled to [0, 1].
        2: V = max.

    Returns a ``float64`` array with the same shape as ``img``.
    """
    bgr = img / 255.0  # scale to [0, 1]
    hsv = np.zeros_like(bgr, dtype=np.float64)

    max_v = np.max(bgr, axis=2)
    min_v = np.min(bgr, axis=2)
    min_index = np.argmin(bgr, axis=2)
    delta = max_v - min_v
    # Achromatic pixels have delta == 0; they must be excluded from the
    # divisions below, otherwise 0 / 0 writes NaN into H (argmin reports
    # channel 0 when all channels are equal).
    chromatic = delta > 0

    blue = bgr[:, :, 0]
    green = bgr[:, :, 1]
    red = bgr[:, :, 2]
    hue = hsv[:, :, 0]  # view: writes go straight into hsv

    # (minimum channel mask, numerator, hue offset in degrees)
    cases = (
        (min_index == 0, green - red, 60),    # min is B
        (min_index == 1, red - blue, 300),    # min is G
        (min_index == 2, blue - green, 180),  # min is R
    )
    for is_min, numerator, offset in cases:
        mask = is_min & chromatic
        hue[mask] = 60 * numerator[mask] / delta[mask] + offset

    hsv[:, :, 1] = delta
    hsv[:, :, 2] = max_v
    return hsv
def HSV2RGB(img):
    """Convert an H, S, V image back to a B, G, R ``uint8`` image.

    ``img`` holds H in degrees in channel 0, S in channel 1 and V in channel 2
    (the layout produced by ``RGB2HSV``).  Hue is wrapped modulo 360 so that
    360 degrees is treated as 0 instead of falling outside every sector.

    Returns a ``uint8`` array of the same shape whose channels are ordered
    B, G, R; values are clipped to [0, 1] and scaled by 255 (truncating).
    """
    hue = img[:, :, 0]
    sat = img[:, :, 1]
    val = img[:, :, 2]

    chroma = sat
    sector = (hue % 360) / 60.0  # position on the colour wheel, in [0, 6)
    x = chroma * (1 - np.abs(sector % 2 - 1))
    zero = np.zeros_like(sector)
    base = val - chroma

    # Per-sector (R, G, B) offsets added on top of ``base``.
    rgb_offsets = (
        (chroma, x, zero),
        (x, chroma, zero),
        (zero, chroma, x),
        (zero, x, chroma),
        (x, zero, chroma),
        (chroma, zero, x),
    )

    bgr = np.zeros_like(img, dtype=np.float64)  # note the B, G, R channel order
    for i, (r_off, g_off, b_off) in enumerate(rgb_offsets):
        mask = (sector >= i) & (sector < i + 1)
        bgr[:, :, 2][mask] = base[mask] + r_off[mask]  # R
        bgr[:, :, 1][mask] = base[mask] + g_off[mask]  # G
        bgr[:, :, 0][mask] = base[mask] + b_off[mask]  # B

    bgr = np.clip(bgr, 0, 1)
    return (bgr * 255).astype(np.uint8)
# Question Six
# color reduced
# make rgb only represented by 32 96 160 224
# Answer
def COLORREDUCED(img):
    """Quantize every value to the centre of its 64-wide bucket.

    Values 0..63 map to 32, 64..127 to 96, 128..191 to 160 and 192..255 to
    224: integer division by the bucket width selects the bucket, and half the
    width is added back as a bias to land on its centre.
    """
    bucket_width = 64
    bias = 32  # half of bucket_width
    bucket_start = (img // bucket_width) * bucket_width
    return bucket_start + bias
# Question Seven
# average pooling
# this is an important operation in convolution neural network
# use the average pixel value of a block(kernel) to represent a block(kernel)
# Answer
def AVERAGEPOOLING(img, k=8):
    """Replace every k x k block by its per-channel mean value.

    Works on a copy, so the caller's array is not modified.  Accepts either an
    H x W x C image or an H x W grayscale image.  Rows and columns that do not
    fill a complete block (when H or W is not a multiple of ``k``) are left
    unchanged.

    The block mean is truncated to ``uint8`` before it is written back.
    """
    out = img.copy()
    height, width = img.shape[:2]
    for by in range(height // k):
        for bx in range(width // k):
            rows = slice(k * by, k * (by + 1))
            cols = slice(k * bx, k * (bx + 1))
            # Averaging over the two spatial axes gives one value per channel,
            # which broadcasts over the whole block on assignment.
            out[rows, cols] = np.mean(img[rows, cols], axis=(0, 1)).astype(np.uint8)
    return out
# Question Eight
# max pooling
# use the maximum pixel value of a block(kernel) to represent a block(kernel)
# Answer
def MAXPOOLING(img, k=8):
    """Replace every k x k block by its per-channel maximum value.

    Works on a copy, so the caller's array is not modified.  Accepts either an
    H x W x C image or an H x W grayscale image.  Rows and columns that do not
    fill a complete block (when H or W is not a multiple of ``k``) are left
    unchanged.
    """
    out = img.copy()
    height, width = img.shape[:2]
    for by in range(height // k):
        for bx in range(width // k):
            rows = slice(k * by, k * (by + 1))
            cols = slice(k * bx, k * (bx + 1))
            # Reducing over the two spatial axes gives one value per channel,
            # which broadcasts over the whole block on assignment.
            out[rows, cols] = np.max(img[rows, cols], axis=(0, 1))
    return out
# Question Nine
# Gaussian Filter
# to smooth the image or denoising using gaussian filter
# Answer
def GAUSSIANFILTER(img, k=3, sigma=1.3):
    """Smooth ``img`` with a k x k Gaussian kernel, using zero padding.

    Args:
        img: H x W x C image, or an H x W grayscale image.
        k: kernel side length; must be a positive odd number so the kernel
            has a centre pixel.
        sigma: standard deviation of the Gaussian.

    Returns:
        A ``uint8`` array with the same shape as ``img``; results are clipped
        to [0, 255] and truncated.

    Raises:
        ValueError: if ``k`` is not a positive odd integer.
    """
    if k < 1 or k % 2 == 0:
        raise ValueError("k must be a positive odd integer, got %r" % (k,))

    # A grayscale image is H x W; give it a channel axis so the same code
    # handles both layouts (expand_dims returns a new view, it is not in place).
    is_gray = img.ndim == 2
    if is_gray:
        img = np.expand_dims(img, axis=-1)
    height, width, channels = img.shape

    # Zero padding keeps the output the same size as the input.
    pad = (k - 1) // 2
    padded = np.zeros((height + 2 * pad, width + 2 * pad, channels), dtype=np.float64)
    padded[pad:pad + height, pad:pad + width] = img.astype(np.float64)

    # Gaussian kernel: g(x, y) ~ exp(-(x^2 + y^2) / (2 * sigma^2)), with (x, y)
    # measured from the kernel centre.  The 1 / (2 * pi * sigma^2) factor is
    # omitted because the kernel is normalised to sum to 1 right after.
    kernel = np.zeros((k, k), dtype=np.float64)
    for dy in range(-pad, k - pad):
        for dx in range(-pad, k - pad):
            kernel[dy + pad, dx + pad] = np.exp(-(dx * dx + dy * dy) / (2 * sigma * sigma))
    kernel = kernel / kernel.sum()

    # Read only from the padded input and write to a separate array so that
    # already-filtered pixels never feed into later windows.
    weights = kernel[:, :, None]  # broadcast over channels
    out = np.zeros((height, width, channels), dtype=np.float64)
    for y in range(height):
        for x in range(width):
            out[y, x] = np.sum(weights * padded[y:y + k, x:x + k], axis=(0, 1))

    out = np.clip(out, 0, 255).astype(np.uint8)
    return out[:, :, 0] if is_gray else out
# Question Ten
# Median filter
# use the median value to filter
# Answer
def MEDIANFILTER(img, k=3):
    """Replace every pixel by the median of its k x k neighbourhood.

    The image is zero padded, so windows that overlap the border include
    zeros in the median.

    Args:
        img: H x W x C image, or an H x W grayscale image.
        k: window side length.

    Returns:
        A ``uint8`` array with the same shape as ``img``; results are clipped
        to [0, 255] and truncated.
    """
    # A grayscale image is H x W; give it a channel axis so the same code
    # handles both layouts (expand_dims returns a new view, it is not in place).
    is_gray = img.ndim == 2
    if is_gray:
        img = np.expand_dims(img, axis=-1)
    height, width, channels = img.shape

    # Zero padding keeps the output the same size as the input.
    pad = (k - 1) // 2
    padded = np.zeros((height + 2 * pad, width + 2 * pad, channels), dtype=np.float64)
    padded[pad:pad + height, pad:pad + width] = img.astype(np.float64)

    # Read only from the padded input and write to a separate array so that
    # already-filtered pixels never feed into later windows.
    out = np.zeros((height, width, channels), dtype=np.float64)
    for y in range(height):
        for x in range(width):
            # Median over the two spatial axes gives one value per channel.
            out[y, x] = np.median(padded[y:y + k, x:x + k], axis=(0, 1))

    out = np.clip(out, 0, 255).astype(np.uint8)
    return out[:, :, 0] if is_gray else out
# Test
# Answer One
# img = cv2.imread("imori.jpg")
# img1 = RGB2BGR(img)
#
# cv2.imshow("RGB2BGR", np.hstack((img, img1)))
# cv2.waitKey(0)
# cv2.destroyAllWindows()
# Answer Two
# img = cv2.imread("imori.jpg")
# img2 = RGB2GRAY(img)
#
# cv2.imshow("RGB2GRAY", img2)
# cv2.waitKey(0)
# cv2.destroyAllWindows()
# Answer Three
# img = cv2.imread("imori.jpg")
# img3 = BINARY(img)
#
# cv2.imshow("BINARY", img3)
# cv2.waitKey(0)
# cv2.destroyAllWindows()
# Answer Four
# img = cv2.imread("imori.jpg")
# best_t = BESTTH(img)
# print(best_t)
# img4 = BINARY(img, th=best_t)
#
# cv2.imshow("BINARY", img4)
# cv2.waitKey(0)
# cv2.destroyAllWindows()
# Answer Five
# img = cv2.imread("imori.jpg")
#
# hsv = RGB2HSV(img)
# hsv[:, :, 0] = (hsv[:, :, 0] + 180) % 360
# img5 = HSV2RGB(hsv)
#
# cv2.imshow("RGB2HSV -> H+180 -> HSV2BGR", img5)
# cv2.waitKey(0)
# cv2.destroyAllWindows()
# Answer Six
# img = cv2.imread("imori.jpg")
#
# img6 = COLORREDUCED(img)
#
# cv2.imshow("COLORREDUCED", img6)
# cv2.waitKey(0)
# cv2.destroyAllWindows()
# Answer Seven
# img = cv2.imread("imori.jpg")
#
# img7 = AVERAGEPOOLING(img)
#
# cv2.imshow("AVERAGEPOOLING", img7)
# cv2.waitKey(0)
# cv2.destroyAllWindows()
# Answer Eight
# img = cv2.imread("imori.jpg")
#
# img8 = MAXPOOLING(img)
#
# cv2.imshow("MAXPOOLING", img8)
# cv2.waitKey(0)
# cv2.destroyAllWindows()
# Answer Nine
# img = cv2.imread("imori_noise.jpg")
#
# img9 = GAUSSIANFILTER(img)
#
# cv2.imshow("GAUSSIANFILTER", img9)
# cv2.waitKey(0)
# cv2.destroyAllWindows()
# Answer 10
# img = cv2.imread("imori_noise.jpg")
#
# img10 = MEDIANFILTER(img)
#
# cv2.imshow("MEDIANFILTER", img10)
# cv2.waitKey(0)
# cv2.destroyAllWindows()