时间:2022年11月9日
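不管是 plt.imshow 还是 cv2.imshow,在 Python 中都只接受 numpy.array;本文展示图像时均使用 matplotlib 的 plt.imshow() 加 plt.show()。需要注意:cv2.imread 读入的图片通道顺序是 BGR,cv2.imshow 也按 BGR 解释,所以显示正常;而 plt.imshow 按 RGB 解释,因此用 plt 显示 cv2 读入的图片之前需要先转换通道(例如 cv2.cvtColor(img, cv2.COLOR_BGR2RGB))。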
opencv中的cv2模块
import cv2

# Read the image into a numpy array (BGR channel order).
# cv2.imread returns None when the file cannot be read.
img = cv2.imread(image_path)
# cv2.imshow takes the window name first, then the image.
cv2.imshow('image', img)
# Keep the window open until a key is pressed, then close it.
cv2.waitKey(0)
cv2.destroyAllWindows()
PIL模块
全称 Python Imaging Library,是 Python 平台一个功能非常强大而且简单易用的图像处理库。
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image

img = Image.open(image_path)
# Convert the PIL image to a numpy ndarray (RGB order for colour images).
img = np.array(img)
plt.imshow(img)
plt.show()
matplotlib.image.imread()
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
# Read the image file into a numpy array.
img=mpimg.imread(image_path)
# Draw the array on the current axes, then open the figure window.
plt.imshow(img)
plt.show()
skimage.io模块
import matplotlib.pyplot as plt
from skimage import io

# Read the image file into a numpy array.
img = io.imread(image_path)
# Draw the array, then open the figure window.
plt.imshow(img)
plt.show()
滤波:
高通滤波==锐化
低通滤波==模糊
滤波的过程类似于卷积的过程。
均值滤波
基本原理
使用模板内所有像素的平均值代替模板中心像素灰度值。
特点
易受到噪声的干扰,不能完全消除噪声,只能相对减弱噪声。
中值滤波
基本原理
将每一像素点的灰度值设置为该点某邻域窗口内的所有像素点灰度值的中值(中位数).是一种非线性滤波技术
特点
对噪声不是那么敏感,能够较好的消除椒盐噪声,但是容易导致图像的不连续性。
高斯滤波
基本原理
对图像邻域内像素进行平滑时,邻域内不同位置的像素被赋予不同的权值。
特点
在对图像进行平滑的同时,能够更多地保留图像的总体灰度分布特征。
双边滤波
基本原理
双边滤波(bilateral filtering)的基本思路是同时考虑将要被滤波的像素点的空域信息(domain)和值域信息(range)。因此是一种 combined 滤波方式,因此叫做 bilateral ,即同时考虑两方面的信息。首先,对于图像滤波来说,一个通常的intuition是:(自然)图像在空间中变化缓慢,因此相邻的像素点会更相近。但是这个假设在图像的边缘处变得不成立。如果在边缘处也用这种思路来进行滤波的话,即认为相邻相近,则得到的结果必然会模糊掉边缘。先根据像素值对要用来进行滤波的邻域做一个分割或分类,再给该点所属的类别相对较高的权重,然后进行邻域加权求和,得到最终结果。
read_pic 图片读取
使用的是matplotlib.image模块
def read_pic(file_path='pic/tiger.jpg', is_show=False, is_axis=False):
    """Read an image file with ``matplotlib.image`` and optionally display it.

    :param file_path: path of the image file to read.
    :param is_show: when true, draw the image with pyplot and call ``plt.show()``.
    :param is_axis: when false (and ``is_show`` is true), the axes are turned off.
    :return: the array returned by ``mpimg.imread``.
    """
    image = mpimg.imread(file_path)
    if not is_show:
        return image

    if not is_axis:
        plt.axis('off')
    plt.imshow(image)
    plt.show()
    return image
滤波
使用opencv的cv2模块
def blur(pic, blur='Gaussian'):
    """Filter an image with one of four OpenCV smoothing filters.

    :param pic: image array accepted by the OpenCV filter functions.
    :param blur: 'mean', 'Gaussian', 'median' or 'bilateral'. Any other value
        prints a message and falls back to the Gaussian filter.
    :return: the filtered image.
    """
    import cv2

    if blur == 'mean':
        # Mean filter with a 3x3 kernel.
        return cv2.blur(pic, (3, 3))
    if blur == 'median':
        # Median filter with aperture size 3.
        return cv2.medianBlur(pic, 3)
    if blur == 'bilateral':
        # Bilateral filter: diameter 9, sigmaColor 75, sigmaSpace 75.
        return cv2.bilateralFilter(pic, 9, 75, 75)
    if blur != 'Gaussian':
        # Unknown name: report it, then apply the default the message promises
        # instead of failing on an unassigned result variable.
        print(
            "Error: there hava not " + blur + ", and you will get a default setting, i.e., Gaussian blur in the BagGenerator.row().")
    # Gaussian filter with a 3x3 kernel and sigma 0.5.
    return cv2.GaussianBlur(pic, (3, 3), 0.5)
图像大小调整
使用的是skimage的transform模块
def resize_pic(pic, new_size=8):
    """Resize an image to a ``new_size`` x ``new_size`` square with skimage.

    :param pic: image array to resize.
    :param new_size: edge length of the square output, the default is 8.
    :return: the array returned by ``skimage.transform.resize``.
    """
    from skimage.transform import resize

    target_shape = (new_size, new_size)
    return resize(pic, output_shape=target_shape)
测试图像
来自该篇文章的截图
生成结果
[[0.61315505 0.61209802 0.48175298 0.23649614 0.21824554 0.12310909
0. 0.06760424 0.09095528]
[0.85152065 0.78285938 0.62916331 0.4767312 0.40912696 0.38577593
0.19627889 0.21652467 0.21158965]
[0.89360736 0.80470031 0.65593926 0.28045231 0.26020654 0.26514156
0.13197296 0.1120513 0.07474256]
[1. 0.93101462 0.8195623 0.34475824 0.36467991 0.40198864
0.30644398 0.34108197 0.33719849]
[0.93192162 0.82829825 0.72072941 0.17028722 0.13564923 0.13953271
0.37795523 0.33087972 0.32498486]
[0.79233199 0.73578413 0.63411015 0.09877597 0.14585148 0.15174634
0.29980926 0.26103389 0.23035215]
[0.73088833 0.71311584 0.6421236 0.17692194 0.21569731 0.24637905
0.35422942 0.31926336 0.28347971]
[0.61502451 0.63221808 0.59700949 0.12250178 0.15746784 0.19325149
0.00186946 0.0877243 0.20621178]]
代码实现
'''
@(#)The bag generators
Author: inki
Email: [email protected]
Created on May 01, 2020
Last Modified on May 03, 2020
'''
import SimpleTool
import numpy as np
import warnings
warnings.filterwarnings('ignore')
__all__ = ['Row']
def introduction(__all__=__all__):
    """Pass the list of exported names on to ``SimpleTool.introduction``.

    :param __all__: names to forward; defaults to this module's ``__all__``
        as bound when the function was defined.
    """
    SimpleTool.introduction(__all__)
def Row(file_path='D:/program/Java/eclipse-workspace/Python/data/image/1.jpg', blur='Gaussian', resize=8):
    """Generate a bag with the Row generator: one instance per image row.

    The image goes through ``SimpleTool.read_pic``, ``SimpleTool.blur`` and
    ``SimpleTool.resize_pic``. Each row then yields a 9-dimensional instance:

    * features 0-2: mean of the first three channels over the row;
    * features 3-5: that mean minus the mean of the row above;
    * features 6-8: that mean minus the mean of the row below.

    Rows wrap around: the row above the first row is the last row, and the
    row below the last row is the first row.

    :param file_path: path of the image file.
    :param blur: 'mean', 'Gaussian', 'median', 'bilateral', the default setting is 'Gaussian'.
    :param resize: the size of the image after resizing, the default setting is 8.
    :return: the result of ``SimpleTool.normalize`` on the (rows, 9) bag.
    """
    temp_pic = SimpleTool.read_pic(file_path)
    temp_pic = SimpleTool.blur(temp_pic, blur)
    temp_pic = SimpleTool.resize_pic(temp_pic, resize)

    # Mean colour of every row, shape (rows, 3). Only the first three
    # channels are used, so an extra alpha channel is ignored.
    temp_row_mean_RGB = np.mean(np.asarray(temp_pic)[:, :, :3], axis=1)

    ret_bag = np.zeros((temp_row_mean_RGB.shape[0], 9))
    ret_bag[:, :3] = temp_row_mean_RGB  # Current row.
    # np.roll supplies the cyclic neighbours, so the first and last rows are
    # handled the same way as the inner rows.
    ret_bag[:, 3:6] = temp_row_mean_RGB - np.roll(temp_row_mean_RGB, 1, axis=0)  # Row above.
    ret_bag[:, 6:] = temp_row_mean_RGB - np.roll(temp_row_mean_RGB, -1, axis=0)  # Row below.
    return SimpleTool.normalize(ret_bag)
if __name__ == '__main__':
    # Demo: build the Row bag for the sample image and print it.
    row_bag = Row(file_path="pic/tiger.jpg")
    print(row_bag)
[[0.03656284 0.03709819 0. 1. 0.25529679 0.21283334
0.13478447 1. 0.03298249 0.04257345 0.00200616 1.
0.25555216 0.21203257 0.14005355 1. ]
[0.06184494 0.06341983 0.02186012 1. 0.2868399 0.26247226
0.20346899 1. 0.10163531 0.09987392 0.06585744 1.
0.32984782 0.31842249 0.27261381 1. ]
[0.04344594 0.04580409 0.02550849 1. 0.2220258 0.21402967
0.18578877 1. 0.05366401 0.05314746 0.03498448 1.
0.16280716 0.15769196 0.13544296 1. ]
[0.10177427 0.10714423 0.08897553 1. 0.20278404 0.21145714
0.19922227 1. 0.11630371 0.13919052 0.13392454 1.
0.1381647 0.16313502 0.16314397 1. ]
[0.17321925 0.15675356 0.08597858 1. 0.11654689 0.12249497
0.05609155 1. 0.35658811 0.30138476 0.22294204 1.
0.39464215 0.33044539 0.24701108 1. ]
[0.35912717 0.31310169 0.23922337 1. 0.31859206 0.23598439
0.15243135 1. 0.31423079 0.30161319 0.26028221 1.
0.41696089 0.35144976 0.28247185 1. ]
[0.2543122 0.22358433 0.19059536 1. 0.5019526 0.32901083
0.25793103 1. 0.19681496 0.18002387 0.1521344 1.
0.32995082 0.22919303 0.19018347 1. ]
[0.18106647 0.1861769 0.16610177 1. 0.23102491 0.22921572
0.20963159 1. 0.13363212 0.15356436 0.14478595 1.
0.17259701 0.19474437 0.18201985 1. ]
[0.11461375 0.12481699 0.04163 1. 0.09311211 0.09615186
0.03307826 1. 0.38858982 0.28913155 0.18040838 1.
0.37580257 0.30681168 0.20265536 1. ]
[0.44570331 0.31483268 0.1986 1. 0.49431378 0.39707822
0.26508443 1. 0.59741464 0.51428864 0.43241338 1.
0.61280315 0.54935577 0.44179207 1. ]
[0.47722006 0.35448486 0.27855055 1. 0.53364934 0.47246784
0.37280225 1. 0.3386246 0.26151143 0.21001365 1.
0.36751975 0.3231343 0.23379973 1. ]
[0.29579246 0.27328779 0.24253986 1. 0.26912716 0.22277068
0.15190145 1. 0.17934178 0.18775114 0.17290225 1.
0.14486762 0.12579196 0.08669438 1. ]
[0.10140652 0.11916343 0.02152555 1. 0.09073577 0.10740095
0.01411514 1. 0.16768109 0.15214943 0.04190944 1.
0.08770842 0.11448455 0.01690825 1. ]
[0.20464481 0.17307011 0.07691727 1. 0.07882583 0.08783391
0.01740798 1. 0.27455881 0.22702872 0.14770638 1.
0.08362094 0.0570406 0.01187206 1. ]
[0.29211627 0.24247478 0.14503239 1. 0.09845299 0.07266276
0.01181019 1. 0.21435035 0.20815931 0.0878082 1.
0.12812342 0.12324543 0.03190036 1. ]
[0.13635686 0.11949219 0.03339438 1. 0.09603848 0.08404059
0.02074274 1. 0.05820104 0.04746111 0.00317956 1.
0.04684092 0.05607154 0.0218221 1. ]]
'''
@(#)The bag generators
Author: inki
Email: [email protected]
Created on May 01, 2020
Last Modified on May 03, 2020
'''
import SimpleTool
import numpy as np
import warnings
from numpy import reshape
warnings.filterwarnings('ignore')
__all__ = ['SB']
def SB(file_path='image/1.jpg', blur='Gaussian', resize=8):
    """Generate a bag with the SB generator: one instance per 2x2 pixel blob.

    The image goes through ``SimpleTool.read_pic``, ``SimpleTool.blur`` and
    ``SimpleTool.resize_pic`` and is then cut into non-overlapping 2x2 blobs.
    Each instance concatenates the channel values of the four pixels of one
    blob in the order top-left, top-right, bottom-left, bottom-right. That
    gives 12 features for an RGB image and 16 for an RGBA image.

    Instances are ordered blob column by blob column, which is the order the
    previous implementation produced for square images.

    :param file_path: path of the image file.
    :param blur: 'mean', 'Gaussian', 'median', 'bilateral', the default setting is 'Gaussian'.
    :param resize: the size of the image after resizing, the default setting is 8.
    :return: the result of ``SimpleTool.normalize`` on the
        (number of blobs, 4 * channels) bag.
    """
    temp_pic = SimpleTool.read_pic(file_path)
    temp_pic = SimpleTool.blur(temp_pic, blur)
    temp_pic = np.asarray(SimpleTool.resize_pic(temp_pic, resize), dtype=float)

    # Drop a trailing odd row/column so the image splits into whole 2x2 blobs.
    temp_num_blob_row = temp_pic.shape[0] // 2
    temp_num_blob_column = temp_pic.shape[1] // 2
    temp_num_channel = temp_pic.shape[2]
    temp_pic = temp_pic[:2 * temp_num_blob_row, :2 * temp_num_blob_column]

    # Axes after reshape: (blob row, row in blob, blob column, column in blob, channel).
    temp_blobs = temp_pic.reshape(temp_num_blob_row, 2, temp_num_blob_column, 2, temp_num_channel)
    # Put the blob column first so flattening walks the blobs column by column,
    # keeping the four pixels of a blob in top-left .. bottom-right order.
    temp_blobs = temp_blobs.transpose(2, 0, 1, 3, 4)
    temp_bag = temp_blobs.reshape(temp_num_blob_row * temp_num_blob_column, 4 * temp_num_channel)
    return SimpleTool.normalize(temp_bag)
if __name__ == '__main__':
    # Demo: build the SB bag for the sample image and print it.
    bag = SB(file_path="pic/tiger.jpg")
    print(bag)
生成结果
[[0.76585671 0.72063168 0.64803822 0.03321937 0.0799919 0.11174843
0.41040185 0.32703229 0.28541454 0.14265671 0.17170241 0.17475178
0.22233932 0.23737089 0.27754911]
[0.72602657 0.64485558 0.56275566 0.29413146 0.35135861 0.37548141
0.49799705 0.48362356 0.43602526 0.1268001 0.21381574 0.2306671
0.50550301 0.41673997 0.42899628]
[0.85092716 0.72233257 0.60812136 0.24026096 0.32363052 0.36524826
0.08846528 0.1860343 0.20576492 0. 0.13862038 0.17109135
0.35630003 0.36429397 0.40389149]
[0.72174116 0.70934299 0.66873082 0.11643328 0.12710069 0.13428798
0.60359025 0.5343081 0.49446894 0.36695205 0.32510695 0.28864063
0.20995761 0.20585671 0.21906445]
[0.82268462 0.75831283 0.69053455 0.23973337 0.29287852 0.31564481
0.51776766 0.51979551 0.4818808 0.30340082 0.30469233 0.29048734
0.23983457 0.2052009 0.23464806]
[1. 0.91831969 0.83786836 0.04707256 0.1163547 0.15619387
0.00809053 0.04306738 0.0455759 0.12013836 0.10409014 0.07770917
0.07104212 0.07695037 0.10679955]
[0.66286464 0.63267117 0.60025593 0.11813243 0.15064303 0.16311565
0.54436255 0.45395536 0.41175692 0.42832348 0.41329191 0.37311369
0.25335953 0.28857454 0.30126379]
[0.90619818 0.73626414 0.66642053 0.05027297 0.21234994 0.2544438
0.35647689 0.46629347 0.43820483 0.14515979 0.23392284 0.22166653
0.05911556 0.22727188 0.27787196]
[0.88189579 0.76129514 0.68668145 0.10630025 0.19670744 0.23890589
0.14344685 0.21526935 0.19413533 0.29436277 0.28636884 0.24677131
0.1470591 0.24554648 0.28994699]]
代码实现
'''
@(#)The bag generators
Author: inki
Email: [email protected]
Created on May 01, 2020
Last Modified on May 04, 2020
'''
import SimpleTool
import numpy as np
import warnings
warnings.filterwarnings('ignore')
__all__ = ['SBN']
def SBN(file_path='image/1.jpg', blur='Gaussian', resize=8):
    """Generate a bag with the SBN generator: a blob plus its four neighbours.

    The image goes through ``SimpleTool.read_pic``, ``SimpleTool.blur`` and
    ``SimpleTool.resize_pic``. Every instance has 15 features:

    * features 0-2: colour of the centre blob;
    * features 3-5: left neighbour minus centre;
    * features 6-8: right neighbour minus centre;
    * features 9-11: upper neighbour minus centre;
    * features 12-14: lower neighbour minus centre.

    When ``resize`` is 4 the blobs are single pixels and the neighbours are
    the adjacent pixels. Otherwise a blob is the mean of a 2x2 pixel window
    and the neighbours are the windows two pixels away in each direction.

    :param file_path: path of the image file.
    :param blur: 'mean', 'Gaussian', 'median', 'bilateral', the default setting is 'Gaussian'.
    :param resize: the size of the image after resizing, the default setting is 8.
    :return: the result of ``SimpleTool.normalize`` on the (instances, 15) bag.
    """
    temp_pic = SimpleTool.read_pic(file_path)
    temp_pic = SimpleTool.blur(temp_pic, blur)
    temp_pic = SimpleTool.resize_pic(temp_pic, resize)
    # Only the first three channels are used, so an alpha channel is ignored.
    temp_rgb = np.asarray(temp_pic, dtype=float)[:, :, :3]

    if resize == 4:
        # The four inner pixels of the 4x4 image are the centres.
        ret_bag = np.zeros((4, 15))
        for i in range(2):
            for j in range(2):
                temp_index = 2 * i + j
                temp_center = temp_rgb[i + 1, j + 1]
                ret_bag[temp_index, : 3] = temp_center
                ret_bag[temp_index, 3: 6] = temp_rgb[i + 1, j] - temp_center  # Left - center
                ret_bag[temp_index, 6: 9] = temp_rgb[i + 1, j + 2] - temp_center  # Right - center
                ret_bag[temp_index, 9: 12] = temp_rgb[i, j + 1] - temp_center  # Up - center
                ret_bag[temp_index, 12:] = temp_rgb[i + 2, j + 1] - temp_center  # Down - center
        return SimpleTool.normalize(ret_bag)

    # Mean colour of every 2x2 window; entry [i, j] covers pixels
    # [i : i + 2, j : j + 2], giving shape (rows - 1, columns - 1, 3).
    temp_mean_RGB = (temp_rgb[:-1, :-1] + temp_rgb[:-1, 1:]
                     + temp_rgb[1:, :-1] + temp_rgb[1:, 1:]) / 4

    # Centres are the windows that have a neighbour two steps away on every
    # side, i.e. (rows - 5) x (columns - 5) of them.
    temp_center = temp_mean_RGB[2:-2, 2:-2]
    temp_features = np.concatenate((
        temp_center,
        temp_mean_RGB[2:-2, :-4] - temp_center,  # Left - center
        temp_mean_RGB[2:-2, 4:] - temp_center,  # Right - center
        temp_mean_RGB[:-4, 2:-2] - temp_center,  # Up - center
        temp_mean_RGB[4:, 2:-2] - temp_center,  # Down - center
    ), axis=2)
    # Flatten row by row: instance index = i * (columns - 5) + j.
    ret_bag = temp_features.reshape(temp_center.shape[0] * temp_center.shape[1], 15)
    return SimpleTool.normalize(ret_bag)
if __name__ == '__main__':
    # Demo: build the SBN bag for the sample image and print it.
    bag = SBN(file_path="pic/tiger.jpg")
    print(bag)
BGR色彩空间图像
这个色彩空间是cv2读取图像时使用的通道顺序(BGR),与常用的RGB只是通道顺序不同罢了,用RGB的方式输出便是:
Y_Cb_Cr色彩空间图像
YCbCr 或 Y′CbCr 是色彩空间的一种,通常用于影片中的影像连续处理,或是数字摄影系统中。Y′ 为颜色的亮度(luma)成分,而 Cb 和 Cr 则为蓝色和红色的浓度偏移量成分。Y′ 和 Y 是不同的:Y 是线性的亮度(luminance),表示光的浓度;而 Y′ 是经过伽马修正(gamma correction)编码后的非线性亮度分量。
生成结果
[[5.32358092e-01 8.03967204e-01 7.17280661e-01 4.56908604e-03
3.69042748e-03 1.31372125e-04]
[2.20084303e-01 7.78685386e-01 7.30454631e-01 2.15942366e-03
2.15167736e-03 0.00000000e+00]
[1.00000000e+00 8.47680335e-01 6.71217169e-01 6.62664380e-03
5.28454964e-03 2.05130020e-04]]
代码实现
from matplotlib import pyplot as plt
import SimpleTool
import numpy as np
import cv2 as cv2
import pywt
import warnings
warnings.filterwarnings('ignore')
from sklearn.cluster import KMeans
def kmeansSeg(file_path='D:/program/Java/eclipse-workspace/Python/data/image/1.jpg', thresh_k=16, blobsize=[4, 4]):
    """
    Build a bag for one image from 6-dimensional blob features grouped by k-means.

    :param file_path: path of the image, read with cv2.imread.
    :param thresh_k: exclusive upper bound of the cluster counts tried
        (k runs over range(2, thresh_k)), the default setting is 16.
    :param blobsize: [rows, columns] of one blob in pixels, the default setting is 4 times 4.
    :return: SimpleTool.normalize applied to the raw blob features when there is
        only one blob row, otherwise to one row per cluster computed with
        SimpleTool.mean over the blob features assigned to that cluster.
    """
    # NOTE(review): blobsize is a mutable default; it is only read here, never modified.
    # cv2.imread loads the image in BGR order and returns None for an unreadable
    # file. NOTE(review): there is no None check before .shape is used.
    temp_pic = cv2.imread(file_path)
    temp_num_row = temp_pic.shape[0]
    temp_num_column = temp_pic.shape[1]
    # temp_pic = SimpleTool.resize_pic(temp_pic, 100)
    # SimpleTool.show_pic(temp_pic)
    # Side effect: writes "t_GBR.jpg" into the working directory on every call.
    plt.imsave("t_GBR.jpg", temp_pic)
    """Compute that: how many blobs can be generated on the row/column."""
    temp_blob_row = int(np.floor(temp_num_row / blobsize[0]))
    temp_blob_column = int(np.floor(temp_num_column / blobsize[1]))
    """Avoid the case that the picture row/column size less than blobsize[0]/[1]."""
    temp_blob_row = 1 if temp_blob_row == 0 else temp_blob_row
    temp_blob_column = 1 if temp_blob_column == 0 else temp_blob_column
    """Convert rgb to YCbCr"""
    temp_pic = cv2.cvtColor(temp_pic, cv2.COLOR_BGR2YCR_CB)
    # Presumably displays the converted image; SimpleTool.show_pic is not visible here.
    SimpleTool.show_pic(temp_pic)
    """The results are not equal between MATLAB and Python"""
    # NOTE(review): OpenCV's YCrCb layout is (Y, Cr, Cb), so channel 1 is Cr and
    # channel 2 is Cb; the two names below look swapped. Confirm the intended order.
    temp_Y, temp_Cb, temp_Cr = temp_pic[:, :, 0], temp_pic[:, :, 1], temp_pic[:, :, 2]
    """Initialize bag"""
    temp_bag = np.zeros((temp_blob_row * temp_blob_column, 6))
    # Per-pixel blob index; it is filled below but not read again in this function.
    temp_blob_map = np.zeros(temp_Y.shape)
    temp_blob_idx = 0
    for i in range(temp_blob_row):
        for j in range(temp_blob_column):
            """Record the pixel indexes"""
            # Row/column indices of the current blob, clipped at the image border.
            temp_idx1 = list(range(i * blobsize[0], min(temp_num_row, (i + 1) * blobsize[0])))
            temp_idx2 = list(range(j * blobsize[1], min(temp_num_column, (j + 1) * blobsize[1])))
            """The first 3 dimensions: mean of (Y, Cb, Cr)"""
            # presumably index2_select_datas returns the sub-matrix at the given
            # row/column indices — TODO confirm against SimpleTool.
            temp_data = np.mat(SimpleTool.index2_select_datas(temp_Y, temp_idx1, temp_idx2))
            temp_bag[temp_blob_idx, 0] = np.mean(SimpleTool.mean(temp_data))
            temp_data1 = np.mat(SimpleTool.index2_select_datas(temp_Cb, temp_idx1, temp_idx2))
            temp_bag[temp_blob_idx, 1] = np.mean(SimpleTool.mean(temp_data1))
            temp_data1 = np.mat(SimpleTool.index2_select_datas(temp_Cr, temp_idx1, temp_idx2))
            temp_bag[temp_blob_idx, 2] = np.mean(SimpleTool.mean(temp_data1))
            """The next 3 dimension: HL, LH and HH"""
            # Single-level 2D 'db4' wavelet transform of the Y blob; the
            # approximation band is discarded and the three detail bands kept.
            _unused, (temp_HL, temp_LH, temp_HH) = pywt.dwt2(temp_data, 'db4')
            # presumably dot_pow squares element-wise, making these root-mean-square
            # values of each detail band — TODO confirm against SimpleTool.
            temp_bag[temp_blob_idx, 3] = np.sqrt(np.mean(SimpleTool.mean(SimpleTool.dot_pow(temp_HL))))
            temp_bag[temp_blob_idx, 4] = np.sqrt(np.mean(SimpleTool.mean(SimpleTool.dot_pow(temp_LH))))
            temp_bag[temp_blob_idx, 5] = np.sqrt(np.mean(SimpleTool.mean(SimpleTool.dot_pow(temp_HH))))
            temp_blob_map[temp_idx1[0]: temp_idx1[-1] + 1, temp_idx2[0]: temp_idx2[-1] + 1] = temp_blob_idx
            temp_blob_idx += 1
    """K-means segmentation to segment"""
    temp_thresh_D = 1e5
    temp_thresh_der = 1e-12
    temp_all_D = np.zeros(thresh_k)
    temp_all_D[0] = 1e20
    temp_k = 0
    # NOTE(review): temp_labels is declared as a module-level global. If
    # thresh_k <= 2 the loop below never runs, and temp_labels is then only
    # defined if an earlier call set it.
    global temp_labels
    for k in range(2, thresh_k):
        temp_k = k
        kmeans = KMeans(n_clusters=k).fit(temp_bag)
        temp_labels = kmeans.labels_
        temp_centers = kmeans.cluster_centers_
        # Distance from every blob to every cluster centre.
        temp_dis = np.zeros((len(temp_labels), k))
        for i in range(len(temp_labels)):
            for j in range(len(temp_centers)):
                temp_dis[i, j] = SimpleTool.eucliDist(temp_bag[i], temp_centers[j])
        # presumably the sum of squared distances to the nearest centre — depends
        # on SimpleTool.Mymin / dot_pow semantics, TODO confirm.
        temp_all_D[k] = np.sum(SimpleTool.dot_pow(SimpleTool.Mymin(temp_dis, 1)))
        # Stop once the distortion is below the threshold or its relative change is tiny.
        # NOTE(review): temp_all_D[1] is never assigned and stays 0, so the
        # denominator is just temp_all_D[3]; verify the indices against the
        # reference implementation.
        if (temp_all_D[k] < temp_thresh_D) or (k >= 3 and (
                (temp_all_D[k] - temp_all_D[k - 1]) / (temp_all_D[3] - temp_all_D[1]) / 2 < temp_thresh_der)):
            break
    if temp_blob_row == 1:
        # Only one blob row: return the blob features themselves.
        return SimpleTool.normalize(temp_bag)
    else:
        # One instance per cluster of the last k-means run.
        ret_bag = np.zeros((temp_k, 6))
        for k in range(temp_k):
            # presumably the indices of the blobs labelled k — TODO confirm SimpleTool.find.
            temp_idx = SimpleTool.find(temp_labels, k)
            ret_bag[k] = SimpleTool.mean(SimpleTool.index_select_datas(temp_bag, temp_idx))
        return SimpleTool.normalize(ret_bag)
if __name__ == '__main__':
    # Demo: build the k-means segmentation bag for the sample image and print it.
    bag = kmeansSeg(file_path="pic/tiger.jpg")
    print(bag)
请参考大师兄的博客:论文阅读 (三):An empirical study on image bag generators for multi-instance learning (2016)(因吉的博客,CSDN)。