要求:
编写一个程序,输入两幅彩色图像和一个二维掩模图像,产生两幅图像混合的拉普拉斯金字塔。
这里我想解释一下我个人理解的具体流程:
英文题目要求:(Computer Vision: Algorithms and Applications, 2nd Edition,P185):
完整代码在最后
0327更. 上面的思路是不对的,这样没有达到图像融合的效果,仅仅是图像还原。封装代码如下,可交作业。
0331更. 试了一下使用黑白交界处渐变的掩膜,同样无法融合,反而会丢失信息,导致连接处出现杂色。(更改了 generate_mask_binary 函数)
import numpy as np
import matplotlib.pyplot as plt
import cv2 as cv
from typing import List
# Border mode passed to the blur / downsample calls that build the pyramids.
BR = cv.BORDER_REFLECT
# Relative paths of the two input images and of the mask image.
LEFTDIR = './static/orange.jpg'
RIGHTDIR = './static/apple.jpg'
MASKDIR = './static/mask.jpg'
# Suffix appended to the current working directory by generate_mask_binary().
WORKDIR = '/assignment 5/'
class ImageMerge(object):
    '''
    Blend two colour images along a mask with Laplacian pyramids.

    Attribute names (all filled in by merge_img_whole):
    l_g_p is short for left gaussian pyramids
    r_g_p is short for right gaussian pyramids
    l_l_p is short for left laplace pyramids
    r_l_p is short for right laplace pyramids
    l_m_g_p is short for left mask gaussian pyramids
    l_m_l_p is short for left mask laplace pyramids
    r_m_g_p is short for right mask gaussian pyramids
    r_m_l_p is short for right mask laplace pyramids
    mergedimg is the direct (hard edged) paste of both images
    m_g_p is short for merged gaussian pyramids (pyramid of mergedimg)
    m_l_p is short for merged laplace pyramids (the blended pyramid)

    Laplace pyramids hold signed float32 values; normalise them before
    displaying them as images.
    '''

    def __init__(self, leftdir, rightdir, maskdir) -> None:
        self.leftdir = leftdir
        self.rightdir = rightdir
        self.maskdir = maskdir
        # Per-instance state, so that instances never share mutable defaults.
        self.leftimg = np.array([])
        self.rightimg = np.array([])
        self.leftmask = np.array([])
        self.rightmask = np.array([])
        self.l_g_p = []
        self.r_g_p = []
        self.l_l_p = []
        self.r_l_p = []
        self.l_m_g_p = []
        self.l_m_l_p = []
        self.r_m_g_p = []
        self.r_m_l_p = []
        self.mergedimg = np.array([])
        self.m_g_p = []
        self.m_l_p = []

    @staticmethod
    def get_g_ps(src: np.ndarray, kernal_n=5, levels=5):
        '''
        function to get the gaussian pyramids of src

        Returns a list of levels+1 arrays: a copy of src followed by
        `levels` blurred and downsampled versions of it.
        '''
        gps = [src.copy()]
        for _ in range(levels):
            blurred = cv.GaussianBlur(gps[-1], ksize=(kernal_n, kernal_n),
                                      sigmaX=0, sigmaY=0, borderType=BR)
            gps.append(cv.pyrDown(blurred, borderType=BR))
        return gps

    @staticmethod
    def get_l_ps(gps):
        '''
        function to get the laplace pyramids of gps,a gaussian pyramid
        pls note that the count of layers will always be len(gps)-1

        Every level is returned as float32: the difference between a level
        and the upsampled coarser level is signed, and computing it in
        uint8 would wrap negative values around modulo 256.
        '''
        lps = []
        for fine, coarse in zip(gps[:-1], gps[1:]):
            # dstsize is (width, height), i.e. (cols, rows)
            dstshape = (fine.shape[1], fine.shape[0])
            up = cv.pyrUp(coarse.astype(np.float32),
                          borderType=cv.BORDER_DEFAULT, dstsize=dstshape)
            lps.append(fine.astype(np.float32) - up)
        return lps

    @staticmethod
    def read_imgs(dirs: List[str]):
        '''
        Lazily read every path in dirs as a colour image with the channel
        axis reversed (BGR file order -> RGB).
        '''
        return (np.ascontiguousarray(
            cv.imread(cv.samples.findFile(path), cv.IMREAD_COLOR)[:, :, ::-1])
            for path in dirs)

    @staticmethod
    def reverse_mask(mask):
        '''
        Return the complement of a 0..255 mask.
        '''
        return 255 - mask

    @staticmethod
    def mask_img(src, mask):
        '''
        Weight src by mask (0..255 scaled to 0..1) and return it as uint8.
        A single channel (2-D) mask is applied to every channel of a
        3-D src.
        '''
        weight = np.asarray(mask) / 255.0
        if weight.ndim == 2 and np.ndim(src) == 3:
            weight = weight[:, :, np.newaxis]
        return (src * weight).astype('uint8')

    @staticmethod
    def _blend(left, right, leftmask):
        '''
        Float32 per-pixel mix: left*w + right*(1-w) with w = leftmask/255.
        Signed inputs (laplace levels) keep their sign.
        '''
        weight = np.asarray(leftmask).astype(np.float32) / 255.0
        if weight.ndim == 2 and np.ndim(left) == 3:
            weight = weight[:, :, np.newaxis]
        return (np.asarray(left).astype(np.float32) * weight
                + np.asarray(right).astype(np.float32) * (1.0 - weight))

    def merge_img_whole(self, kernal_n=5, levels=5):
        '''
        Build all pyramids and blend the two images.

        Returns a list of `levels` uint8 images, coarse to fine; the last
        entry is the full resolution blend.

        Raises FileNotFoundError if the mask cannot be read and ValueError
        if images and mask differ in height/width.
        '''
        self.leftimg, self.rightimg = self.read_imgs([self.leftdir, self.rightdir])
        self.leftmask = cv.imread(self.maskdir, cv.IMREAD_UNCHANGED)
        if self.leftmask is None:
            raise FileNotFoundError('cannot read mask image: {}'.format(self.maskdir))
        if not (self.leftimg.shape[:2] == self.rightimg.shape[:2]
                == self.leftmask.shape[:2]):
            raise ValueError('left image, right image and mask must have the same height and width')
        self.rightmask = self.reverse_mask(self.leftmask)
        self.mergedimg = self.mask_img(self.leftimg, self.leftmask) \
            + self.mask_img(self.rightimg, self.rightmask)
        # construct the pyramids
        self.l_g_p = self.get_g_ps(self.leftimg, kernal_n=kernal_n, levels=levels)
        self.r_g_p = self.get_g_ps(self.rightimg, kernal_n=kernal_n, levels=levels)
        self.l_l_p = self.get_l_ps(self.l_g_p)
        self.r_l_p = self.get_l_ps(self.r_g_p)
        self.l_m_g_p = self.get_g_ps(self.leftmask, kernal_n=kernal_n, levels=levels)
        self.l_m_l_p = self.get_l_ps(self.l_m_g_p)
        self.r_m_g_p = self.get_g_ps(self.rightmask, kernal_n=kernal_n, levels=levels)
        self.r_m_l_p = self.get_l_ps(self.r_m_g_p)
        self.m_g_p = self.get_g_ps(self.mergedimg, kernal_n=kernal_n, levels=levels)
        # Blend every laplace level with the mask blurred to the same scale.
        self.m_l_p = [self._blend(left, right, weight)
                      for left, right, weight
                      in zip(self.l_l_p, self.r_l_p, self.l_m_g_p)]
        # Collapse: start from the blended coarsest gaussian level, then
        # repeatedly upsample (same operator as get_l_ps) and add detail.
        current = self._blend(self.l_g_p[-1], self.r_g_p[-1], self.l_m_g_p[-1])
        reconstruction = []
        for detail in reversed(self.m_l_p):
            dshape = (detail.shape[1], detail.shape[0])
            current = cv.pyrUp(current, dstsize=dshape) + detail
            reconstruction.append(np.clip(current, 0, 255).astype('uint8'))
        return reconstruction
import os
def generate_mask_binary():  # generate rgb mask
    '''
    Write a 3-channel mask the size of LEFTDIR's image to MASKDIR.

    The mask is 0 on the left, 255 on the right, with a linear ramp that
    spans 3% of the image width on each side of the vertical centre line.

    Side effect: changes the working directory to cwd + WORKDIR, which the
    relative image paths rely on (so it must be called once per process).

    Raises FileNotFoundError if the left image cannot be read.
    '''
    os.chdir(os.getcwd() + WORKDIR)
    img = cv.imread(LEFTDIR, cv.IMREAD_COLOR)
    if img is None:
        raise FileNotFoundError('cannot read image: {}'.format(LEFTDIR))
    mask = np.zeros(img.shape, dtype=np.uint8)
    mid = img.shape[1] // 2
    a = int(img.shape[1] * 0.03)
    if a > 0:
        # Linear ramp 0 -> 255 across the 2*a centre columns. A fixed integer
        # step would overshoot 255 before the ramp ends, and would divide by
        # zero for images too narrow to have a ramp at all.
        ramp = np.linspace(0, 255, num=2 * a, endpoint=False).astype(np.uint8)
        mask[:, mid - a:mid + a, :] = ramp[np.newaxis, :, np.newaxis]
    mask[:, mid + a:, :] = 255
    cv.imwrite(MASKDIR, mask)
def visualization(imglist):
    '''
    Show every image of imglist side by side in one row of a single
    figure, then block on plt.show().
    '''
    count = len(imglist)
    fig, axs = plt.subplots(1, count, squeeze=True)
    # With squeeze=True a single column yields one bare Axes, not an array.
    axes = [axs] if count == 1 else axs
    for ax, img in zip(axes, imglist):
        ax.imshow(img)
    plt.show()
def test():
    '''
    End-to-end run: generate the mask, blend the two images, show the mask
    pyramids and the merged laplace pyramid, and save the finest result to
    ./out.jpg.
    '''
    generate_mask_binary()
    merge = ImageMerge(LEFTDIR, RIGHTDIR, MASKDIR)
    outs = merge.merge_img_whole(kernal_n=5, levels=6)
    # The remaining pyramids (l_g_p, l_l_p, r_g_p, r_l_p, l_m_l_p, r_m_l_p,
    # m_g_p) can be inspected the same way.
    for pyramid in (merge.l_m_g_p, merge.r_m_g_p, merge.m_l_p):
        visualization(pyramid)
    out = outs[-1]
    plt.imsave('./out.jpg', out)


if __name__ == '__main__':
    test()
想要实现的效果(Computer Vision: Algorithms and Applications, 2nd Edition,P166):
因为只是简单的图像融合方法,所以很多细节需要自己调节,包括对齐啊,蒙版啊,它所能实现的只是平滑(verb)连接处而已。
这里我们使用如下的两张素材:
追求效果就是把Yeji的右眼替换为右图的眼睛。
因为选取素材的大小不一致,以及右边图像也需要一定的旋转,我们先预处理一下素材。
这一步就是边调节边观察,自己觉得差不多就行了,适当使用仿射变换即可。其实这里应该可以直接仿射位移,但是也不简单,就这样吧,苹果橘子混合就不需要很多变换了。
然后就是创建所谓的 掩膜 了,图像与掩膜相乘时,只保留白色部分,这里主要做眼睛的掩膜,反色一下就是Yeji的掩膜。也是一边做一边观察,很痛苦。
ok,虽然这样一看的话其实已经差不多了,但是我们假设连接处不是很平滑。。。重要的是重建图像。
然后就到了最重要的构建金字塔部分。
首先构建Yeji的高斯金字塔:
然后是眼睛的高斯金字塔:
然后是Yeji的laplace金字塔:
OK。。。反色输出一下看看。。。
emmmmm…ok…
眼睛的Laplace金字塔:
接着是创建掩膜的高斯金字塔:
构建用于重建图像的laplace金字塔,即过程中描述的fp3。fp1,fp2就不展示了
感觉没有什么区别的样子。。就是完美重建了拼接图像。。。效果只能说差强人意。。
我的问题,但是作业先交了再说。
完整notebook代码如下:
# %%
import numpy as np
import matplotlib.pyplot as plt
import cv2 as cv
# %%
# Load both source photos with the channel axis reversed (BGR -> RGB) so
# that matplotlib shows the expected colours.
pajamas, eye = (
    cv.imread(cv.samples.findFile(path), cv.IMREAD_COLOR)[:, :, ::-1]
    for path in ('./static/4.jpg', './static/eye.jpeg')
)
pajamas.shape, eye.shape
fig, axs = plt.subplots(1, 2)
for ax, picture in zip(axs, (pajamas, eye)):
    ax.imshow(picture)
# %%
# Preprocessing: crop the portrait, shrink / flip / rotate the eye photo and
# paste it onto a black canvas of the portrait's size.
fig,axs = plt.subplots(1,3)
# Crop the portrait to a 1000-row by 1000-column window.
pajamas_1k = pajamas[100:1100,80:1080,:]
# Note: dtype=int is required here; with a float array imshow treats the
# values as 0~1, so everything would render white.
eyebg = np.zeros(pajamas_1k.shape,dtype=int)
# Keep every 4th pixel of the top-left region, then mirror horizontally.
eye_cropped = eye[0:767:4,0:800:4,:]
eye_fliped = eye_cropped[:,::-1,:]
# NOTE(review): cv.getRotationMatrix2D takes its centre as (x, y) and
# cv.warpAffine takes dsize as (width, height); both tuples below are built
# in (rows, cols) order. The offsets used afterwards were tuned against this
# output, so confirm before changing it.
loc = (eye_fliped.shape[0]/2,eye_fliped.shape[1]/2)
shape = (eye_fliped.shape[0],eye_fliped.shape[1])
M = cv.getRotationMatrix2D(loc,-3,1)
eye_rotated = cv.warpAffine(eye_fliped,M,shape)
eye_final = eye_rotated
# NOTE(review): presumably a brightness tweak; if the array is uint8, values
# below 5 wrap around instead of clamping at 0 - confirm intended.
eye_final = eye_final-5
xoffset,yoffset,z = eye_final.shape
# Top-left corner (row, column) where the eye patch is pasted.
xloc = 310
yloc = 460
eyebg[xloc:xloc+xoffset,yloc:yloc+yoffset,:]= eye_final
axs[0].imshow(pajamas_1k)
axs[1].imshow(eyebg)
# Naive composite: blank the portrait wherever the eye canvas is non-zero,
# then add the two images.
pajamas_temp = pajamas_1k.copy()
pajamas_temp[eyebg!=0]=0
ob = pajamas_temp+eyebg
print(eye_final.shape)
axs[2].imshow(ob)
# %%
# Build the eye mask (1 = take the eye image) and its complement for the
# portrait, trimming the pasted patch with hand-picked rectangles.
fig,axs = plt.subplots(1,3)
# 310,460 510,652
# (presumably the (row, col) corners of the pasted patch - TODO confirm)
# Start from every non-zero pixel of the eye canvas, as 0/1 integers.
eye_mask = (eyebg!=0)*1
# Zero out bands of rows and columns to shrink the mask.
eye_mask[310:380,:,:]=0
eye_mask[460:510,:,:]=0
eye_mask[:,630:652,:]=0
eye_mask[:,460:490,:]=0
# 380,490 470,630
# (presumably the corners of what remains after the bands above - TODO confirm)
# Zero out further small rectangles inside the remaining region.
eye_mask[380:387,570:630,:]=0
eye_mask[380:400,490:550,:]=0
eye_mask[380:420,600:630,:]=0
eye_mask[420:470,490:500,:]=0
eye_mask[450:470,500:520,:]=0
eye_mask[450:460,500:540,:]=0
eye_mask[400:470,490:510,:]=0
axs[0].imshow(eye_mask[:,:,0],cmap='gray')
# Composite again, this time only replacing pixels selected by the mask.
pajamas_temp = pajamas_1k.copy()
pajamas_temp[eyebg*eye_mask!=0]=0
ob = pajamas_temp+eyebg*eye_mask
axs[1].imshow(ob)
# Keep a single channel of the mask; the portrait mask is its complement.
eye_mask = eye_mask[:,:,0]
yeji_mask = (eye_mask==0)*1
# plt.imshow(ob)
axs[2].imshow(yeji_mask,cmap='gray')
# %%
# Separable 5-tap kernel; the 5x5 kernel is its outer product with itself.
kernal = np.array([0.05, 0.25, 0.4, 0.25, 0.05])
gaussian_kernal = np.outer(kernal, kernal)
gaussian_kernal
# %%
# Gaussian pyramid of the cropped portrait: level 0 is the image itself,
# followed by 8 blurred-and-downsampled levels, shown on a 3x3 grid.
fig, axs = plt.subplots(3, 3, squeeze=True)
yeji_temp = pajamas_1k.copy()
pyramid_yeji = [yeji_temp]
for _ in range(8):
    temp = cv.GaussianBlur(yeji_temp, ksize=(5, 5), sigmaX=0, sigmaY=0,
                           borderType=cv.BORDER_REFLECT)
    layer = cv.pyrDown(temp, borderType=cv.BORDER_REFLECT)
    pyramid_yeji.append(layer)
    yeji_temp = layer
for level, ax in enumerate(axs.flat):
    ax.imshow(pyramid_yeji[level])
# %%
# Gaussian pyramid of the eye canvas, shown on a 3x3 grid.
fig, axs = plt.subplots(3, 3, squeeze=True)
# Pitfall: eyebg is int32 and has to be converted to uint8 first, otherwise
# OpenCV fails with
# error: (-213:The function/feature is not implemented) Unsupported combination of source format (=20), and buffer format (=21) in function 'cv::opt_AVX2::getLinearRowFilter'
eye_temp = eyebg.copy().astype('uint8')
pyramid_eye = [eye_temp]
for _ in range(8):
    temp = cv.GaussianBlur(eye_temp, ksize=(5, 5), sigmaX=0, sigmaY=0,
                           borderType=cv.BORDER_REFLECT)
    layer = cv.pyrDown(temp, borderType=cv.BORDER_REFLECT)
    pyramid_eye.append(layer)
    eye_temp = layer
for level, ax in enumerate(axs.flat):
    ax.imshow(pyramid_eye[level])
# %%
# Laplacian pyramid of the portrait: each level is a Gaussian level minus
# the upsampled next-coarser level.
lapalce_yeji = []
for i in range(8):
    # cv.pyrUp expects dstsize as (width, height), i.e. (cols, rows).
    dstshape = (pyramid_yeji[i].shape[1], pyramid_yeji[i].shape[0])
    up = cv.pyrUp(pyramid_yeji[i+1], borderType=cv.BORDER_DEFAULT, dstsize=dstshape)
    # NOTE(review): if the pyramid levels are uint8 this subtraction wraps
    # modulo 256 instead of going negative - confirm whether a signed or
    # float difference is wanted.
    lapalce_yeji.append(pyramid_yeji[i] - up)
rows, cols = 2, 4
fig, axs = plt.subplots(rows, cols)
for i in range(rows):
    for j in range(cols):
        # Row-major index: the stride is the number of columns, not rows,
        # otherwise levels 2-3 are shown twice and 6-7 never.
        axs[i][j].imshow(lapalce_yeji[i * cols + j])
# %%
# Show the portrait's Laplacian levels with inverted intensities.
rows, cols = 2, 4
fig, axs = plt.subplots(rows, cols)
for i in range(rows):
    for j in range(cols):
        # Row-major index: the stride is the number of columns, not rows,
        # otherwise levels 2-3 are shown twice and 6-7 never.
        axs[i][j].imshow(255 - lapalce_yeji[i * cols + j])
# %%
# Laplacian pyramid of the eye canvas: each level is a Gaussian level minus
# the upsampled next-coarser level.
lapalce_eye = []
for i in range(8):
    # cv.pyrUp expects dstsize as (width, height), i.e. (cols, rows).
    dstshape = (pyramid_eye[i].shape[1], pyramid_eye[i].shape[0])
    up = cv.pyrUp(pyramid_eye[i+1], borderType=cv.BORDER_DEFAULT, dstsize=dstshape)
    # NOTE(review): if the pyramid levels are uint8 this subtraction wraps
    # modulo 256 instead of going negative - confirm whether a signed or
    # float difference is wanted.
    lapalce_eye.append(pyramid_eye[i] - up)
rows, cols = 2, 4
fig, axs = plt.subplots(rows, cols)
for i in range(rows):
    for j in range(cols):
        # Row-major index: the stride is the number of columns, not rows,
        # otherwise levels 2-3 are shown twice and 6-7 never.
        axs[i][j].imshow(lapalce_eye[i * cols + j])
# %%
# Sanity check: print the shape and dtype of both masks.
print(yeji_mask.shape,eye_mask.shape,yeji_mask.dtype,eye_mask.dtype)
# %%
# Gaussian pyramid of the portrait mask, shown in grayscale on a 3x3 grid.
fig, axs = plt.subplots(3, 3, squeeze=True)
mask_temp = yeji_mask.copy().astype('uint8')
pyramid_mask_yeji = [mask_temp]
for _ in range(8):
    temp = cv.GaussianBlur(mask_temp, ksize=(5, 5), sigmaX=0, sigmaY=0,
                           borderType=cv.BORDER_REFLECT)
    layer = cv.pyrDown(temp, borderType=cv.BORDER_REFLECT)
    # Entries equal to 254 are forced to 0; the stored level and the next
    # iteration's input are this same modified array.
    layer[layer == 254] = 0
    pyramid_mask_yeji.append(layer)
    mask_temp = layer
for level, ax in enumerate(axs.flat):
    ax.imshow(pyramid_mask_yeji[level], cmap='gray')
# %%
# Gaussian pyramid of the eye mask, shown in grayscale on a 3x3 grid.
fig, axs = plt.subplots(3, 3, squeeze=True)
mask_temp = eye_mask.copy().astype('uint8')
pyramid_mask_eye = [mask_temp]
for _ in range(8):
    temp = cv.GaussianBlur(mask_temp, ksize=(5, 5), sigmaX=0, sigmaY=0,
                           borderType=cv.BORDER_REFLECT)
    layer = cv.pyrDown(temp, borderType=cv.BORDER_REFLECT)
    pyramid_mask_eye.append(layer)
    mask_temp = layer
for level, ax in enumerate(axs.flat):
    ax.imshow(pyramid_mask_eye[level], cmap='gray')
# %%
def mask(src:np.ndarray, mask):
    """Multiply each of the first three channels of src by mask.

    The products are written into a fresh uint8 array of src's shape,
    which is returned.
    """
    masked = np.zeros(src.shape, dtype='uint8')
    for channel in (0, 1, 2):
        masked[:, :, channel] = src[:, :, channel] * mask
    return masked
# Mask each Laplacian level with the mask level of the same scale, then add
# the two masked pyramids level by level and show the result on a 2x4 grid.
fp1 = [mask(detail, weight) for detail, weight in zip(lapalce_yeji, pyramid_mask_yeji)]
fp2 = [mask(detail, weight) for detail, weight in zip(lapalce_eye, pyramid_mask_eye)]
fp3 = [portrait_part + eye_part for portrait_part, eye_part in zip(fp1, fp2)]
fig, axs = plt.subplots(2, 4)
for level, ax in enumerate(axs.flat):
    ax.imshow(fp3[level])
# %%
# Gaussian pyramid of the naive composite `ob`, shown on a 3x3 grid.
fig, axs = plt.subplots(3, 3, squeeze=True)
final_temp = ob.copy().astype('uint8')
pyramid_final = [final_temp]
for _ in range(8):
    temp = cv.GaussianBlur(final_temp, ksize=(5, 5), sigmaX=0, sigmaY=0,
                           borderType=cv.BORDER_REFLECT)
    layer = cv.pyrDown(temp, borderType=cv.BORDER_REFLECT)
    pyramid_final.append(layer)
    final_temp = layer
for level, ax in enumerate(axs.flat):
    ax.imshow(pyramid_final[level])
# %%
# Mask each Gaussian level with the mask level of the same scale, add the
# two masked pyramids level by level and show the result on a 3x3 grid.
p1_1 = [mask(level, weight) for level, weight in zip(pyramid_yeji, pyramid_mask_yeji)]
p1_2 = [mask(level, weight) for level, weight in zip(pyramid_eye, pyramid_mask_eye)]
p1 = [portrait_part + eye_part for portrait_part, eye_part in zip(p1_1, p1_2)]
fig, axs = plt.subplots(3, 3)
for level, ax in enumerate(axs.flat):
    ax.imshow(p1[level])
# %%
# For levels startpoint-1 down to endpoint: add the masked Laplacian level to
# the masked Gaussian level and resize the sum to the next finer level.
startpoint = 7
endpoint = 1
construction = []
for i in range(startpoint - endpoint):
    index = startpoint - i - 1
    temp = p1[index] + fp3[index]
    # out = cv.pyrUp(temp)
    # cv.resize expects dsize as (width, height), i.e. (cols, rows).
    dshape = (p1[index - 1].shape[1], p1[index - 1].shape[0])
    # NOTE(review): each iteration starts again from p1[index]; the previous
    # `out` is never fed into the next level, so this is not a cumulative
    # pyramid collapse - confirm whether that is intended.
    out = cv.resize(temp, dsize=dshape, interpolation=cv.INTER_AREA)
    construction.append(out)
plt.imshow(construction[-1])
# %%