用拉普拉斯金字塔进行图像混合


要求
​编写一个程序,输入两幅彩色图像和一个二维掩模图像,产生两幅图像混合的拉普拉斯金字塔。

  1. ​创建每个图像各自的拉普拉斯金字塔。‌
  2. ​创建两幅掩模图像的高斯金字塔(输入图像和它的补集)。
  3. 将每幅图像乘以对应的掩模,对图像求和。
  4. 从混合的拉普拉斯金字塔中重建最终图像

这里我想解释一下我个人理解的具体流程:

  1. 分别创建两个图像各自的高斯金字塔gp1,gp2,通过高斯金字塔建立laplace金字塔lp1,lp2。
  2. 对两个掩膜分别建立自己的高斯金字塔mp1,mp2,白色保留,黑色被过滤。
  3. 两个图像laplace金字塔分别乘以自己对应的掩膜金字塔(lp1×mp1,lp2×mp2),得到两个过滤过的laplace金字塔flp1,flp2,然后相加,得到融合图像的laplace金字塔fp3。
  4. 用简单拼接而成的图像建立高斯金字塔p1,选择合适的层次,分辨率由低到高,加上fp3对应的laplace层,插值,加上上一层laplace层,插值,重复过程直至原图分辨率,图像重建完成。

英文题目要求:(Computer Vision: Algorithms and Applications, 2nd Edition,P185):
用拉普拉斯金字塔进行图像混合_第1张图片

完整代码在最后
0327更. 上面的思路是不对的,这样没有达到图像融合的效果,仅仅是图像还原。封装代码如下,可交作业。

0331更. 试了一下使用黑白交界处渐变的掩膜, 同样无法融合,反而会丢失信息,导致连接处出现杂色. (更改generate_mask_binary函数)

import numpy as np
import matplotlib.pyplot as plt
import cv2 as cv
from typing import List

# Border mode passed to cv.GaussianBlur and cv.pyrDown when Gaussian pyramids are built.
BR = cv.BORDER_REFLECT
# Paths of the two input images handed to ImageMerge in test().
LEFTDIR = './static/orange.jpg'
RIGHTDIR = './static/apple.jpg'
# Path of the mask image; generate_mask_binary() writes a file at this same location.
MASKDIR = './static/mask.jpg'

# Sub-directory appended to the current working directory by generate_mask_binary().
WORKDIR = '/assignment 5/'

class ImageMerge(object):
    '''
    Blend two colour images across a mask with Laplacian pyramids.

    Attributes filled in by merge_img_whole():
    l_g_p is short for left gaussian pyramids
    r_g_p is short for right gaussian pyramids
    l_l_p is short for left laplace pyramids
    r_l_p is short for right laplace pyramids
    l_m_g_p is short for left mask gaussian pyramids
    l_m_l_p is short for left mask laplace pyramids
    r_m_g_p is short for right mask gaussian pyramids
    r_m_l_p is short for right mask laplace pyramids
    mergedimg is the merged image of both images (plain single-scale composite)
    m_g_p is short for merged gaussian pyramids (pyramid of mergedimg)
    m_l_p is short for merged laplace pyramids (mask-weighted sum of
        l_l_p and r_l_p, level by level)

    Gaussian levels keep the dtype of the image they were built from.
    Laplace levels are signed float32 arrays, so add an offset (for example
    128) before displaying them.
    '''
    # Class-level placeholders; merge_img_whole() rebinds every one of them
    # on the instance, so the shared list objects are never mutated.
    leftimg = np.array([])
    rightimg = np.array([])
    leftmask = np.array([])
    rightmask = np.array([])
    l_g_p = []
    r_g_p = []
    l_l_p = []
    r_l_p = []
    l_m_g_p = []
    l_m_l_p = []
    r_m_g_p = []
    r_m_l_p = []
    mergedimg = np.array([])
    m_g_p = []
    m_l_p = []

    def __init__(self,leftdir,rightdir,maskdir) -> None:
        '''
        Remember the three file paths; nothing is read until
        merge_img_whole() is called.
        '''
        self.leftdir = leftdir
        self.rightdir = rightdir
        self.maskdir = maskdir

    @staticmethod
    def get_g_ps(src:np.ndarray,kernal_n=5,levels=5):
        '''
        function to get the gaussian pyramids of src

        Returns a list of levels+1 arrays: a copy of src followed by
        `levels` successively blurred and downsampled versions.
        kernal_n is the side length of the extra Gaussian blur applied
        before each cv.pyrDown call.
        '''
        gps = [src.copy()]
        for _ in range(levels):
            blurred = cv.GaussianBlur(gps[-1],ksize=(kernal_n,kernal_n),
                                      sigmaX=0,sigmaY=0,borderType=BR)
            gps.append(cv.pyrDown(blurred,borderType=BR))
        return gps

    @staticmethod
    def get_l_ps(gps):
        '''
        function to get the laplace pyramids of gps,a gaussian pyramid
        pls note that the count of layers will always be len(gps)-1

        Every layer is gps[i] minus the upsampled gps[i+1], computed in
        float32 so that negative differences are kept instead of wrapping
        around as they would in uint8 arithmetic.
        '''
        lps = []
        for fine,coarse in zip(gps[:-1],gps[1:]):
            # OpenCV sizes are (width, height), i.e. (cols, rows)
            dstshape = (fine.shape[1],fine.shape[0])
            up = cv.pyrUp(coarse,borderType=cv.BORDER_DEFAULT,
                          dstsize=dstshape)
            lps.append(fine.astype(np.float32)-up.astype(np.float32))
        return lps

    @staticmethod
    def read_imgs(dirs:List[str]):
        '''
        Lazily read every path in dirs as a 3-channel image with the channel
        axis reversed (cv.imread yields BGR, matplotlib expects RGB).
        '''
        return (cv.imread(cv.samples.findFile(path),cv.IMREAD_COLOR)[:,:,::-1]
                for path in dirs)

    @staticmethod
    def reverse_mask(mask):
        '''
        Return the complement of a 0..255 mask.
        '''
        return 255-mask

    @staticmethod
    def mask_img(src,mask):
        '''
        Weight src by mask/255.

        uint8 input gives a uint8 result (fractions truncated). Any other
        dtype gives float32, so signed Laplacian layers keep their negative
        values. A mask with one dimension less than src is applied to every
        channel.
        '''
        src = np.asarray(src)
        weights = np.asarray(mask)/255.0
        if weights.ndim == src.ndim-1:
            weights = weights[...,np.newaxis]
        weighted = src*weights
        if src.dtype == np.uint8:
            return weighted.astype('uint8')
        return weighted.astype(np.float32)

    def merge_img_whole(self,kernal_n=5,levels=5):
        '''
        Run the whole blend and return the reconstruction steps.

        Reads the two images and the mask, builds every pyramid listed in
        the class docstring, then collapses the blended Laplacian pyramid:
        start from the mask-weighted coarsest Gaussian level, and for each
        finer level upsample the running result and add the blended
        Laplacian layer of that level.

        Returns a list of `levels` uint8 images ordered from coarse to
        fine; the last entry is the full-resolution blend.

        Raises FileNotFoundError if the mask cannot be read and ValueError
        if the images and the mask do not share the same height and width.
        '''
        self.leftimg,self.rightimg = self.read_imgs([self.leftdir,self.rightdir])
        self.leftmask = cv.imread(self.maskdir,cv.IMREAD_UNCHANGED)
        if self.leftmask is None:
            raise FileNotFoundError('cannot read mask image: {!r}'.format(self.maskdir))
        if self.leftimg.shape != self.rightimg.shape \
                or self.leftmask.shape[:2] != self.leftimg.shape[:2]:
            raise ValueError('left image, right image and mask must have the same size')
        self.rightmask = self.reverse_mask(self.leftmask)
        # truncation in mask_img keeps this uint8 sum within 0..255
        self.mergedimg = self.mask_img(self.leftimg,self.leftmask) \
                        + self.mask_img(self.rightimg,self.rightmask)
        # construct the pyramids
        self.l_g_p = self.get_g_ps(self.leftimg,kernal_n=kernal_n,levels=levels)
        self.r_g_p = self.get_g_ps(self.rightimg,kernal_n=kernal_n,levels=levels)
        self.l_l_p = self.get_l_ps(self.l_g_p)
        self.r_l_p = self.get_l_ps(self.r_g_p)
        self.l_m_g_p = self.get_g_ps(self.leftmask,kernal_n=kernal_n,levels=levels)
        self.l_m_l_p = self.get_l_ps(self.l_m_g_p)
        self.r_m_g_p = self.get_g_ps(self.rightmask,kernal_n=kernal_n,levels=levels)
        self.r_m_l_p = self.get_l_ps(self.r_m_g_p)
        self.m_g_p = self.get_g_ps(self.mergedimg,kernal_n=kernal_n,levels=levels)

        # blended laplace pyramid: every band uses the mask at its own scale
        self.m_l_p = [self.mask_img(l_band,l_weight)+self.mask_img(r_band,r_weight)
                      for l_band,l_weight,r_band,r_weight
                      in zip(self.l_l_p,self.l_m_g_p,self.r_l_p,self.r_m_g_p)]

        # low-frequency base: mask-weighted mix of the coarsest gaussian levels
        current = self.mask_img(self.l_g_p[-1].astype(np.float32),self.l_m_g_p[-1]) \
                + self.mask_img(self.r_g_p[-1].astype(np.float32),self.r_m_g_p[-1])

        reconstruction = []
        for band in reversed(self.m_l_p):
            dshape = (band.shape[1],band.shape[0])
            # carry the running result forward, so each finer band is added
            # on top of everything reconstructed so far
            current = cv.pyrUp(current,borderType=cv.BORDER_DEFAULT,
                               dstsize=dshape) + band
            reconstruction.append(np.clip(current,0,255).astype('uint8'))

        return reconstruction
import os
def generate_mask_binary():  # generate rgb mask
    '''
    Write a left-to-right blending mask sized like the left input image.

    Changes the working directory to <cwd> + WORKDIR, reads LEFTDIR to get
    the image size and writes './static/mask.jpg'. The mask is 0 on the
    left, 255 on the right, with a linear ramp across a band centred on the
    middle column whose half-width is 3% of the image width.

    Raises FileNotFoundError if LEFTDIR cannot be read.
    '''
    os.chdir(os.getcwd()+WORKDIR)
    img = cv.imread(LEFTDIR,cv.IMREAD_COLOR)
    if img is None:
        raise FileNotFoundError('cannot read image: {!r}'.format(LEFTDIR))
    mask = np.zeros(img.shape,dtype=np.uint8)

    mid = img.shape[1]//2
    a = int(img.shape[1]*0.03)
    if a > 0:
        # 2*a evenly spaced values starting at 0 and staying below 255, so
        # the ramp never overshoots the valid range
        ramp = np.linspace(0,255,num=2*a,endpoint=False).astype(np.uint8)
        mask[:,mid-a:mid+a,:] = ramp[np.newaxis,:,np.newaxis]
    # for very narrow images (a == 0) this degenerates to a hard step at mid
    mask[:,mid+a:,:]=255

    cv.imwrite('./static/mask.jpg',mask)

def visualization(imglist):
    '''
    function for visualization of the imgs

    Shows every image of imglist side by side in one row and blocks in
    plt.show(). An empty list is ignored.
    '''
    count = len(imglist)
    if count == 0:
        return
    # squeeze=False always yields a 2-D axes array, so a single image needs
    # no special case
    fig,axs = plt.subplots(1,count,squeeze=False)
    for ax,ele in zip(axs[0],imglist):
        ax.imshow(ele)
    plt.show()

def test():
    '''
    End-to-end run: write the mask, blend the two images with a 6-level
    pyramid, show the mask pyramids and the merged laplace pyramid, and
    save the last reconstruction step as ./out.jpg.
    '''
    generate_mask_binary()
    blender = ImageMerge(LEFTDIR,RIGHTDIR,MASKDIR)
    steps = blender.merge_img_whole(kernal_n=5,levels=6)

    # other pyramids stored on the instance can be passed to visualization()
    # in the same way when debugging
    for pyramid in (blender.l_m_g_p,blender.r_m_g_p,blender.m_l_p):
        visualization(pyramid)

    plt.imsave('./out.jpg',steps[-1])


if __name__ =='__main__':
    test()

想要实现的效果(Computer Vision: Algorithms and Applications, 2nd Edition,P166):
用拉普拉斯金字塔进行图像混合_第2张图片
因为只是简单的图像融合方法,所以很多细节需要自己调节,包括对齐啊,蒙版啊,他所能实现的只是平滑(verb)连接处而已。
这里我们使用如下的两张素材:
用拉普拉斯金字塔进行图像混合_第3张图片

追求效果就是把Yeji的右眼替换为右图的眼睛。

因为选取素材的大小不一致,以及右边图像也需要一定的旋转,我们先预处理一下素材。
用拉普拉斯金字塔进行图像混合_第4张图片
这一步就是边调节边观察,自己觉得差不多就行了,适当使用仿射变换即可。其实这里应该可以直接仿射位移,但是也不简单,就这样吧,苹果橘子混合就不需要很多变换了。

然后就是创建所谓的 掩膜 了,图像与掩膜相乘时,只保留白色部分,这里主要做眼睛的掩膜,反色一下就是Yeji的掩膜。也是一边做一边观察,很痛苦。
用拉普拉斯金字塔进行图像混合_第5张图片
ok,虽然这样一看的话其实已经差不多了,但是我们假设连接处不是很平滑。。。重要的是重建图像。

然后就到了最重要的构建金字塔部分。
首先构建Yeji的高斯金字塔:
用拉普拉斯金字塔进行图像混合_第6张图片
然后是眼睛的高斯金字塔:
用拉普拉斯金字塔进行图像混合_第7张图片

然后是Yeji的laplace金字塔:

OK。。。反色输出一下看看。。。

emmmmm…ok…
眼睛的Laplace金字塔:
用拉普拉斯金字塔进行图像混合_第8张图片
接着是创建掩膜的高斯金字塔:
用拉普拉斯金字塔进行图像混合_第9张图片

用拉普拉斯金字塔进行图像混合_第10张图片

构建用于重建图像的laplace金字塔,即过程中描述的fp3。fp1,fp2就不展示了
用拉普拉斯金字塔进行图像混合_第11张图片

生成混合图像的高斯金字塔:
用拉普拉斯金字塔进行图像混合_第12张图片

这里从第8幅图开始向上重建。
用拉普拉斯金字塔进行图像混合_第13张图片

感觉没有什么区别的样子。。就是完美重建了拼接图像。。。效果只能说差强人意。。

我的问题,但是作业先交了再说。

完整notebook代码如下:

# %%
import numpy as np
import matplotlib.pyplot as plt
import cv2 as cv

# %%
# Load both source photos. [:,:,::-1] reverses the channel axis so that the
# BGR data returned by cv.imread is displayed correctly by matplotlib (RGB).
pajamas = cv.imread(cv.samples.findFile('./static/4.jpg'),cv.IMREAD_COLOR)[:,:,::-1]
eye = cv.imread(cv.samples.findFile('./static/eye.jpeg'),cv.IMREAD_COLOR)[:,:,::-1]
# Bare expression: it is not the last line of the cell, so nothing is shown.
pajamas.shape,eye.shape
# Show the two inputs side by side.
fig,axs = plt.subplots(1,2)
axs[0].imshow(pajamas)
axs[1].imshow(eye)

# %%
# Pre-process the inputs: crop the portrait, shrink / mirror / rotate the eye
# photo, and paste it onto a black canvas of the portrait's size.
fig,axs =  plt.subplots(1,3)
# 1000x1000 crop of the portrait
pajamas_1k = pajamas[100:1100,80:1080,:]
# Note: dtype must be int here; with a float array imshow treats the values
# as 0~1 and everything is rendered white.
eyebg = np.zeros(pajamas_1k.shape,dtype=int)
# keep every 4th pixel in both directions
eye_cropped = eye[0:767:4,0:800:4,:]
# mirror horizontally
eye_fliped = eye_cropped[:,::-1,:]
# NOTE(review): cv.getRotationMatrix2D takes the centre as (x, y) and
# cv.warpAffine takes dsize as (width, height), but both tuples below are
# built as (rows, cols). The hand-tuned offsets further down depend on the
# resulting shape, so this is left untouched - confirm before changing.
loc = (eye_fliped.shape[0]/2,eye_fliped.shape[1]/2)
shape = (eye_fliped.shape[0],eye_fliped.shape[1])
M = cv.getRotationMatrix2D(loc,-3,1)
eye_rotated = cv.warpAffine(eye_fliped,M,shape)
eye_final = eye_rotated
# NOTE(review): presumably meant to darken the patch slightly; if the array
# is uint8, values below 5 wrap around instead of clamping - confirm.
eye_final = eye_final-5
# xoffset / yoffset are the row and column counts of the patch
xoffset,yoffset,z = eye_final.shape
# top-left corner (row, column) where the patch is pasted
xloc = 310
yloc = 460

eyebg[xloc:xloc+xoffset,yloc:yloc+yoffset,:]= eye_final

axs[0].imshow(pajamas_1k)
axs[1].imshow(eyebg)

# Naive composite: blank the portrait wherever the eye canvas is non-zero,
# then add the eye canvas.
pajamas_temp = pajamas_1k.copy()
pajamas_temp[eyebg!=0]=0
ob = pajamas_temp+eyebg
print(eye_final.shape)
axs[2].imshow(ob)

# %%
# Build the eye mask by hand: start from every non-zero pixel of the pasted
# patch, then clear rectangular regions until only the eye itself remains.
fig,axs = plt.subplots(1,3)

# Author's coordinate note (presumably row,col corners of the pasted patch):
# 310,460    510,652
eye_mask = (eyebg!=0)*1
eye_mask[310:380,:,:]=0
eye_mask[460:510,:,:]=0
eye_mask[:,630:652,:]=0
eye_mask[:,460:490,:]=0
# Author's coordinate note (presumably the region left after the cuts above):
# 380,490    470,630
eye_mask[380:387,570:630,:]=0
eye_mask[380:400,490:550,:]=0
eye_mask[380:420,600:630,:]=0
eye_mask[420:470,490:500,:]=0
eye_mask[450:470,500:520,:]=0
eye_mask[450:460,500:540,:]=0
eye_mask[400:470,490:510,:]=0


axs[0].imshow(eye_mask[:,:,0],cmap='gray')

# Composite again, this time only inside the trimmed mask.
pajamas_temp = pajamas_1k.copy()
pajamas_temp[eyebg*eye_mask!=0]=0
ob = pajamas_temp+eyebg*eye_mask
axs[1].imshow(ob)

# Reduce the mask to a single channel; the portrait mask is its complement
# (1 wherever the eye mask is 0).
eye_mask = eye_mask[:,:,0]
yeji_mask = (eye_mask==0)*1

# plt.imshow(ob)

axs[2].imshow(yeji_mask,cmap='gray')

# %%
# 5-tap 1-D kernel and its outer product with itself, giving a 5x5 kernel.
# The result is only displayed here; the cells below call cv.GaussianBlur
# with its own kernel instead.
kernal = np.array([0.05,0.25,0.4,0.25,0.05])
gaussian_kernal = kernal.reshape((-1,1))*kernal
gaussian_kernal

# %%
fig,axs = plt.subplots(3,3,squeeze=True)

# Gaussian pyramid of the cropped portrait: level 0 is the image itself and
# each of the 8 further levels is a blurred, downsampled copy of the previous.
yeji_temp = pajamas_1k.copy()
# placeholder list with 9 slots; every slot is overwritten below
pyramid_yeji = [np.array(i) for i in range(9)]
pyramid_yeji[0]=yeji_temp
for i in range(8):
    # NOTE(review): cv.pyrDown presumably smooths on its own as well, so each
    # level is blurred twice - confirm this is intended.
    temp = cv.GaussianBlur(yeji_temp,ksize=(5,5),sigmaX=0,sigmaY=0,borderType=cv.BORDER_REFLECT)
    layer = cv.pyrDown(temp,borderType=cv.BORDER_REFLECT)
    # pyramid_yeji.append(layer)
    pyramid_yeji[i+1]=layer
    yeji_temp =layer

# show the 9 levels row by row in the 3x3 grid
for i in range(3):
    for  j in range(3):
        axs[i][j].imshow(pyramid_yeji[i*3+j])



# %%
fig,axs = plt.subplots(3,3,squeeze=True)
# Pitfall: eyebg has dtype int32 and must be converted to uint8, otherwise
# OpenCV raises:
# error: (-213:The function/feature is not implemented) Unsupported combination of source format (=20), and buffer format (=21) in function 'cv::opt_AVX2::getLinearRowFilter'
#
# Gaussian pyramid of the eye canvas (eye patch on a black background),
# built the same way as the portrait pyramid: 9 levels in total.
eye_temp = eyebg.copy().astype('uint8')
# eye_temp = eye_temp +pajamas_temp
# placeholder list with 9 slots; every slot is overwritten below
pyramid_eye = [np.array(i) for i in range(9)]
pyramid_eye[0]=eye_temp
for i in range(8):
    temp = cv.GaussianBlur(eye_temp,ksize=(5,5),sigmaX=0,sigmaY=0,borderType=cv.BORDER_REFLECT)
    layer = cv.pyrDown(temp,borderType=cv.BORDER_REFLECT)
    # pyramid_eye.append(layer)
    pyramid_eye[i+1]=layer
    eye_temp =layer

# show the 9 levels row by row in the 3x3 grid
for i in range(3):
    for  j in range(3):
        axs[i][j].imshow(pyramid_eye[i*3+j])

# %%
# Laplacian pyramid of the portrait: each of the 8 levels is the Gaussian
# level minus the upsampled next-coarser Gaussian level.
lapalce_yeji = []
for i in range(8):
    # cv.pyrUp expects dstsize as (width, height), i.e. (cols, rows)
    dstshape=(pyramid_yeji[i].shape[1],pyramid_yeji[i].shape[0])
    up = cv.pyrUp(pyramid_yeji[i+1],borderType=cv.BORDER_DEFAULT,dstsize=dstshape)
    # NOTE(review): both operands are presumably uint8, so negative
    # differences wrap modulo 256. Later cells rely on the uint8 dtype, so
    # the arithmetic is left as is here - a signed or float type is needed
    # for a true Laplacian.
    lapalce_yeji.append(pyramid_yeji[i]-up)
rows,cols = 2,4
fig,axs = plt.subplots(rows,cols)
for i in range(rows):
    for j in range(cols):
        # row-major position in a rows x cols grid is i*cols+j
        axs[i][j].imshow(lapalce_yeji[i*cols+j])

# %%
# Show the portrait's Laplacian levels again with inverted intensities.
rows,cols = 2,4
fig,axs = plt.subplots(rows,cols)
for i in range(rows):
    for j in range(cols):
        # row-major position in a rows x cols grid is i*cols+j
        axs[i][j].imshow(255-lapalce_yeji[i*cols+j])

# %%
# Laplacian pyramid of the eye canvas: each of the 8 levels is the Gaussian
# level minus the upsampled next-coarser Gaussian level.
lapalce_eye = []
for i in range(8):
    # cv.pyrUp expects dstsize as (width, height), i.e. (cols, rows)
    dstshape=(pyramid_eye[i].shape[1],pyramid_eye[i].shape[0])
    up = cv.pyrUp(pyramid_eye[i+1],borderType=cv.BORDER_DEFAULT,dstsize=dstshape)
    # NOTE(review): both operands are uint8 (pyramid_eye starts from an
    # astype('uint8') copy), so negative differences wrap modulo 256. Later
    # cells rely on the uint8 dtype, so the arithmetic is left as is here.
    lapalce_eye.append(pyramid_eye[i]-up)
rows,cols = 2,4
fig,axs = plt.subplots(rows,cols)
for i in range(rows):
    for j in range(cols):
        # row-major position in a rows x cols grid is i*cols+j
        axs[i][j].imshow(lapalce_eye[i*cols+j])

# %%
# Sanity check: print shape and dtype of both single-channel masks.
print(yeji_mask.shape,eye_mask.shape,yeji_mask.dtype,eye_mask.dtype)

# %%
fig,axs = plt.subplots(3,3,squeeze=True)
# Gaussian pyramid of the portrait mask (1 where the portrait is kept, 0 over
# the eye region), 9 levels in total.
# NOTE(review): yeji_mask only holds 0/1 and is cast to uint8, so blurring
# can presumably only round back to 0 or 1. Scaling the mask to 0..255 (or
# using float) first would be needed for smooth blending weights - confirm.
mask_temp = yeji_mask.copy().astype('uint8')
# placeholder list with 9 slots; every slot is overwritten below
pyramid_mask_yeji = [np.array(i) for i in range(9)]
pyramid_mask_yeji[0]=mask_temp
for i in range(8):
    temp = cv.GaussianBlur(mask_temp,ksize=(5,5),sigmaX=0,sigmaY=0,borderType=cv.BORDER_REFLECT)
    layer = cv.pyrDown(temp,borderType=cv.BORDER_REFLECT)
    pyramid_mask_yeji[i+1]=layer
    # layer is the same array object that was just stored, so this in-place
    # edit also changes the pyramid entry.
    # NOTE(review): a 0/1 mask never contains 254, so this looks like a
    # leftover from a 0..255 mask - confirm.
    layer[layer==254]=0
    mask_temp =layer

# show the 9 levels row by row in the 3x3 grid
for i in range(3):
    for  j in range(3):
        axs[i][j].imshow(pyramid_mask_yeji[i*3+j],cmap='gray')

# %%
fig,axs = plt.subplots(3,3,squeeze=True)
# Gaussian pyramid of the eye mask (1 inside the trimmed eye region), 9
# levels in total.
# NOTE(review): like the portrait mask this is a 0/1 uint8 array, so the
# blurred levels presumably stay binary - confirm.
mask_temp = eye_mask.copy().astype('uint8')
# placeholder list with 9 slots; every slot is overwritten below
pyramid_mask_eye = [np.array(i) for i in range(9)]
pyramid_mask_eye[0] = mask_temp
for i in range(8):
    temp = cv.GaussianBlur(mask_temp,ksize=(5,5),sigmaX=0,sigmaY=0,borderType=cv.BORDER_REFLECT)
    layer = cv.pyrDown(temp,borderType=cv.BORDER_REFLECT)
    pyramid_mask_eye[i+1]=layer
    mask_temp =layer

# show the 9 levels row by row in the 3x3 grid
for i in range(3):
    for  j in range(3):
        axs[i][j].imshow(pyramid_mask_eye[i*3+j],cmap='gray')

# %%
def mask(src:np.ndarray,mask):
    """Multiply the first three channels of src by a 2-D mask.

    Returns a new uint8 array shaped like src; each channel product is
    written into the uint8 result.
    """
    masked = np.zeros(src.shape,dtype='uint8')
    for channel in (0,1,2):
        masked[:,:,channel] = np.multiply(src[:,:,channel],mask)
    return masked

# Mask every Laplacian level with the mask pyramid level of the same index:
# fp1 for the portrait, fp2 for the eye.
fp1 = [mask(lapalce_yeji[i],pyramid_mask_yeji[i]) for i in range(len(lapalce_yeji))]
fp2 = [mask(lapalce_eye[i],pyramid_mask_eye[i]) for i in range(len(lapalce_eye))]

# fp3: the merged Laplacian pyramid, the level-wise sum of the two masked pyramids.
fp3 = [fp1[i]+fp2[i] for i in range(len(fp1))]

# show the 8 merged levels in a 2x4 grid, row by row
fig,axs = plt.subplots(2,4)
for i in range(2):
    for j in range(4):
        axs[i][j].imshow(fp3[i*4+j])

# %%
fig,axs = plt.subplots(3,3,squeeze=True)

# Gaussian pyramid of the naive composite `ob`, 9 levels in total.
final_temp = ob.copy().astype('uint8')
# placeholder list with 9 slots; every slot is overwritten below
pyramid_final = [np.array(i) for i in range(9)]
pyramid_final[0]=final_temp
for i in range(8):
    temp = cv.GaussianBlur(final_temp,ksize=(5,5),sigmaX=0,sigmaY=0,borderType=cv.BORDER_REFLECT)
    layer = cv.pyrDown(temp,borderType=cv.BORDER_REFLECT)
    # pyramid_final.append(layer)
    pyramid_final[i+1]=layer
    final_temp =layer

# show the 9 levels row by row in the 3x3 grid
for i in range(3):
    for  j in range(3):
        axs[i][j].imshow(pyramid_final[i*3+j])

# %%
# Mask every Gaussian level with the mask pyramid level of the same index and
# add the two results: p1 is a level-wise composite of both Gaussian pyramids.
p1_1 = [mask(pyramid_yeji[i],pyramid_mask_yeji[i]) for i in range(len(pyramid_yeji))]
p1_2 = [mask(pyramid_eye[i],pyramid_mask_eye[i]) for i in range(len(pyramid_eye))]

p1 = [p1_1[i] + p1_2[i] for i in range(len(pyramid_yeji))]

# show the 9 composite levels row by row in a 3x3 grid
fig,axs = plt.subplots(3,3)
for i in range(3):
    for j in range(3):
        axs[i][j].imshow(p1[i*3+j])


# %%
# Reconstruction attempt: walk from pyramid level startpoint-1 down to level
# endpoint, and collect one upscaled image per step in `construction`.
startpoint = 7
endpoint = 1
construction = []

for i in range(startpoint-endpoint):
    index = startpoint-i-1
    # Gaussian composite and merged Laplacian of the SAME level are added.
    # NOTE(review): every step starts again from p1[index]; the previous
    # `out` is never carried forward, so construction[-1] is only
    # p1[1]+fp3[1] resized to the level-0 size. A Laplacian collapse would
    # upsample the running result and add the next finer Laplacian level.
    # NOTE(review): both operands are uint8 arrays (mask() returns uint8),
    # so sums above 255 wrap around.
    temp = p1[index] + fp3[index]
    # out = cv.pyrUp(temp)
    # NOTE(review): cv.resize expects dsize as (width, height); (rows, cols)
    # is passed here, which only matches for square levels - confirm.
    dshape = (p1[index-1].shape[0],p1[index-1].shape[1])
    out = cv.resize(temp,dsize=dshape,interpolation=cv.INTER_AREA)
    construction.append(out)

# show the last (largest) step
plt.imshow(construction[-1])

# %%




你可能感兴趣的:(CV,计算机视觉,图像处理)