飞桨(PaddlePaddle)图像分割全流程实现

最近, 百度AI Studio 推出了一项关于图像分割的课程——一个时长七天的短平快图像分割专项课程,详细介绍了几种经典的图像分割网络(FCN8s、U-Net、PSPNet 和 DeepLab 系列),包括飞桨平台的数据处理、模型搭建、模型训练与预测等相关技能。本篇文章会对一个礼拜以来的课程做一个整体性回顾。

利用飞桨构建一个数据加载器(dataloader)

飞桨提供了丰富的API用以模型训练前的数据加载,通过构造一个dataloader,生成相应的训练数据和标签作为模型的输入,如下代码所示,dataloader类可以根据自己的要求实现自定义的一些数据处理流程。


class BasicDataLoader(object):
    """Yield (image, label) training samples listed in an index file.

    Each line of `image_list_file` holds "<image> <label>" paths
    relative to `image_folder`. Calling the instance returns a generator
    suitable for fluid's `set_sample_generator`.
    """
    def __init__(self,
                 image_folder,
                 image_list_file,
                 transform=Transform(256),
                 shuffle=True
                 ):
        self.image_folder = image_folder
        self.image_list_file = image_list_file
        # Bug fix: honor the caller-supplied transform; the original
        # always rebuilt Transform(256), silently ignoring the argument.
        self.transform = transform
        self.shuffle = shuffle

        self.data_list = self.read_list()

    def read_list(self):
        """Parse the list file into [(image_path, label_path), ...]."""
        data_list = []
        with open(self.image_list_file) as infile:
            for line in infile:
                parts = line.split()
                image_path = os.path.join(self.image_folder, parts[0])
                label_path = os.path.join(self.image_folder, parts[1])
                data_list.append((image_path, label_path))
        return data_list

    def preprocess(self, data, label):
        """Apply the transform and give the 2-D label a channel axis."""
        h, w, c = data.shape
        h_gt, w_gt = label.shape
        assert h == h_gt, "image/label height mismatch"
        assert w == w_gt, "image/label width mismatch"
        if self.transform:
            data, label = self.transform(data, label)
        # label is HxW; append a trailing channel axis -> HxWx1.
        label = label[:, :, np.newaxis]
        # Bug fix: data is already HxWxC; the original appended a fourth
        # axis here, producing a malformed 4-D sample.
        return data, label

    def __len__(self):
        return len(self.data_list)

    def __call__(self):
        import random
        samples = list(self.data_list)
        # Bug fix: the shuffle flag was previously ignored.
        if self.shuffle:
            random.shuffle(samples)
        for image_path, label_path in samples:
            image = cv2.imread(image_path, cv2.IMREAD_COLOR)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            label = cv2.imread(label_path, cv2.IMREAD_GRAYSCALE)
            image, label = self.preprocess(image, label)
            yield image, label

通过以下代码,利用paddle生成dataloader,并且可以实现自定义batch_size的大小。

def main():
    """Smoke-test BasicDataLoader through fluid's DataLoader wrapper."""
    batch_size = 5
    place = fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        # Build the sample generator over the dummy data set.
        basic_dataloader = BasicDataLoader(
            image_folder="./dummy_data",
            image_list_file="./dummy_data/list.txt",
            transform=Transform(256),
            shuffle=True)

        # Wrap the generator in a fluid DataLoader that batches samples.
        dataloader = fluid.io.DataLoader.from_generator(
            capacity=1, use_multiprocess=False)
        dataloader.set_sample_generator(
            basic_dataloader,
            batch_size=batch_size,
            places=place)

        num_epoch = 2
        for epoch in range(1, num_epoch + 1):
            print(f'Epoch [{epoch}/{num_epoch}]:')
            for idx, (data, label) in enumerate(dataloader):
                print(f'Iter {idx}, Data shape: {data.shape}, Label shape: {label.shape}')

if __name__ == "__main__":
    main()

数据增强实现

实际的图像分割中,需要用到一些数据增强的功能,也就是Transform实现,基于飞桨的Transform实现代码如下所示:

import cv2
import numpy as np
import paddle.fluid as fluid
import os
class Compose(object):
    """Chain transforms into a single callable.

    Backward compatible generalization: called with only an image, each
    transform takes and returns the image (original behavior); called
    with (image, label), each transform must take and return the pair —
    matching how BasicDataLoader invokes `self.transform(data, label)`.
    """
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, label=None):
        for t in self.transforms:
            if label is None:
                image = t(image)
            else:
                image, label = t(image, label)
        if label is None:
            return image
        return image, label


class Normalize(object):
    """Standardize an image: out = (image * scale - mean) / std.

    val_scale == 1 means mean/std are given in the (0, 1) range, so
    (0, 255) pixel values are first rescaled by 1/255.
    """
    def __init__(self, mean_val, std_val, val_scale=1):
        self.mean = np.array(mean_val, dtype=np.float32)
        self.std = np.array(std_val, dtype=np.float32)
        self.val_scale = 1 / 255.0 if val_scale == 1 else 1

    def __call__(self, image, label=None):
        scaled = image.astype(np.float32) * self.val_scale
        centered = scaled - self.mean
        # Multiply by the reciprocal, as the original did.
        normalized = centered * (1 / self.std)
        return normalized, label


class ConvertDataType(object):
    """Cast the image to float32 and, when present, the label to int64."""
    def __call__(self, image, label=None):
        converted = image.astype(np.float32)
        if label is None:
            return converted, None
        return converted, label.astype(np.int64)


class Pad(object):
    """Pad image (and label) up to a square `size` on all sides.

    Images are padded with `mean_val`; labels with `ignore_label` so the
    padded pixels can be excluded from the loss.
    """
    def __init__(self, size, ignore_label=255, mean_val=0, val_scale=1):
        # val_scale == 1 means mean_val is given in (0, 1) and must be
        # rescaled to (0, 255); val_scale == 255 means it already is.
        factor = 255 if val_scale == 1 else 1

        self.size = size
        self.ignore_label = ignore_label
        self.mean_val = mean_val
        # Convert the fill value(s) from 0-1 floats to 0-255 ints.
        if isinstance(self.mean_val, (tuple, list)):
            self.mean_val = [int(x * factor) for x in self.mean_val]
        else:
            self.mean_val = int(self.mean_val * factor)

    def __call__(self, image, label=None):
        h, w, c = image.shape
        pad_h = max(self.size - h, 0)
        pad_w = max(self.size - w, 0)

        pad_h_half = pad_h // 2
        pad_w_half = pad_w // 2

        if pad_h > 0 or pad_w > 0:
            image = cv2.copyMakeBorder(image,
                                       top=pad_h_half,
                                       left=pad_w_half,
                                       bottom=pad_h - pad_h_half,
                                       right=pad_w - pad_w_half,
                                       borderType=cv2.BORDER_CONSTANT,
                                       value=self.mean_val)
            # Bug fix: the label must be padded too (with ignore_label),
            # otherwise image/label sizes diverge downstream.
            if label is not None:
                label = cv2.copyMakeBorder(label,
                                           top=pad_h_half,
                                           left=pad_w_half,
                                           bottom=pad_h - pad_h_half,
                                           right=pad_w - pad_w_half,
                                           borderType=cv2.BORDER_CONSTANT,
                                           value=self.ignore_label)

        # Keep the original single-value return for image-only pipelines.
        if label is None:
            return image
        return image, label


# TODO
class CenterCrop(object):
    """Crop the central half of the image and resize back to its size."""

    def __call__(self, data):
        # cv2 arrays are (rows, cols, channels); name them accordingly.
        h, w, _ = data.shape

        target_h = h // 2
        target_w = w // 2
        start_y = (h - target_h) // 2
        start_x = (w - target_w) // 2

        # Bug fix: the original indexed `self.data`, which is never set,
        # raising AttributeError on every call; crop the argument instead.
        cropped = data[start_y:start_y + target_h, start_x:start_x + target_w, :]
        # cv2.resize takes the target size as (width, height).
        return cv2.resize(cropped, (w, h))







# TODO
class Resize(object):
    """Resize an image to a fixed (h, w) spatial size via cv2.resize."""
    def __init__(self, h, w):
        self.h = h
        self.w = w

    def __call__(self, data):
        # cv2.resize takes the target size as (width, height).
        return cv2.resize(data, (self.w, self.h))
        




# TODO
class RandomFlip(object):
    """Randomly flip the image.

    The flip fires when a uniform draw exceeds `rate`; `h2v` selects
    which cv2 flip code is used.
    """
    def __init__(self,rate=0.5,h2v=1):
        #self.data = data
        # Probability threshold: flip when np.random.random() > rate.
        self.rate = rate
        # h2v == 0 -> cv2.flip(..., 1); anything else -> cv2.flip(..., 0).
        self.h2v = h2v
    def __call__(self,data):
        
        if np.random.random()>self.rate:
            # cv2 flipCode=1 mirrors around the y-axis (left-right).
            # NOTE(review): the original comment labeled this "vertical";
            # the flag/direction pairing looks inverted — confirm intent.
            if self.h2v==0:
                data = cv2.flip(data,1)
            # cv2 flipCode=0 mirrors around the x-axis (up-down).
            else:
                data = cv2.flip(data,0)
        return data

        


# TODO
class RandomCrop(object):
    """Crop a random `rate`-sized region and resize it back to the input size."""
    def __init__(self, rate=0.8):
        # Fraction of each side kept by the crop.
        self.rate = rate

    def __call__(self, data):
        h, w, _ = data.shape
        target_h = int(h * self.rate)
        target_w = int(w * self.rate)

        # Bug fixes: pick a random top-left corner (the original always
        # cropped the center, so "RandomCrop" was deterministic) and use
        # target_h for the row extent (the original reused target_w,
        # mis-cropping non-square images). Debug prints removed.
        start_y = np.random.randint(0, h - target_h + 1)
        start_x = np.random.randint(0, w - target_w + 1)

        crop = data[start_y:start_y + target_h, start_x:start_x + target_w, :]
        # cv2.resize takes the target size as (width, height).
        return cv2.resize(crop, (w, h))


# TODO
class Scale(object):
    """Crop a (target_h x target_w) window anchored at the far corner,
    then stretch it back to the input's spatial size.

    NOTE(review): `data.shape` is (rows, cols, channels), so the local
    names `w`/`h` are swapped relative to cv2 convention, and the
    crop-then-resize behavior differs from a typical "Scale" transform —
    confirm intent before reusing. Also assumes target_* <= image dims.
    """
    def __init__(self,target_w,target_h):
        #self.data = data
        self.target_w = target_w
        self.target_h = target_h
    def __call__(self,data):
        w,h,_ =  data.shape
        # Window anchored so it ends at the image's bottom-right corner.
        start_x = w-self.target_w
        start_y = h-self.target_h
        zeros = data[start_y:start_y+self.target_h,start_x:start_x+self.target_w,:]
        
        # Stretch the crop back to the original spatial size.
        zeros = cv2.resize(zeros,(w,h))
        return zeros



# TODO
class RandomScale(object):
    """Crop a `ratio`-sized region (random or centered) and resize it back."""
    def __init__(self, ratio=0.8, rand=True):
        # ratio: fraction of each side kept; rand=False uses a center crop.
        self.ratio = ratio
        self.rand = rand

    def __call__(self, data):
        # cv2 arrays are (rows, cols, channels).
        h, w, _ = data.shape
        target_h = int(h * self.ratio)
        target_w = int(w * self.ratio)

        if self.rand:
            # Bug fix: +1 makes the upper bound inclusive, so ratio == 1
            # (target == full size) no longer raises ValueError from
            # np.random.randint(0, 0).
            start_y = np.random.randint(0, h - target_h + 1)
            start_x = np.random.randint(0, w - target_w + 1)
        else:
            start_y = (h - target_h) // 2
            start_x = (w - target_w) // 2

        crop = data[start_y:start_y + target_h, start_x:start_x + target_w, :]
        # cv2.resize takes the target size as (width, height).
        return cv2.resize(crop, (w, h))

自定义实现Transform后,利用paddle实现自定义Transform组合的代码如下所示:

# Transform 测试

def main():
    """Visual smoke test: run the augmentation pipeline and save samples."""
    image = cv2.imread('./dummy_data/JPEGImages/2008_000064.jpg')
    label = cv2.imread('./dummy_data/GroundTruth_trainval_png/2008_000064.png')
    print(image.shape)

    # Compose the augmentations: resize, random scale/flip, pad, random crop.
    augment = Compose([
        Resize(256, 256),
        RandomScale(),
        RandomFlip(),
        Pad(256),
        RandomCrop()])

    # Bug fix: the original body had broken indentation (IndentationError)
    # and referenced an undefined `i`; generate several augmented samples
    # in a loop so each file name index is defined.
    for i in range(5):
        new_image = augment(image)
        cv2.imwrite('aug_image{}.png'.format(i), new_image)

if __name__ == "__main__":
    main()

模型构建

飞桨提供了基于动态图模式的模型构建方式,即通过类的初始化方式定义模型如卷积,池化的一些基本操作,而通过forward方式,实现模型的前向传播。一个典型的U-Net网络的模型搭建代码如下图所示:

import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph import to_variable
from paddle.fluid.dygraph import Layer
from paddle.fluid.dygraph import Conv2D
from paddle.fluid.dygraph import BatchNorm
from paddle.fluid.dygraph import Pool2D
from paddle.fluid.dygraph import Conv2DTranspose


class Encoder(Layer):
    """U-Net encoder stage: two 3x3 conv+BN+ReLU blocks, then 2x2 max pool.

    forward returns both the pre-pool features (for the skip connection)
    and the pooled features (input to the next stage).
    """
    def __init__(self, num_channels, num_filters):
        super(Encoder, self).__init__()
        # Two same-padding 3x3 convolutions; ReLU is fused into BatchNorm.
        self.conv1 = Conv2D(num_channels, num_filters,
                            filter_size=3, stride=1, padding=1)
        self.bn1 = BatchNorm(num_filters, act='relu')
        self.conv2 = Conv2D(num_filters, num_filters,
                            filter_size=3, stride=1, padding=1)
        self.bn2 = BatchNorm(num_filters, act='relu')
        # ceil_mode keeps odd spatial sizes from dropping a row/column.
        self.pool = Pool2D(pool_size=2, pool_stride=2,
                           pool_type='max', ceil_mode=True)

    def forward(self, inputs):
        features = self.bn1(self.conv1(inputs))
        features = self.bn2(self.conv2(features))
        pooled = self.pool(features)
        # (skip-connection features, downsampled features)
        return features, pooled


class Decoder(Layer):
    """U-Net decoder stage: 2x2 transpose conv (2x upsample), concat with
    the encoder skip features, then two 3x3 conv+BN+ReLU blocks."""

    def __init__(self, num_channels, num_filters):
        super(Decoder, self).__init__()
        # Doubles the spatial size and halves the channel count.
        self.up = Conv2DTranspose(num_channels=num_channels,
                                  num_filters=num_filters,
                                  filter_size=2,
                                  stride=2)
        # After concat, channels = num_filters (skip) + num_filters
        # (upsampled) == num_channels, matching conv1's input.
        self.conv1 = Conv2D(num_channels, num_filters,
                            filter_size=3, stride=1, padding=1)
        self.bn1 = BatchNorm(num_filters, act='relu')
        self.conv2 = Conv2D(num_filters, num_filters,
                            filter_size=3, stride=1, padding=1)
        self.bn2 = BatchNorm(num_filters, act='relu')

    def forward(self, inputs_prev, inputs):
        x = self.up(inputs)
        # Pad the upsampled map so it matches the skip features exactly.
        h_diff = inputs_prev.shape[2] - x.shape[2]
        w_diff = inputs_prev.shape[3] - x.shape[3]
        # Bug fix: the original used paddings [h_diff, h_diff - h_diff//2,
        # w_diff, w_diff - w_diff//2], adding more total padding than the
        # size difference and breaking the concat for odd differences;
        # split each difference across the two sides instead.
        x = fluid.layers.pad2d(x, paddings=[h_diff // 2,
                                            h_diff - h_diff // 2,
                                            w_diff // 2,
                                            w_diff - w_diff // 2])
        x = fluid.layers.concat([inputs_prev, x], axis=1)
        x = self.bn1(self.conv1(x))
        x = self.bn2(self.conv2(x))
        return x


class UNet(Layer):
    """Classic U-Net: four encoder stages (3->64->128->256->512), a
    512->1024 bottleneck of two 1x1 conv+BN+ReLU blocks, four decoder
    stages with skip connections, and a 1x1 classification head."""

    def __init__(self, num_classes=59):
        super(UNet, self).__init__()
        # Contracting path.
        self.down1 = Encoder(num_channels=3, num_filters=64)
        self.down2 = Encoder(num_channels=64, num_filters=128)
        self.down3 = Encoder(num_channels=128, num_filters=256)
        self.down4 = Encoder(num_channels=256, num_filters=512)

        # Bottleneck.
        self.mid_conv1 = Conv2D(512, 1024, filter_size=1, padding=0, stride=1)
        self.mid_bn1 = BatchNorm(1024, 'relu')
        self.mid_conv2 = Conv2D(1024, 1024, filter_size=1, stride=1, padding=0)
        self.mid_bn2 = BatchNorm(1024, 'relu')

        # Expanding path; input channels are doubled by the skip concat.
        self.up4 = Decoder(1024, 512)
        self.up3 = Decoder(512, 256)
        self.up2 = Decoder(256, 128)
        self.up1 = Decoder(128, 64)

        # Per-pixel classification head.
        self.last_conv = Conv2D(num_channels=64, num_filters=num_classes,
                                filter_size=1)

    def forward(self, inputs):
        # Each encoder returns (skip features, pooled features).
        skip1, x = self.down1(inputs)
        skip2, x = self.down2(x)
        skip3, x = self.down3(x)
        skip4, x = self.down4(x)

        x = self.mid_bn1(self.mid_conv1(x))
        x = self.mid_bn2(self.mid_conv2(x))

        x = self.up4(skip4, x)
        x = self.up3(skip3, x)
        x = self.up2(skip2, x)
        x = self.up1(skip1, x)

        return self.last_conv(x)

模型训练

根据dataloader和模型的定义,可以定义模型的训练代码如下所示,此过程,可以自定义优化算法,损失函数和一些基本的参数如:学习率,epoch等。

def train():
    """Training driver: build data, model, loss and optimizer, then run
    epochs and periodically checkpoint model/optimizer state."""
    # Step 0: preparation
    place = paddle.fluid.CUDAPlace(0)
    with fluid.dygraph.guard(place):
        # Step 1: training dataloader
        train_reader = train_dataloader(args.batch_size)

        # Step 2: model selection
        if args.net == 'unet':
            model = UNet(num_classes=59)
        if args.net == 'pspnet':
            model = PSPNet(num_classes=59)

        # Step 3: criterion and optimizer
        criterion = Basic_SegLoss
        opt = AdamOptimizer(learning_rate=args.lr,
                            parameter_list=model.parameters())

        # Step 4: training loop
        for epoch in range(1, args.num_epochs + 1):
            # Bug fix: the original recursively called this zero-argument
            # train() with five arguments, which would raise TypeError.
            # NOTE(review): assumes a per-epoch helper named
            # train_one_epoch exists elsewhere in the project — confirm.
            train_loss = train_one_epoch(train_reader,
                                         model,
                                         criterion,
                                         opt,
                                         epoch)

            if epoch % args.save_freq == 0 or epoch == args.num_epochs:
                model_path = os.path.join(
                    args.checkpoint_folder,
                    f"{args.net}-Epoch-{epoch}-Loss-{train_loss}")

                # Bug fix: save under model_path; the original wrote to
                # fixed names, so the printed paths never existed.
                fluid.dygraph.save_dygraph(model.state_dict(), model_path)
                fluid.dygraph.save_dygraph(opt.state_dict(), model_path)

                print(f'----- Save model: {model_path}.pdparams')
                print(f'----- Save optimizer: {model_path}.pdopt')



  • 损失函数定义:
    飞桨框架提供了多种可用于图像分割的损失函数,如 Dice Loss、Jaccard Loss 等。本次模型训练使用的是 softmax 交叉熵损失函数,如下代码所示:

def Basic_SegLoss(preds, labels, ignore_index=255):
    """Softmax cross-entropy averaged over non-ignored pixels.

    preds: N x C x H x W logits; labels: integer label map broadcastable
    against the per-pixel loss. Pixels equal to ignore_index contribute
    nothing to the average.
    """
    # Bug fix: eps was undefined in the original (NameError at runtime).
    eps = 1e-8

    # Per-pixel softmax cross-entropy along the channel axis; axis=1
    # handles the NCHW layout directly, so no transpose is required.
    loss = fluid.layers.softmax_with_cross_entropy(logits=preds,
                                                   label=labels,
                                                   axis=1)

    # Zero out pixels marked with ignore_index.
    mask = labels != ignore_index
    mask = fluid.layers.cast(mask, 'float32')

    loss = loss * mask
    # Normalize by the fraction of valid pixels; eps guards an
    # all-ignored batch against division by zero.
    avg_loss = fluid.layers.mean(loss) / (fluid.layers.mean(mask) + eps)

    return avg_loss

经过以上步骤,已经了解了模型搭建和训练的一些知识。最后,一个完整的模型训练基本包括以下几个部分:
飞桨(PaddlePaddle)图像分割全流程实现_第1张图片

模型预测

经过多次训练后,通过飞桨可以保存模型的最佳权重参数,并通过保存好的模型权重参数进行预测,实现图片的分割,具体代码如下所示:


from unet import UNet
import cv2
import os
import numpy as np
import Image
import paddle
import paddle.fluid as fluid
import matplotlib.pyplot as plt
def colorize(gray, palette):
    """Map a grayscale label array to a palettized ('P' mode) PIL image.

    gray: 2-D numpy array of class ids; palette: flat 1*3N color list.
    """
    indexed = Image.fromarray(gray.astype(np.uint8)).convert('P')
    indexed.putpalette(palette)
    return indexed


def save_blend_image(image_file, pred_file):
    """Alpha-blend the source image with its prediction image and save
    the result next to pred_file as <stem>_blend.png."""
    base = Image.open(image_file).convert('RGBA')
    overlay = Image.open(pred_file).convert('RGBA')
    blended = Image.blend(base, overlay, 0.5)
    o_file = pred_file[0:-4] + "_blend.png"
    blended.save(o_file)




def inference_resize(image_file, size):
    """Resize an image array to `size` ((width, height)) with cv2 defaults."""
    resized = cv2.resize(image_file, size)
    return resized
    

def inference_sliding(image, window_size):
    """Split the image into overlapping window_size x window_size tiles.

    The stride is window_size // 2, giving 50% overlap between adjacent
    tiles. Edge remainders smaller than one stride remain uncovered
    (same limitation as the original).

    Bug fix: the original sliced non-overlapping (window_size // 2)-sized
    tiles, never producing a full window despite the sliding intent.
    """
    stride = window_size // 2
    h, w, _ = image.shape
    tiles = []
    for top in range(0, max(h - window_size, 0) + 1, stride):
        for left in range(0, max(w - window_size, 0) + 1, stride):
            tiles.append(image[top:top + window_size,
                               left:left + window_size, :])
    return tiles





def save_images(prediction, save, img_file):
    """Write the raw prediction map, its palettized color version, and a
    blend of the color map with the original image."""
    ofile_name = os.path.join(save, os.path.basename(img_file))[0:-4] + '.png'
    cv2.imwrite(ofile_name, prediction)

    # Colorize class ids with the Pascal-Context palette.
    colors = np.loadtxt("./color_files/label_pascal_context_59.txt").astype("uint8")
    color_image = colorize(prediction, colors)

    # Bug fix: the color image was never written to disk, and
    # save_blend_image was handed a PIL image where it expects a file
    # path. Save the color map first, then blend it with the source image.
    ofile_name_color = ofile_name[0:-4] + "_color.png"
    color_image.save(ofile_name_color)
    save_blend_image(img_file, ofile_name_color)


def img_transform(img_path):
    """Load an image and shape it into a 1 x C x H x W float32 batch."""
    img = cv2.imread(img_path)
    # Bug fix: cv2.resize's third positional argument is `dst`, not the
    # interpolation flag; pass the flag by keyword.
    img = cv2.resize(img, (512, 512), interpolation=cv2.INTER_NEAREST)
    # Debug preview (BGR -> RGB for display).
    plt.imshow(img[:, :, ::-1])
    plt.show()
    # HWC to CHW
    if len(img.shape) == 3:
        img = np.transpose(img, (2, 0, 1))
    # NOTE(review): the original comment promised normalization but none
    # is performed — pixel values stay in 0-255; confirm the model
    # expects unnormalized input.
    img = np.expand_dims(img, axis=0).astype('float32')
    return img
    


# this inference code reads a list of image path, and do prediction for each image one by one
def main():
    # 0. env preparation
    place = paddle.fluid.CPUPlace()
    with fluid.dygraph.guard(place):


    # 1. create model
        model = UNet(num_classes=59)
    # 2. load pretrained model 
        state_dict,_ = fluid.dygraph.load_dygraph('work/save_model_state_dict')
    # 3. read test image list
        img = './work/dummy_data/JPEGImages/2008_000059.jpg'
        img = img_transform(img)
        model.eval()
        img = fluid.dygraph.to_variable(img)
        out = model(img)
        out = fluid.layers.softmax(img,axis=1)
        out = fluid.layers.squeeze(out,axes=[])
        out = fluid.layers.transpose(out,(1,2,0))
        out = out.numpy()
        save_images(out,"./","segmented")

if __name__ == "__main__":
    main()

最后,利用paddle实现了一个全流程的分割网络,利用AIStuido的GPU环境进行简单的训练,其效果如下所示:
飞桨(PaddlePaddle)图像分割全流程实现_第2张图片

飞桨(PaddlePaddle)图像分割全流程实现_第3张图片

完整课程与资料学习请访问:https://aistudio.baidu.com/aistudio/education/group/info/1767

你可能感兴趣的:(paddlepaddle,深度学习,神经网络,算法)