从零开始做ISPRS Potsdam地物分类研究——数据预处理

我跑的模型需要将数据裁剪为小图

裁剪RGB、DSM和Label数据:小图大小为1024。注意下面代码中的步长step_length设为1024(即无重叠裁剪);如需有重叠的裁剪,可将step_length改为512。

 
import cv2
import os
from PIL import Image
import numpy as np
 
# Cut every image of a folder into square tiles.
def clip_picture(file_path: str, outPath: str, a: str,
                 slide_window: int = 1024, step_length: int = 1024) -> None:
    """Cut every image in ``file_path`` into square tiles saved under ``outPath``.

    Each image is zero-padded on the right/bottom up to the next multiple of
    ``step_length`` (a full extra step when the size already is a multiple)
    and then surrounded by a zero border of ``step_length // 2`` pixels.
    Tiles of ``slide_window`` x ``slide_window`` pixels are cropped every
    ``step_length`` pixels, starting at the top-left corner of the padded
    image, and numbered row by row.

    Tiles are named ``<p2>_<p3>(<row>,<col>)@<index><suffix>`` where ``p2``
    and ``p3`` are the third and fourth ``_``-separated parts of the source
    file name.

    Args:
        file_path: Folder whose entries are all opened as images.
        outPath: Folder that receives the tiles; created when missing.
        a: Output flavour. ``'tif'`` writes ``..._sat.tif`` tiles and
            ``'png'`` writes ``..._mask.png`` tiles. Any other value writes
            nothing.
        slide_window: Side length of each tile in pixels.
        step_length: Distance in pixels between neighbouring tile origins.

    Raises:
        ValueError: If ``slide_window`` or ``step_length`` is not positive.
        IndexError: If a file name has fewer than four ``_``-separated parts.

    NOTE(review): tiles are cut from the top-left corner of the *padded*
    image, so the tile grid is shifted by ``step_length // 2`` pixels
    relative to the source. With the default ``step_length == slide_window``
    the grid stops ``step_length // 2`` pixels before the far edges of the
    padded image, which leaves the last source rows/columns out of every
    tile whenever the fill is smaller than that border (e.g. a 6000-pixel
    side with step 1024). With ``step_length == slide_window // 2`` the
    tiles reach the far edges. Confirm which layout is wanted.
    """
    if slide_window <= 0 or step_length <= 0:
        raise ValueError("slide_window and step_length must be positive")

    suffix = {'tif': '_sat.tif', 'png': '_mask.png'}.get(a)
    if suffix is None:
        # Unsupported flavour: no tile would be written, so skip the work.
        return

    if outPath:
        os.makedirs(outPath, exist_ok=True)

    border = step_length // 2

    for file in os.listdir(file_path):
        image = Image.open(os.path.join(file_path, file))
        width, height = image.size

        right_fill = step_length - (width % step_length)
        bottom_fill = step_length - (height % step_length)

        # Number of tiles per row / per column.
        width_path_number = (width + right_fill) // step_length
        # The column count depends on the bottom fill (it used the right
        # fill before, which is wrong for non-square images).
        height_path_number = (height + bottom_fill) // step_length

        # One padding pass: fill to a multiple of the step plus the
        # half-step outer border, all with zeros.
        padded = cv2.copyMakeBorder(
            np.array(image),
            top=border, bottom=bottom_fill + border,
            left=border, right=right_fill + border,
            borderType=cv2.BORDER_CONSTANT, value=0)
        padded = Image.fromarray(padded)  # back to PIL for cropping/saving

        name = file.split('_')
        prefix = name[2] + '_' + name[3]

        l = 0  # running tile index within this source image
        for j in range(height_path_number):
            for i in range(width_path_number):
                left = i * step_length
                top = j * step_length
                small_image = padded.crop(
                    (left, top, left + slide_window, top + slide_window))
                tile_name = '{}({},{})@{:04d}{}'.format(prefix, j, i, l, suffix)
                small_image.save(os.path.join(outPath, tile_name), quality=95)
                l += 1

 
# Input folders of the training split, each paired with the folder that
# receives its tiles.
inPath1, outPath1 = "./Postdamdataset/RGB_train/", "./Postdamdataset/RGB/"
inPath2, outPath2 = "./Postdamdataset/Label_train/", "./Postdamdataset/Label/"
inPath3, outPath3 = "./Postdamdataset/DSM_train/", "./Postdamdataset/HHA/"

# Tile the two image folders as '_sat.tif' files, then the labels as
# '_mask.png' files.
clip_picture(inPath1, outPath1, 'tif')
clip_picture(inPath3, outPath3, 'tif')
clip_picture(inPath2, outPath2, 'png')

# Lazy iterator over the label tiles that were just written.
mask_names = filter(
    lambda tile_name: 'mask' in tile_name,
    os.listdir(outPath2),
)

裁剪标签时出现错误,于是请出处理影像的万能工具GDAL库来读取并裁剪:

 
import cv2
import os
from PIL import Image
import numpy as np
from osgeo import gdal
# Cut every raster of a folder into square tiles (GDAL-based reader).
def clip_picture(file_path: str, outPath: str, a: str,
                 slide_window: int = 1024, step_length: int = 1024) -> None:
    """Cut every raster in ``file_path`` into square tiles saved under ``outPath``.

    Rasters are read with GDAL, cast to ``uint8``, zero-padded on the
    right/bottom up to the next multiple of ``step_length`` (a full extra
    step when the size already is a multiple) and then surrounded by a zero
    border of ``step_length // 2`` pixels. Tiles of ``slide_window`` x
    ``slide_window`` pixels are cropped every ``step_length`` pixels,
    starting at the top-left corner of the padded image, and numbered row by
    row.

    Tiles are named ``<p2>_<p3>(<row>,<col>)@<index><suffix>`` where ``p2``
    and ``p3`` are the third and fourth ``_``-separated parts of the source
    file name.

    Args:
        file_path: Folder whose entries are all opened with ``gdal.Open``.
        outPath: Folder that receives the tiles; created when missing.
        a: Output flavour. ``'tif'`` writes ``..._sat.tif`` tiles and
            ``'png'`` writes ``..._mask.png`` tiles. Any other value writes
            nothing.
        slide_window: Side length of each tile in pixels.
        step_length: Distance in pixels between neighbouring tile origins.

    Raises:
        ValueError: If ``slide_window`` or ``step_length`` is not positive.
        IndexError: If a file name has fewer than four ``_``-separated parts.

    NOTE: pixel values are cast to ``uint8`` before saving, so rasters with
    a wider dtype do not keep values outside 0-255.

    NOTE(review): tiles are cut from the top-left corner of the *padded*
    image, so the tile grid is shifted by ``step_length // 2`` pixels
    relative to the source. With the default ``step_length == slide_window``
    the grid stops ``step_length // 2`` pixels before the far edges of the
    padded image, which leaves the last source rows/columns out of every
    tile whenever the fill is smaller than that border (e.g. a 6000-pixel
    side with step 1024). With ``step_length == slide_window // 2`` the
    tiles reach the far edges. Confirm which layout is wanted.
    """
    if slide_window <= 0 or step_length <= 0:
        raise ValueError("slide_window and step_length must be positive")

    suffix = {'tif': '_sat.tif', 'png': '_mask.png'}.get(a)
    if suffix is None:
        # Unsupported flavour: no tile would be written, so skip the work.
        return

    if outPath:
        os.makedirs(outPath, exist_ok=True)

    border = step_length // 2

    for file in os.listdir(file_path):
        dataset = gdal.Open(os.path.join(file_path, file))
        if dataset is None:
            # gdal.Open returns None (unless GDAL exceptions are enabled)
            # for entries it cannot read, e.g. sidecar files.
            print(file + ' could not be opened by GDAL, skipped')
            continue

        width = dataset.RasterXSize
        height = dataset.RasterYSize

        right_fill = step_length - (width % step_length)
        bottom_fill = step_length - (height % step_length)

        # Number of tiles per row / per column.
        width_path_number = (width + right_fill) // step_length
        height_path_number = (height + bottom_fill) // step_length

        image = dataset.ReadAsArray()
        dataset = None  # drop the dataset reference once pixels are in memory

        # Multi-band rasters come back band-first; OpenCV and PIL need the
        # band axis last. Decide by array rank rather than by output flavour
        # so single-band 'tif' inputs and multi-band 'png' inputs both work.
        if image.ndim == 3:
            image = np.moveaxis(image, 0, -1)
        image = np.ascontiguousarray(image.astype(np.uint8))

        # One padding pass: fill to a multiple of the step plus the
        # half-step outer border, all with zeros.
        padded = cv2.copyMakeBorder(
            image,
            top=border, bottom=bottom_fill + border,
            left=border, right=right_fill + border,
            borderType=cv2.BORDER_CONSTANT, value=0)
        padded = Image.fromarray(padded)  # PIL handles cropping and saving

        name = file.split('_')
        prefix = name[2] + '_' + name[3]

        l = 0  # running tile index within this source raster
        for j in range(height_path_number):
            for i in range(width_path_number):
                left = i * step_length
                top = j * step_length
                small_image = padded.crop(
                    (left, top, left + slide_window, top + slide_window))
                tile_name = '{}({},{})@{:04d}{}'.format(prefix, j, i, l, suffix)
                small_image.save(os.path.join(outPath, tile_name), quality=95)
                l += 1

 
# Input folders of the training split, each paired with the folder that
# receives its tiles.
inPath1, outPath1 = "./Postdamdataset/RGB_train/", "./Postdamdataset/RGB/"
inPath2, outPath2 = "./Postdamdataset/Label_train/", "./Postdamdataset/Label/"
inPath3, outPath3 = "./Postdamdataset/DSM_train/", "./Postdamdataset/HHA/"

# The RGB call is left disabled in this run:
# clip_picture(inPath1,outPath1 ,'tif')
# Tile the DSM folder as '_sat.tif' files, then the labels as '_mask.png'.
clip_picture(inPath3, outPath3, 'tif')
clip_picture(inPath2, outPath2, 'png')

# Lazy iterator over the label tiles that were just written.
mask_names = filter(
    lambda tile_name: 'mask' in tile_name,
    os.listdir(outPath2),
)

发现DSM文件名中的编号开头多了一个0,拼接输出文件名时需要把它去掉:

name2 = name[2]
name2 = name2[1:]  # 去掉开头的0

检查空白标签 防止错误

 
import cv2
import os
from PIL import Image
import numpy as np
from osgeo import gdal
# Cutting the input image to h*w blocks

 
# Folders of the test split: sources and the folders holding their tiles.
inPath1 = "./Postdamdataset/RGB_test/"
inPath3 = "./Postdamdataset/DSM_test/"

inPath2 = "./Postdamdataset/Label_test/"

outPath1 = "./Postdamdataset/test_RGB/"
outPath3 = "./Postdamdataset/test_HHA/"

outPath2 = "./Postdamdataset/test_Label/"


# Label tiles are the entries whose name contains 'mask'.
mask_names = filter(lambda x: 'mask' in x, os.listdir(outPath2))

# Delete every all-zero label tile together with the image tiles that share
# its name in the two other tile folders.
for f in mask_names:
    path = outPath2 + f.strip()
    if not os.path.exists(path):
        continue
    img = cv2.imread(path, 0)  # flag 0: load as single-channel grayscale
    if img is None:
        # cv2.imread returns None instead of raising when it cannot decode
        # a file; countNonZero would fail on it.
        print(f + 'Image could not be read')
        continue
    if cv2.countNonZero(img) == 0:
        print(f+'Image is black')
        path2 = f[:-9]  # drop the 9-character '_mask.png' ending
        # Remove the mask and its companions; a missing companion is
        # reported instead of aborting with the set only partly removed.
        for tile_path in (path,
                          outPath1 + path2 + "_sat.tif",
                          outPath3 + path2 + "_sat.tif"):
            try:
                os.remove(tile_path)
            except FileNotFoundError:
                print(tile_path + ' is missing, nothing to remove')

通过这段代码发现4-12这幅影像有问题,解决方法见上节。

得到864张1024大小训练集,504张1024大小的测试集和验证集

你可能感兴趣的:(分类,数据挖掘,人工智能)