写在前面:
因为最近在做裂缝检测,用的是CRACK500数据集,图片尺寸大部分是640*360,如果直接resize成(512,512),效果不太好。
尝试如下:
1、先将高度360填充成512 (512是你需要的尺寸,上下各填充76个像素)
2、因为mask标签图片需要为单通道的二值图像,填充后可能会变成RGB图像,所以再改为二值图像
3、随机裁剪,这个是我自己设计的算法。大概思想是:根据你需要的裁剪尺寸,先限定裁剪中心点在x和y方向上可能落入的区域;再用一个0到1之间的随机值乘以这个区域的长度,并分别加上这个区域距离图片左边和上边的长度(也就是裁剪尺寸的一半),就得到了一个随机的中心点坐标;这个中心点的横纵坐标分别加减裁剪尺寸的一半,就可以得到左、上、右、下四个坐标,也就是最终的裁剪区域。(说的有点绕,其实很好理解,不理解也没关系,直接用)
4、因为是随机裁剪,裁出来的区域里可能没有(或只有很少)你需要的目标像素点,所以我设计了一个统计像素点的函数,用来删除目标像素点少于1000的图片。(因为我用的是二值图像,非黑即白,如果和我不一样的话,可以适当更改判断条件。另外,因为cv2.imread函数不能读取含有中文的路径,所以我把目标数据集文件复制了一份放在桌面上,具体可以看注释)
1.填充尺寸
import cv2
import numpy as np
import os
# Constant (black) border padding
def constant(filenames, save_path, pad=76):
    """Pad every image with black rows at the top and bottom and save it.

    Each input is read with OpenCV, extended by ``pad`` rows of zeros above
    and below (the width is left untouched) and written into ``save_path``
    as ``<index>.jpg``, where ``<index>`` is the position of the file in
    ``filenames``.

    Args:
        filenames: Iterable of image file paths to pad.
        save_path: Directory that receives the padded images.
        pad: Number of rows added to the top and to the bottom. The default
            of 76 turns a 360-pixel-high image into a 512-pixel-high one.

    Raises:
        OSError: If an input cannot be decoded or an output cannot be
            written.
    """
    for i, filename in enumerate(filenames):
        img = cv2.imread(filename)
        if img is None:
            # cv2.imread reports failure by returning None, not by raising.
            raise OSError(f'cannot read image: {filename}')
        # No BGR->RGB conversion: cv2.imwrite expects BGR data, so converting
        # first would store the file with red and blue swapped.
        padded = cv2.copyMakeBorder(
            img, pad, pad, 0, 0, cv2.BORDER_CONSTANT, value=[0, 0, 0]
        )
        target = os.path.join(save_path, f'{i}.jpg')
        if not cv2.imwrite(target, padded):
            # imwrite returns False when the file could not be written.
            raise OSError(f'cannot write image: {target}')
# Reflective (mirrored) border padding
def reflect(filenames, save_path, pad=76):
    """Pad every image at the top and bottom by mirroring its edge rows.

    Each input is read with OpenCV, extended by ``pad`` mirrored rows above
    and below (the width is left untouched) and written into ``save_path``
    as ``<index>.jpg``, where ``<index>`` is the position of the file in
    ``filenames``.

    Args:
        filenames: Iterable of image file paths to pad.
        save_path: Directory that receives the padded images.
        pad: Number of rows added to the top and to the bottom. The default
            of 76 turns a 360-pixel-high image into a 512-pixel-high one.

    Raises:
        OSError: If an input cannot be decoded or an output cannot be
            written.
    """
    for i, filename in enumerate(filenames):
        img = cv2.imread(filename)
        if img is None:
            # cv2.imread reports failure by returning None, not by raising.
            raise OSError(f'cannot read image: {filename}')
        # No BGR->RGB conversion: cv2.imwrite expects BGR data, so converting
        # first would store the file with red and blue swapped.
        mirrored = cv2.copyMakeBorder(img, pad, pad, 0, 0, cv2.BORDER_REFLECT)
        target = os.path.join(save_path, f'{i}.jpg')
        if not cv2.imwrite(target, mirrored):
            # imwrite returns False when the file could not be written.
            raise OSError(f'cannot write image: {target}')
def get_filename(path):
    """Return the full paths of all entries in ``path``, sorted by name.

    ``os.listdir`` returns entries in arbitrary order. The padding functions
    name their outputs by list position, so the result is sorted to keep the
    numbering reproducible between runs and between parallel folders (for
    example an image folder and its mask folder with matching file names).

    Args:
        path: Directory to list.

    Returns:
        A sorted list of ``os.path.join(path, entry)`` for every entry.
    """
    return sorted(os.path.join(path, name) for name in os.listdir(path))
if __name__ == '__main__':
    # Destination folder for the padded copies.
    save_path = r'C:\Users\dell\Desktop\CRACK500_512_padding\test\image'
    # Collect the full path of every source image, then pad by reflection.
    image_names = get_filename(r'C:\Users\dell\Desktop\CRACK500_sameSize\test\image')
    reflect(image_names, save_path)
2.改变图片通道,变为二值图像:
import numpy as np
from PIL import Image
import os
def get_filename(path):
    """Return the full path of every entry found directly inside ``path``.

    The entries keep the order in which ``os.listdir`` reports them.
    """
    return [os.path.join(path, entry) for entry in os.listdir(path)]
# Convert images to single-channel binary (black/white) images
def change_channel(filenames, threshold=127):
    """Binarise every image in place with a hard threshold.

    Each file is opened, reduced to greyscale, thresholded and saved back
    under the same name as a mode ``'1'`` (1-bit) image.

    A plain ``convert('1')`` is deliberately avoided: by default it applies
    Floyd-Steinberg dithering, which scatters isolated white/black pixels
    over regions that are only approximately black or white.

    Args:
        filenames: Iterable of image file paths; every file is overwritten.
        threshold: Grey values strictly greater than this become white
            (255); all others become black (0).
    """
    for filename in filenames:
        # Finish reading inside the context manager so the file handle is
        # closed before the same path is overwritten.
        with Image.open(filename) as image:
            gray = image.convert('L')
        binary = gray.point(
            lambda value: 255 if value > threshold else 0, mode='1'
        )
        binary.save(filename)
if __name__ == '__main__':
    # Folder holding the mask images to binarise; fill this in before running.
    mask_folder = r''
    if not mask_folder:
        # get_filename() needs a directory; stop with a clear message instead
        # of failing on a missing argument.
        raise SystemExit('Set mask_folder to the directory of mask images first.')
    # Collect the full path of every mask, then binarise each one in place.
    image_names = get_filename(mask_folder)
    change_channel(image_names)
3.随机裁剪
from PIL import Image
import os
import random
def _crop_box(image_width, image_height, length, height, x_random, y_random):
    """Return the (left, upper, right, lower) crop box, or None if too small.

    ``x_random`` and ``y_random`` are fractions in [0, 1) that pick the
    top-left corner among all positions where the box still lies completely
    inside the image.
    """
    if image_width < length or image_height < height:
        return None
    left = int(x_random * (image_width - length))
    upper = int(y_random * (image_height - height))
    # Derive right/lower from the requested size so the crop is exactly
    # length x height, also for odd sizes.
    return left, upper, left + length, upper + height


def _crop_folder(input_path, output_path, height, length, x_random, y_random, i):
    """Crop every image in ``input_path`` and save it into ``output_path``.

    All images are cropped at the same relative position; the output name is
    the input name with ``_<i>`` inserted before the extension.
    """
    for image_name in os.listdir(input_path):
        print(f'-----------第{image_name}张图片------------')
        image_input_fullname = os.path.join(input_path, image_name)
        # The context manager closes the file handle once the crop is saved.
        with Image.open(image_input_fullname) as img:
            x_max, y_max = img.size  # (width, height)
            print(x_max, y_max)
            box = _crop_box(x_max, y_max, length, height, x_random, y_random)
            if box is None:
                print("输入尺寸不符合要求")
                continue
            left, upper = box[0], box[1]
            print(f'第{i}次裁剪的中心坐标为 ', "x:", left + length // 2, "y:", upper + height // 2)
            roi_area = img.crop(box)
            qianzhui, houzhui = os.path.splitext(image_name)
            image_output_fullname = os.path.join(output_path, qianzhui + f'_{i}' + houzhui)
            roi_area.save(image_output_fullname)
        print('{0} crop done.'.format(image_output_fullname))


def random_crop(height, length, n):
    """Randomly crop every image and every mask ``n`` times.

    The folders are taken from the module-level names ``image_input_path``,
    ``mask_input_path``, ``image_output_path`` and ``mask_output_path``,
    which must be defined before this function is called.

    For each of the ``n`` passes one random offset pair is drawn and applied
    to all images and then to all masks, so an image and a mask of the same
    pixel size receive the same crop window. Files smaller than the
    requested crop are reported and skipped.

    Args:
        height: Height of the crop window in pixels.
        length: Width of the crop window in pixels.
        n: Number of crops to produce per input file.
    """
    for i in range(n):
        # One draw per pass, shared by images and masks to keep them aligned.
        x_random = random.random()
        y_random = random.random()
        _crop_folder(image_input_path, image_output_path, height, length, x_random, y_random, i)
        _crop_folder(mask_input_path, mask_output_path, height, length, x_random, y_random, i)
if __name__ == '__main__':
    # Folders that receive the cropped images and masks.
    image_output_path = r''
    mask_output_path = r''
    # Folders holding the images and masks to crop.
    image_input_path = r'C:\Users\dell\Desktop\CRACK500_512_padding\train\image'
    mask_input_path = r'C:\Users\dell\Desktop\CRACK500_512_padding\train\mask'
    # Crop size (height, width) and the number of crops taken per picture.
    random_crop(height=512, length=512, n=2)
4.读图片的像素,删除小于1000像素的图片
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
# Find the masks that contain too few crack (non-black) pixels
def get_filename(path, min_pixels=1000):
    """Return the names of images in ``path`` with too few non-black pixels.

    Every file is read with OpenCV and converted to greyscale. Pixels equal
    to 0 count as black, everything else counts as white. The counts are
    printed for each file.

    Args:
        path: Directory containing the mask images.
        min_pixels: A file is reported when its number of non-black pixels
            is strictly below this value.

    Returns:
        List of file names (not full paths) below the threshold.

    Raises:
        OSError: If a file cannot be decoded by ``cv2.imread``.
    """
    image_names = []
    for file_name in os.listdir(path):
        image_path = os.path.join(path, file_name)
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread reports failure by returning None, not by raising.
            raise OSError(f'cannot read image: {image_path}')
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # Vectorised count instead of visiting every pixel in Python.
        total = gray.size
        white = int(np.count_nonzero(gray))
        black = total - white
        print("黑色像素有", black, "个,白色像素有", white, "个", '总共:', total)
        if white < min_pixels:
            image_names.append(file_name)
    return image_names
def delete_image(path, image_names, image_ext='.jpg'):
    """Delete each listed mask together with its matching image.

    For every name in ``image_names`` this removes ``<path>/mask/<name>``
    and ``<path>/image/<stem><image_ext>``, where ``<stem>`` is the mask
    name without its extension.

    Mask and image are removed pair by pair, and a file that does not exist
    is reported and skipped. One missing file therefore cannot abort the run
    half-way and leave masks deleted while their images remain.

    Args:
        path: Dataset root containing the ``mask`` and ``image`` folders.
        image_names: Mask file names to delete.
        image_ext: Extension of the files in the ``image`` folder.
    """
    for mask_name in image_names:
        stem, _ = os.path.splitext(mask_name)
        targets = (
            os.path.join(path, 'mask', mask_name),
            os.path.join(path, 'image', stem + image_ext),
        )
        for target in targets:
            try:
                os.remove(target)
            except FileNotFoundError:
                print(f'skipped, file not found: {target}')
if __name__ == '__main__':
    # Dataset to clean up: the root folder whose 'mask' and 'image'
    # sub-folders the files are deleted from.
    dataset_path = r''
    # cv2.imread cannot open paths containing Chinese characters, so the
    # masks are scanned from a copy of the dataset stored under this path.
    path = r'C:\Users\dell\Desktop\CRACK500_512_padding\train\mask'
    # Names of the masks that have fewer than 1000 crack pixels.
    image_names = get_filename(path)
    # Remove those masks and their images from the real dataset.
    delete_image(dataset_path, image_names)
有不懂的地方欢迎留言!