最近在做 object_detection 时需要用到一些对图片的预处理脚本。
比如要批量剪切图片、图片灰度、二值化、缩放、丰富数据等操作,写了如下几个功能的脚本
images 文件夹内为图片处理脚本
- 将某目录内图片全部转化成 500*500
python resize.py ./test/ 500
- 将某目录内图片随机切成 300*300
python randomCrop.py ./test/ 300
- 将某目录内图片类型转为RGB
python convertToRGB.py ./test/
- 将某目录内图片转为灰度图
python convertToL.py ./test/
- 将某目录内图片做二值化处理,其中阈值为 120。二值化图像每个像素用 8 个bit表示,0 表示黑,255 表示白。阈值的作用就是大于等于阈值为白,小于阈值为黑
在图像检测中较常用,可以去掉过多的干扰和噪点
python convertTo1.py ./test/ 120
- 将某张图片按照阈值 0~255 生成 256 张图片,用于选择合理的阈值
python getAllBinarizationImg.py test/github.JPG
- 将某目录内图片旋转 45 度,共旋转 7 次,每旋转一次生成一张图。适合增加样本数量。还可以在旋转中随机增加剪裁、灰度等操作来丰富样本数据
python rotate.py test 45 7
可以参考 github
核心代码如下
import os
import sys
import math
import random
import functools
import numpy as np
from PIL import Image, ImageEnhance
def rotate(img_path, degree, num):
    """Save ``num`` rotated copies of an image under ``./rotate/``.

    Copy ``i`` (counting from 1) is the source image rotated by
    ``degree * i`` degrees and is written as ``<stem>_<degree * i><ext>``.

    Args:
        img_path: Path of the image to rotate.
        degree: Rotation step in degrees, forwarded to ``Image.rotate``.
        num: Number of rotated copies to produce; nothing is written when
            it is zero or negative.
    """
    img = Image.open(img_path)
    save_path = './rotate/'
    mkdir(save_path)
    # splitext keeps stems that contain dots intact
    # ("a.b.jpg" -> "a.b" + ".jpg"), unlike splitting on every '.'.
    stem, ext = os.path.splitext(os.path.basename(img_path))
    for i in range(1, num + 1):
        angle = degree * i
        # Always rotate the untouched source by the cumulative angle rather
        # than re-rotating the previous result, so resampling blur and
        # clipped corners do not pile up from one copy to the next.
        rotated = img.rotate(angle)
        # str() instead of bytes(): bytes(int) is not a decimal string on
        # Python 3 and cannot be concatenated with str.
        img_name = save_path + stem + '_' + str(angle) + ext
        rotated.save(img_name, quality=95)
        print('save to ' + img_name)
def randomCrop(img_path, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.)):
    """Take a random crop of an image, resize it to ``size`` x ``size`` and save it.

    A random aspect ratio is drawn from ``ratio`` and a random area fraction
    from ``scale`` (capped so the crop fits inside the image). The crop is
    placed at a random position, resized with LANCZOS resampling, converted
    to RGB if needed and written to ``./random_crop/`` under the source
    file's base name.

    Args:
        img_path: Path of the image to crop.
        size: Edge length in pixels of the square output image.
        scale: ``(min, max)`` fraction of the source area the crop may cover.
        ratio: ``(min, max)`` range the width/height ratio is drawn from.

    Returns:
        The cropped and resized ``PIL.Image.Image``.
    """
    img = Image.open(img_path)
    width, height = img.size

    # w / h == drawn ratio and w * h == 1, so they scale a unit-area box.
    aspect_ratio = math.sqrt(np.random.uniform(*ratio))
    w = 1. * aspect_ratio
    h = 1. / aspect_ratio

    # Largest area fraction for which a crop of this aspect ratio still
    # fits within both the image width and the image height.
    bound = min((float(width) / height) / (w ** 2),
                (float(height) / width) / (h ** 2))
    scale_max = min(scale[1], bound)
    scale_min = min(scale[0], bound)

    target_area = width * height * np.random.uniform(scale_min, scale_max)
    target_size = math.sqrt(target_area)
    w = int(target_size * w)
    h = int(target_size * h)

    # Random top-left corner such that the crop stays inside the image.
    i = np.random.randint(0, width - w + 1)
    j = np.random.randint(0, height - h + 1)

    img = img.crop((i, j, i + w, j + h))
    img = img.resize((size, size), Image.LANCZOS)
    if img.mode != 'RGB':
        img = img.convert('RGB')

    save_path = './random_crop/'
    mkdir(save_path)
    img_name = save_path + os.path.basename(img_path)
    img.save(img_name, quality=95)
    print('save to ' + img_name)
    return img
def resize(img_path, size):
    """Resize an image to ``size`` x ``size`` and save it as RGB.

    The result is written to ``./resize_<size>/`` under the source file's
    base name. The aspect ratio is not preserved.

    Args:
        img_path: Path of the image to resize.
        size: Edge length in pixels of the square output image.

    Returns:
        The resized ``PIL.Image.Image``.
    """
    img = Image.open(img_path)
    # Image.ANTIALIAS was an alias of LANCZOS and has been removed from
    # recent Pillow releases; LANCZOS is the same filter randomCrop uses.
    img = img.resize((size, size), Image.LANCZOS)
    if img.mode != 'RGB':
        img = img.convert('RGB')
    # str() instead of bytes(): bytes(int) is not a decimal string on
    # Python 3 and cannot be concatenated with str.
    save_path = './resize_' + str(size) + '/'
    mkdir(save_path)
    img_name = save_path + os.path.basename(img_path)
    img.save(img_name, quality=95)
    print('save to ' + img_name)
    return img
def convertToRGB(img_path):
    """Convert an image to RGB mode and save it under ``./convert/``.

    Images that are already RGB are re-saved unchanged in mode.

    Args:
        img_path: Path of the image to convert.

    Returns:
        The ``PIL.Image.Image`` in RGB mode.
    """
    img = Image.open(img_path)
    if img.mode != 'RGB':
        img = img.convert('RGB')
    save_path = './convert/'
    mkdir(save_path)
    img_name = save_path + os.path.basename(img_path)
    img.save(img_name, quality=95)
    # print() call form works on both Python 2 and 3.
    print('save to ' + img_name)
    return img
def convertToL(img_path):
    """Convert an image to 8-bit grayscale (mode ``'L'``) and save it.

    The result is written to ``./toL/`` under the source file's base name.

    Args:
        img_path: Path of the image to convert.
    """
    im = Image.open(img_path)
    im = im.convert('L')
    save_path = './toL/'
    mkdir(save_path)
    img_name = save_path + os.path.basename(img_path)
    im.save(img_name, quality=95)
    # Report success only once the file has actually been written.
    print('success save to ' + img_name)
def convertTo1(img_path, threshold):
    """Binarize an image with a fixed threshold and save it.

    The image is converted to grayscale first; pixels below ``threshold``
    become black and all others white. The result is written to
    ``./to_binarization/`` as ``<stem>_<threshold><ext>``.

    Args:
        img_path: Path of the image to binarize.
        threshold: Gray level (0-255) separating black from white.
    """
    im = Image.open(img_path)
    gray = im.convert('L')
    # Lookup table mapping each of the 256 gray levels to 0 (black) or
    # 1 (white) for the 1-bit output mode.
    table = [0 if level < threshold else 1 for level in range(256)]
    bim = gray.point(table, '1')
    save_path = './to_binarization/'
    mkdir(save_path)
    # splitext keeps stems that contain dots intact
    # ("a.b.jpg" -> "a.b" + ".jpg"), unlike splitting on every '.'.
    stem, ext = os.path.splitext(os.path.basename(img_path))
    # str() instead of bytes(): bytes(int) is not a decimal string on
    # Python 3 and cannot be concatenated with str.
    img_name = save_path + stem + '_' + str(threshold) + ext
    bim.save(img_name, quality=95)
    # Report success only once the file has actually been written.
    print('success save to ' + img_name)
def imgList(dir, suffix='.JPG'):
    """Return the paths of the files in ``dir`` whose extension is ``suffix``.

    Only the directory itself is scanned (no recursion). The comparison is
    case-sensitive and uses the extension as reported by
    ``os.path.splitext`` (including the leading dot).

    Args:
        dir: Directory to scan; must exist.
        suffix: File extension to keep, e.g. ``'.JPG'``.

    Returns:
        A list of ``os.path.join(dir, name)`` for every matching entry, in
        the order ``os.listdir`` yields them.

    Raises:
        AssertionError: If ``dir`` is not an existing directory.
    """
    assert os.path.isdir(dir)
    # Honour the caller's suffix; it used to be ignored in favour of a
    # hard-coded ".JPG".
    return [
        os.path.join(dir, name)
        for name in os.listdir(dir)
        if os.path.splitext(name)[1] == suffix
    ]
def mkdir(dir):
    """Create directory ``dir`` (and missing parents) if it does not exist.

    Args:
        dir: Directory path to create.

    Raises:
        OSError: If the directory cannot be created and does not already
            exist as a directory (e.g. the path is an existing file).
    """
    # Attempt the creation first and tolerate "already there" afterwards,
    # so a directory appearing between a check and the create call cannot
    # make this fail.
    try:
        os.makedirs(dir)
    except OSError:
        if not os.path.isdir(dir):
            raise
# Script entry point: prints a placeholder message when run directly.
if __name__ == '__main__':
    # print() call form works on both Python 2 and 3.
    print('test')