Read the image labels
from pandas import read_csv

# Map each training image filename to its whale id.
tagged = dict([(p, w) for _, p, w in read_csv('../input/whale-categorization-playground/train.csv').to_records()])
join = list(tagged.keys())
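A quick look at what was just parsed helps catch path or format problems early; this check is illustrative and not part of the original kernel:

# Illustrative sanity check on the parsed labels.
print(len(tagged), 'labelled training images')
print(len(set(tagged.values())), 'distinct whale ids')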
from os.path import isfile
from PIL import Image as pil_image
from tqdm import tqdm_notebook
def expand_path(p):
    # Return the full path of an image, looking in the train folder first, then test.
    if isfile('../input/whale-categorization-playground/train/' + p): return '../input/whale-categorization-playground/train/' + p
    if isfile('../input/whale-categorization-playground/test/' + p): return '../input/whale-categorization-playground/test/' + p
    return p
train_images = {}
for p in tqdm_notebook(join):
    image = pil_image.open(expand_path(p))
    train_images[p] = image
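Since expand_path falls back to returning the bare filename when the image is in neither folder, it is worth verifying that every training filename resolves to a real file; a minimal, illustrative check:

# Illustrative: count filenames that expand_path could not locate on disk.
missing = [p for p in join if not isfile(expand_path(p))]
print(len(missing), 'filenames could not be located')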
Handle image rotation
# Filenames of images known to be upside down; they are rotated 180 degrees when read.
with open('../input/humpback-whale-identification-model-files/rotate.txt', 'rt') as f:
    rotate = f.read().split('\n')[:-1]
rotate = set(rotate)
def read_raw_image(p):
    # Read an image from disk, correcting the orientation of known rotated images.
    img = pil_image.open(expand_path(p))
    if p in rotate: img = img.rotate(180)
    return img
Display one image
p = list(rotate)[0]
# Left: the image as stored on disk; right: the rotation-corrected version.
imgs = [pil_image.open(expand_path(p)), read_raw_image(p)]
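show_whale is used below to display image lists but is not defined in this section; it presumably comes from earlier in the original kernel. A minimal matplotlib sketch with the same call signature is given here as a stand-in, in case it is not already defined:

import matplotlib.pyplot as plt

def show_whale(imgs, per_row=2):
    # Hypothetical stand-in for the kernel's show_whale helper: display images in a grid.
    rows = (len(imgs) + per_row - 1) // per_row
    fig = plt.figure(figsize=(4*per_row, 4*rows))
    for i, img in enumerate(imgs):
        ax = fig.add_subplot(rows, per_row, i + 1)
        ax.imshow(img, cmap='gray')
        ax.axis('off')
    plt.show()

show_whale(imgs, per_row=2)  # compare the two versions side by side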
# Read the bounding box data from the bounding box kernel (see the references above)
import pickle

with open('../input/humpback-whale-identification-model-files/bounding-box.pickle', 'rb') as f:
    p2bb = pickle.load(f)
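p2bb maps an image filename to its (x0, y0, x1, y1) bounding box, as the cropping code below assumes; peeking at one entry (illustrative):

# Illustrative: inspect one bounding box entry.
sample = list(tagged.keys())[0]
print(sample, p2bb.get(sample))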
**Affine transformation**
# Suppress the annoying stderr output produced when importing keras
import sys
import platform
old_stderr = sys.stderr
sys.stderr = open('/dev/null' if platform.system() != 'Windows' else 'nul', 'w')
import keras
sys.stderr = old_stderr
import random
import numpy as np
from keras import backend as K
from keras.preprocessing.image import img_to_array, array_to_img
from scipy.ndimage import affine_transform

img_shape = (384, 384, 1)  # The image shape used by the model
anisotropy = 2.15          # The horizontal compression ratio
crop_margin = 0.05         # Margin added around the bounding box to compensate for its inaccuracy
def build_transform(rotation, shear, height_zoom, width_zoom, height_shift, width_shift):
    """
    Build a transformation matrix with the specified characteristics.
    """
    rotation = np.deg2rad(rotation)
    shear = np.deg2rad(shear)
    rotation_matrix = np.array([[np.cos(rotation), np.sin(rotation), 0], [-np.sin(rotation), np.cos(rotation), 0], [0, 0, 1]])
    shear_matrix = np.array([[1, np.sin(shear), 0], [0, np.cos(shear), 0], [0, 0, 1]])
    zoom_matrix = np.array([[1.0/height_zoom, 0, 0], [0, 1.0/width_zoom, 0], [0, 0, 1]])
    shift_matrix = np.array([[1, 0, -height_shift], [0, 1, -width_shift], [0, 0, 1]])
    return np.dot(np.dot(rotation_matrix, shear_matrix), np.dot(zoom_matrix, shift_matrix))
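With zero rotation, zero shear, unit zooms and zero shifts, the composed matrix should reduce to the identity, which gives a cheap sanity check (illustrative, not part of the original kernel):

# The "do nothing" parameters should yield the 3x3 identity matrix.
print(np.allclose(build_transform(0, 0, 1.0, 1.0, 0, 0), np.eye(3)))  # expected: True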
def read_cropped_image(p, augment):
    """
    @param p : the name of the picture to read
    @param augment: whether to apply data augmentation
    @return the transformed image
    """
    # Image size in pixels (size_x = width, size_y = height); the bounding box coordinates refer to this image.
    size_x, size_y = pil_image.open(expand_path(p)).size
    # Determine the region of the original image to capture based on the bounding box.
    x0, y0, x1, y1 = p2bb[p]
    if p in rotate: x0, y0, x1, y1 = size_x - x1, size_y - y1, size_x - x0, size_y - y0
    dx = x1 - x0
    dy = y1 - y0
    x0 -= dx*crop_margin
    x1 += dx*crop_margin + 1
    y0 -= dy*crop_margin
    y1 += dy*crop_margin + 1
    if x0 < 0: x0 = 0
    if x1 > size_x: x1 = size_x
    if y0 < 0: y0 = 0
    if y1 > size_y: y1 = size_y
    dx = x1 - x0
    dy = y1 - y0
    # Pad the crop so that its aspect ratio matches the target anisotropy.
    if dx > dy*anisotropy:
        dy = 0.5*(dx/anisotropy - dy)
        y0 -= dy
        y1 += dy
    else:
        dx = 0.5*(dy*anisotropy - dx)
        x0 -= dx
        x1 += dx

    # Generate the transformation matrix (maps output coordinates to input coordinates).
    trans = np.array([[1, 0, -0.5*img_shape[0]], [0, 1, -0.5*img_shape[1]], [0, 0, 1]])
    trans = np.dot(np.array([[(y1 - y0)/img_shape[0], 0, 0], [0, (x1 - x0)/img_shape[1], 0], [0, 0, 1]]), trans)
    if augment:
        trans = np.dot(build_transform(
            random.uniform(-5, 5),
            random.uniform(-5, 5),
            random.uniform(0.8, 1.0),
            random.uniform(0.8, 1.0),
            random.uniform(-0.05*(y1 - y0), 0.05*(y1 - y0)),
            random.uniform(-0.05*(x1 - x0), 0.05*(x1 - x0))
        ), trans)
    trans = np.dot(np.array([[1, 0, 0.5*(y1 + y0)], [0, 1, 0.5*(x1 + x0)], [0, 0, 1]]), trans)

    # Read the image, convert to grayscale and then to a numpy array.
    img = read_raw_image(p).convert('L')
    img = img_to_array(img)

    # Apply the affine transformation.
    matrix = trans[:2, :2]
    offset = trans[:2, 2]
    img = img.reshape(img.shape[:-1])
    img = affine_transform(img, matrix, offset, output_shape=img_shape[:-1], order=1, mode='constant', cval=np.average(img))
    img = img.reshape(img_shape)

    # Normalize to zero mean and unit variance.
    img -= np.mean(img, keepdims=True)
    img /= np.std(img, keepdims=True) + K.epsilon()
    return img
def read_for_training(p):
    """
    Read and preprocess an image with data augmentation (random transform).
    """
    return read_cropped_image(p, True)

def read_for_validation(p):
    """
    Read and preprocess an image without data augmentation (used for testing).
    """
    return read_cropped_image(p, False)
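After cropping and normalization, each preprocessed image should have the model input shape (384, 384, 1) with roughly zero mean and unit variance; a quick illustrative check:

# Illustrative: verify shape and normalization of one preprocessed image.
sample = read_for_validation(list(tagged.keys())[0])
print(sample.shape, float(sample.mean()), float(sample.std()))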
p = list(tagged.keys())[312]
imgs = [
    read_raw_image(p),
    array_to_img(read_for_validation(p)),
    array_to_img(read_for_training(p)),
]
show_whale(imgs, per_row=3)