import tensorflow as tf
import os
import time
from os.path import join
import matplotlib.pyplot as plt
def max_num(image_path):
    """Return the largest numeric file stem found in a directory.

    Entries are expected to be named ``<integer>.<ext>`` (for example
    ``12.jpg``). Entries whose stem is not an integer (hidden files,
    sub-directories, stray text files, ...) are ignored instead of aborting
    the scan with a ``ValueError``.

    Args:
        image_path: Directory whose entries are inspected.

    Returns:
        The largest integer stem, or 0 when the directory contains no
        numerically named entry.
    """
    numbers = []
    for file_name in os.listdir(image_path):
        stem, _ = os.path.splitext(file_name)
        try:
            numbers.append(int(stem))
        except ValueError:
            # Not a numbered image (e.g. ".DS_Store" or "notes.txt"); skip it.
            continue
    return max(numbers) if numbers else 0
def parse_function(filenames):
    """Read an image file and decode it as a 3-channel JPEG.

    Args:
        filenames: Tensor holding the path of the file to read.

    Returns:
        The tensor produced by ``tf.image.decode_jpeg`` with ``channels=3``.
    """
    raw_bytes = tf.read_file(filenames)
    return tf.image.decode_jpeg(raw_bytes, channels=3)
def reshape_image(image_path, save_path, max_number, sess):
    """Resize every image in a directory to 128x128 and save it as a JPEG.

    Each file in ``image_path`` is decoded through ``parse_function``,
    converted to float32, resized to 128x128, converted back to uint8 and
    written to ``save_path`` as ``<max_number + k>.jpg`` for k = 1, 2, ...
    A progress line is printed for every image.

    Args:
        image_path: Directory containing the source images.
        save_path: Directory that receives the resized images.
        max_number: Highest file number already used in ``save_path``;
            numbering of new files continues after it.
        sess: TensorFlow session used to evaluate the image pipeline.

    Returns:
        The number of images processed (0 when ``image_path`` is empty).
    """
    imgs_name = os.listdir(image_path)
    if not imgs_name:
        # Nothing to process; return before building any graph ops.
        return 0

    # Full paths: directory + file name.
    imgs_path = [join(image_path, f) for f in imgs_name]
    imgs_num = len(imgs_path)

    # Input pipeline: file paths -> decoded images, one element per run.
    imgs_queue = tf.data.Dataset.from_tensor_slices(imgs_path)
    imgs_map = imgs_queue.map(parse_function)
    img_decode = imgs_map.make_one_shot_iterator().get_next()

    # Build the resize ops exactly once. Creating them inside the loop on
    # top of the previous iteration's tensor adds new graph nodes per image
    # and pushes later images through several resize/quantize passes.
    # Float32 is used for resizing to limit rounding loss.
    img_float = tf.image.convert_image_dtype(img_decode, dtype=tf.float32)
    # method=0 selects bilinear interpolation in TF 1.x's ResizeMethod.
    img_resized = tf.image.resize_images(img_float, [128, 128], method=0)
    # Back to uint8 for storage.
    img_uint8 = tf.image.convert_image_dtype(img_resized, dtype=tf.uint8)

    for i in range(imgs_num):
        number = max_number + i + 1
        # Every run pulls the next image from the one-shot iterator.
        img_value = sess.run(img_uint8)
        # 1.28 x 1.28 inch figure; presumably meant to give 128 px at
        # 100 dpi -- NOTE(review): depends on the active matplotlib dpi.
        plt.figure(figsize=(1.28, 1.28))
        # The data must be drawn with imshow, otherwise the saved figure
        # is empty.
        plt.imshow(img_value)
        plt.axis("off")
        plt.savefig(save_path+"/{}.jpg".format(number), format="jpg")
        print("Processing {} image.".format(number))
        # matplotlib limits the number of open figures; close right away.
        plt.close("all")
    return imgs_num
def time_costed(times):
    """Convert a duration in seconds into whole hours, minutes and seconds.

    The fractional part of a second is truncated. The conversion uses
    integer arithmetic only, so it does not depend on how a float happens
    to be printed (very small values render in scientific notation, which
    string-based digit extraction misreads).

    Args:
        times: Non-negative duration in seconds (int or float).

    Returns:
        A tuple ``(hours, minutes, seconds)`` of ints with
        ``0 <= minutes < 60`` and ``0 <= seconds < 60``.
    """
    total_seconds = int(times)
    hours, remainder = divmod(total_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    return hours, minutes, seconds
if __name__ == "__main__":
    # Script entry point: resize every image found in the sub-directories of
    # ``images_path`` and collect the results, consecutively numbered, in
    # ``save_path``. Per-directory and total run times are printed.
    images_path = "./data/handwrite_images"
    save_path = "./handwrite_resized_images"
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    start_time = time.time()
    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        # NOTE(review): no queue-based input ops are visible in this file,
        # so this likely starts no threads -- confirm before removing it.
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            # Second-level directories, e.g. ['images_2', 'images_1'].
            dirs_list = os.listdir(images_path)
            dirs_path = [join(images_path, f) for f in dirs_list]
            for dir_path in dirs_path:
                if not os.path.isdir(dir_path):
                    # Stray files next to the image folders cannot be listed.
                    continue
                s_time = time.time()
                # Continue numbering after the files already saved.
                max_number = max_num(save_path)
                reshape_image(dir_path, save_path, max_number, sess)
                e_time = time.time()
                time_c = e_time - s_time
                hours, minutes, seconds = time_costed(time_c)
                print("Time costed: {} h {} m {} s".format(hours, minutes, seconds))
        finally:
            # Always stop and join the coordinator, even when an image fails.
            coord.request_stop()
            coord.join(threads)

    end_time = time.time()
    time_cost = end_time - start_time
    hours, minutes, seconds = time_costed(time_cost)
    print("Total time costed: {} h {} m {} s".format(hours, minutes, seconds))
该数据来源于将2800张尺寸在200$\times$200~200$\times$300之间的图像转换为128$\times$128尺寸的数据。可见,图像处理对内存的消耗很高;本机仅使用CPU和集成显卡,处理过程中会出现短时卡顿现象。
import os
from os.path import join, abspath, dirname
import shutil
# Merge script: copy every file from the first listed sub-directory of
# ``images_path`` into the second listed sub-directory.

# NOTE(review): dirname() is applied to the module *name* (__name__), not to
# a file path -- confirm this is the intended base directory.
base_dir = abspath(dirname(__name__))
print("base directory: {}".format(base_dir))

# Alternative input root: './train_images'
images_path = './test_images'

# Sub-directory names, e.g. ['images_2', 'images_1'].
directory_list = os.listdir(images_path)

# Sub-directory paths, e.g. ['./test_images/images_2', './test_images/images_1'].
dir_path = [join(images_path, entry) for entry in directory_list]

# File names in the first sub-directory, e.g. ['b3.jpg', 'b4.jpg', 'b1.jpg', 'b2.jpg'].
file_list = os.listdir(dir_path[0])

# Full source paths, e.g. ['./test_images/images_2/b3.jpg', ...].
file_path = [join(dir_path[0], name) for name in file_list]
print("file path: {}".format(file_path))

# shutil.copy needs the joined path of each file, not the bare file name.
for src_file in file_path:
    shutil.copy(src_file, dir_path[1])
(1) 图像数据处理,先获取图像文件目录和文件名,后将文件路径和文件名拼接,用于处理;
(2) 图像尺寸调整需要将图像转换为float32格式(降低图像信息损失,若整数之间直接处理,会丢失小数部分的信息),保存图像时将数据格式转换为uint8格式;
(3) matplotlib保存图像时,首先通过imshow获取数据,然后保存;为了保证大量图像数据存储正常,需要保存之后,即时关闭打开的窗口,使用close关闭;
(4) 图像合并,需要使用拼接后的完整文件路径,而不是单独的文件名称;shutil.copy(file, path)中,file为源文件,path为目标路径;
(5) 对数据进行分组处理,可有效缓解服务器或本地主机的压力;
[参考文献]
[1]https://blog.csdn.net/Xin_101/article/details/82585098
[2]https://blog.csdn.net/Xin_101/article/details/84231722