TensorFlow official model conversion tutorial: https://tensorflow.google.cn/lite/convert
TensorFlow 1.8 seems to be missing some of these features, so we do the conversion through bazel instead:
bazel build tensorflow/lite/toco:toco
bazel-bin/tensorflow/lite/toco/toco \
  --input_file='../kerasyolo3/m/transfer_multi_4cl_1207.pb' \
  --input_format=TENSORFLOW_GRAPHDEF \
  --output_format=TFLITE \
  --output_file='../kerasyolo3/m/transfer_multi_4cl_1207.tflite' \
  --inference_type=FLOAT \
  --input_type=FLOAT \
  --input_arrays=input_1 \
  --output_arrays=output_1,output_2,output_3 \
  --input_shapes=1,416,416,3
The --post_training_quantize flag enables post-training quantization, i.e. model compression.
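On the bazel/toco route this is just one more flag appended to the command above; a sketch (whether a given toco checkout accepts this flag is an assumption on my part, and a 1.8 tree likely does not, since the flag appeared in later releases):

bazel-bin/tensorflow/lite/toco/toco \
  ... (same arguments as above) ... \
  --post_training_quantize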
With TensorFlow 1.13.1 the code below can also compress the model, but it is unclear exactly what gets compressed, or why inference afterwards becomes so much slower.
import tensorflow as tf
# Convert the Keras .h5 model with a fixed input shape and post-training quantization.
converter = tf.lite.TFLiteConverter.from_keras_model_file(
    'transfer_multi_4cl_1207.h5', input_shapes={"input_1": [1, 416, 416, 3]})
converter.post_training_quantize = True
tflite_quantized_model = converter.convert()
open('quantized_model.tflite', 'wb').write(tflite_quantized_model)
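At minimum the size reduction is easy to verify; a quick sketch comparing the two files (the float .tflite filename is taken from the toco step above):

import os
for f in ['transfer_multi_4cl_1207.tflite', 'quantized_model.tflite']:
    print('%s: %.1f MB' % (f, os.path.getsize(f) / 2**20))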
Under tf-nightly (1.14), tf.lite.Interpreter lets you run a .tflite model on a desktop machine, using interpreter.invoke(). However, TensorFlow Lite apparently only runs on the CPU, regardless of whether the GPU build is installed.
My test results:
- h5 model: 4.8 images/s on GPU, roughly 1.2 images/s on CPU
- pb model: 4.8 images/s on GPU without 8-bit storage, 3.6 images/s on GPU with 8-bit storage
- tflite without post_training_quantize: CPU only, also 1.2 images/s
- tflite with post_training_quantize: CPU only, 13 seconds per image (I was about to lose my mind: I went out of my way to install the latest nightly TensorFlow, and it ended up slower.)
The test script:
#coding=utf-8
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
import cv2
import numpy as np
import time
import tensorflow as tf
def preprocess_input(image, net_h, net_w):
    new_h, new_w, _ = image.shape
    # determine the new size of the image, preserving aspect ratio
    if (float(net_w) / new_w) < (float(net_h) / new_h):
        new_h = (new_h * net_w) // new_w
        new_w = net_w
    else:
        new_w = (new_w * net_h) // new_h
        new_h = net_h
    # resize the image to the new size (BGR -> RGB, scaled to [0, 1])
    resized = cv2.resize(image[:, :, ::-1] / 255., (new_w, new_h))
    # embed the image into the standard letter box (gray padding at 0.5)
    new_image = np.ones((net_h, net_w, 3)) * 0.5
    new_image[(net_h - new_h) // 2:(net_h + new_h) // 2, (net_w - new_w) // 2:(net_w + new_w) // 2, :] = resized
    new_image = np.expand_dims(new_image, 0)
    return new_image
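# Example of the letterbox math (hypothetical 640x480 frame, 416x416 net):
# 416/640 = 0.65 < 416/480 ≈ 0.87, so new_h = 480 * 416 // 640 = 312 and
# new_w = 416; the 416x312 resize is centered vertically in the 416x416 canvas.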
if __name__ == '__main__':
    test_image_dir = ''  # folder of test images (not used in the single-image demo below)
    # model_path = "./model/quantize_frozen_graph.tflite"
    model_path = "transfer_multi_4cl_1207.tflite"

    # Load the TFLite model and allocate tensors. No tf.Session is needed;
    # the TFLite interpreter runs standalone (and CPU-only, as noted above).
    interpreter = tf.lite.Interpreter(model_path=model_path)
    interpreter.allocate_tensors()

    # Get input and output tensor details.
    input_details = interpreter.get_input_details()
    print(str(input_details))
    output_details = interpreter.get_output_details()
    print(str(output_details))

    # Load and letterbox the test image.
    image = cv2.imread('003000.jpg')
    batch_input = preprocess_input(image, 416, 416).astype('float32')

    # Feed the input, run inference, and fetch the three YOLO output maps.
    interpreter.set_tensor(input_details[0]['index'], batch_input)
    start = time.time()
    interpreter.invoke()
    print('invoke took %.2f s' % (time.time() - start))
    output_data = [interpreter.get_tensor(output_details[0]['index']),
                   interpreter.get_tensor(output_details[1]['index']),
                   interpreter.get_tensor(output_details[2]['index'])]
    print(output_data)
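To reproduce the images/s numbers above, here is a minimal throughput sketch (assuming test_image_dir is set to a folder of .jpg files; interpreter, input_details and preprocess_input are reused from the script):

import glob
paths = glob.glob(os.path.join(test_image_dir, '*.jpg'))
start = time.time()
for p in paths:
    frame = cv2.imread(p)
    interpreter.set_tensor(input_details[0]['index'],
                           preprocess_input(frame, 416, 416).astype('float32'))
    interpreter.invoke()
print('%.2f images/s' % (len(paths) / (time.time() - start)))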