MCUs can be found everywhere in today's products, and they keep getting more capable while prices keep falling. When artificial intelligence comes up, it can seem as though only powerful hardware can run it, requiring a neural-network accelerator or similar hardware support. In reality, an NPU or KPU only speeds up the relevant computations to achieve better real-time performance; for simple, modestly sized network structures, a reasonably capable MCU can run them just fine. TensorFlow is one of the better-known deep-learning frameworks and has already been adapted for embedded platforms: tflite_micro is well suited to ordinary MCU platforms.
It is implemented entirely in C++, so essentially any MCU platform with a C++ compiler can port and use it. Many neural-network operators are already implemented, such as convolution and pooling. If you are familiar with C++ you can develop in C++ directly; if you only know C, you can call the C++ code from C.
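As a rough sketch of that last point, the C++ inference class can be hidden behind a plain C interface so that the rest of the firmware only sees ordinary C functions. The NeuralNetwork class and its methods are the ones shown later in this article; the header name and the nn_* wrapper functions are made up for illustration:
// nn_wrapper.cc - compiled as C++, callable from C
#include "NeuralNetwork.h" // assumed header declaring the NeuralNetwork class shown below

static NeuralNetwork *s_nn = nullptr;

extern "C" void nn_init(void)
{
    // Construct the network once at startup
    s_nn = new NeuralNetwork();
}

extern "C" float nn_predict(const float *features, int len)
{
    // Copy the caller's features into the model's input tensor and run inference
    float *in = s_nn->getInputBuffer();
    for (int i = 0; i < len; i++)
    {
        in[i] = features[i];
    }
    return s_nn->predict();
}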
gcc.mk
%.o: %.cc
	$(Q)$(CPP) $(CC_FLAGS) $(CC_SYMBOLS) -std=c++11 -Wno-type-limits -Wno-maybe-uninitialized -Wno-missing-field-initializers -Wstrict-aliasing -DTF_LITE_STATIC_MEMORY -DNDEBUG -Wsign-compare -Wdouble-promotion -Wshadow -Wunused-variable -Wmissing-field-initializers -Wunused-function -Wswitch -Wvla -O3 -Wno-return-type -Wno-strict-aliasing $(INCLUDE_PATHS) -o $@ $<
In the project's makefile:
SRCS += $(basename $(foreach dir,$(DIRS),$(wildcard $(dir)/*.cc))) # collect all .cc source files under the project
# include paths for the tflite_micro sources
INCLUDE_PATHS += -I$(PRJ_ROOT_PATH)/tfmicro
INCLUDE_PATHS += -I$(PRJ_ROOT_PATH)/tfmicro/third_party/gemmlowp
INCLUDE_PATHS += -I$(PRJ_ROOT_PATH)/tfmicro/third_party/flatbuffers/include
INCLUDE_PATHS += -I$(PRJ_ROOT_PATH)/tfmicro/third_party/ruy
Build a simple model with Keras (a binary classifier, for example); once training has finished, save it as a .h5 model file.
import tensorflow as tf
from tensorflow.keras.models import Sequential

model = Sequential([
    # model layers go here
])
model.summary()

epochs = 120
model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(),
              metrics=['accuracy'])

# log_dir = "./" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(histogram_freq=1)
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath="./trans_model",
    monitor='val_accuracy',
    mode='max',
    save_best_only=True)

# train_dataset is the training dataset built elsewhere
history = model.fit(
    train_dataset,
    steps_per_epoch=len(X_train) // batch_size,  # how many batches make up one epoch
    epochs=epochs,                               # number of training epochs
    validation_data=train_dataset,
    validation_steps=1,
    callbacks=[tensorboard_callback, model_checkpoint_callback]
)
model.save("trained.model.h5")
# mnist_ds is the dataset (data, labels) used for calibration
def representative_dataset_gen():
    for input_value in mnist_ds.take(100):
        yield [input_value]

saved_model_dir = "trained.model.h5"
converter2 = tf.compat.v1.lite.TFLiteConverter.from_keras_model_file(model_file=saved_model_dir)
converter2.optimizations = [tf.lite.Optimize.DEFAULT]
converter2.representative_dataset = tf.lite.RepresentativeDataset(representative_dataset_gen)
converter2.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
tflite_quant_model = converter2.convert()
open("converted_model.tflite", "wb").write(tflite_quant_model)
Under Linux, the xxd -i command can convert the .tflite file into a C array, for example:
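A sketch of the generated header is shown below; xxd derives the symbol names from the input file name (dots become underscores), which is where the converted_model_tflite array referenced in the C++ code comes from. The actual byte values and length are model-specific:
// xxd -i converted_model.tflite > converted_model_tflite.h
unsigned char converted_model_tflite[] = {
    /* raw bytes of converted_model.tflite emitted by xxd */
};
unsigned int converted_model_tflite_len = /* size of the file in bytes */;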
// The following code is adapted from examples found on GitHub
const int kArenaSize = 55000; // tensor arena size required by the model

NeuralNetwork::NeuralNetwork()
{
#if 1
    m_error_reporter = new tflite::MicroErrorReporter();

    // Allocate a few spare bytes so the arena start can be aligned to 16 bytes.
    m_tensor_arena = (uint8_t *)malloc(kArenaSize + 16);
    if (!m_tensor_arena)
    {
        TF_LITE_REPORT_ERROR(m_error_reporter, "Could not allocate arena");
        return;
    }
    // Align the usable arena start; m_tensor_arena keeps the pointer returned
    // by malloc so the destructor can free() it safely.
    uint8_t *tensor_arena = m_tensor_arena;
    if ((uintptr_t)tensor_arena % 16 != 0)
    {
        tensor_arena += 16 - (uintptr_t)tensor_arena % 16;
    }
TF_LITE_REPORT_ERROR(m_error_reporter, "Loading model");
m_model = tflite::GetModel(converted_model_tflite);
if (m_model->version() != TFLITE_SCHEMA_VERSION)
{
TF_LITE_REPORT_ERROR(m_error_reporter, "Model provided is schema version %d not equal to supported version %d.",
m_model->version(), TFLITE_SCHEMA_VERSION);
return;
}
// This pulls in the operators implementations we need
m_resolver = new tflite::MicroMutableOpResolver<10>();
m_resolver->AddConv2D();
m_resolver->AddDepthwiseConv2D();
m_resolver->AddMaxPool2D();
m_resolver->AddFullyConnected();
m_resolver->AddMul();
m_resolver->AddAdd();
m_resolver->AddLogistic();
m_resolver->AddReshape();
m_resolver->AddQuantize();
m_resolver->AddDequantize();
// Build an interpreter to run the model with.
m_interpreter = new tflite::MicroInterpreter(
m_model, *m_resolver, m_tensor_arena, kArenaSize, m_error_reporter);
    // Allocate memory from the tensor_arena for the model's tensors.
    TfLiteStatus allocate_status = m_interpreter->AllocateTensors();
    if (allocate_status != kTfLiteOk)
    {
        TF_LITE_REPORT_ERROR(m_error_reporter, "AllocateTensors() failed");
        return;
    }

    size_t used_bytes = m_interpreter->arena_used_bytes();
    TF_LITE_REPORT_ERROR(m_error_reporter, "Used bytes %d\n", (int)used_bytes);

    // Obtain pointers to the model's input and output tensors.
    input = m_interpreter->input(0);
    output = m_interpreter->output(0);

    // TfLiteIntArray *dims = m_interpreter->tensor(m_interpreter->inputs()[0])->dims;
    // printf("height:%d width:%d channels:%d\r\n", dims->data[1], dims->data[2], dims->data[3]);
    // printf("input len:%d\r\n", m_interpreter->inputs_size());

    int uinput = m_interpreter->inputs()[0];
    // Get the dims of the input tensor
    TfLiteIntArray *dims = m_interpreter->tensor(uinput)->dims;
    int wanted_height = dims->data[1];
    int wanted_width = dims->data[2];
    int wanted_channels = dims->data[3];
    printf("wanted_height:%d, wanted_width:%d, wanted_channels:%d\r\n", wanted_height, wanted_width, wanted_channels);
#endif
}
NeuralNetwork::~NeuralNetwork()
{
    free(m_tensor_arena);
}

float *NeuralNetwork::getInputBuffer()
{
    return input->data.f;
}

float NeuralNetwork::predict()
{
#if 1
    m_interpreter->Invoke();
    return output->data.f[0];
#endif
}
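Because the converter above does not set inference_input_type / inference_output_type, the quantized model typically keeps float32 input and output tensors (which is also why AddQuantize and AddDequantize are registered), so accessing them through data.f is valid. A minimal usage sketch for a single-output binary classifier follows; kInputLen, my_features and the 0.5 threshold are illustrative and not part of the original code:
// Run one inference and turn the sigmoid output into a class decision.
NeuralNetwork nn;
float *in = nn.getInputBuffer();
for (int i = 0; i < kInputLen; i++)  // kInputLen: number of input features, model-specific
{
    in[i] = my_features[i];          // my_features: caller-supplied, preprocessed input
}
float prob = nn.predict();           // probability of the positive class
int predicted_class = (prob > 0.5f) ? 1 : 0;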