深度学习神经网络的输出模型有多种格式,这其中有一种格式使用比较广泛,并且背景深厚,它就是protobuf格式,关于这个格式的介绍请参考这篇博客:
《ONNX格式解析之google protobuf解析》(papaofdoudou的博客,CSDN):https://blog.csdn.net/tugouxp/article/details/120583308
摘要:ONNX模型是按照google protobuf格式保存的,模型训练的目的就是为了得到变量的权值,只不过是纯数字罢了,但是我们也不能就这样把这些数字一个一个地写入文件,因为在要保存的模型文件里,不光要保存权值,也要告诉之后用这个模型的人,模型结构是怎么样的,所以需要合理地设计保存文件的格式。不同的机器学习框架都有自己的模型保存格式,例如 Keras 的模型格式是 h5,而 Tensorflow 和 onnx 的保存格式就是 protobuf。其实 protobuf 使用起来非常简单方便,就是自己先定义一……
本文是在这篇博客的基础上开发一个简单的数据模型,在模型上进行序列化和反序列化操作,并检验数据的正确性。
需要注意的是,为了增加难度,我定义了具有嵌套关系的结构体结构形式。
syntax = "proto3";
package hello;
// Message that UserInfo embeds through its `zilong` field.
// Its field numbers start at 6 rather than 1.
message NestObj
{
int32 zilong1 = 6;
string zilong2 = 7;
int32 zilong3 = 8;
}
// Top-level message: scalar, string and bytes fields plus one nested NestObj.
message UserInfo
{
int32 id = 1;
int32 age = 2;
string name = 3;
// Arbitrary binary payload.
bytes raw_data = 4;
// Nested sub-message.
NestObj zilong = 5;
}
protoc-c --c_out=./ zilong.proto
生成的头文件和源文件如下
/* Generated by the protocol buffer compiler. DO NOT EDIT! */
/* Generated from: zilong.proto */
/* Do not generate deprecated warnings for self */
#ifndef PROTOBUF_C__NO_DEPRECATED
#define PROTOBUF_C__NO_DEPRECATED
#endif
#include "zilong.pb-c.h"
/*
 * Reset *message to the default state described by HELLO__NEST_OBJ__INIT.
 *
 * message: object to initialize; must not be NULL (it is written through
 *          unconditionally).
 */
void hello__nest_obj__init(Hello__NestObj *message)
{
    /* The template is only ever read, so it is declared const. */
    static const Hello__NestObj init_value = HELLO__NEST_OBJ__INIT;

    *message = init_value;
}
/*
 * Typed wrapper around protobuf_c_message_get_packed_size() for
 * Hello__NestObj. Asserts that the message carries the NestObj descriptor
 * before delegating, and returns whatever the generic routine returns.
 */
size_t hello__nest_obj__get_packed_size(const Hello__NestObj *message)
{
    const ProtobufCMessage *generic = (const ProtobufCMessage *)message;

    assert(message->base.descriptor == &hello__nest_obj__descriptor);
    return protobuf_c_message_get_packed_size(generic);
}
/*
 * Typed wrapper around protobuf_c_message_pack() for Hello__NestObj.
 *
 * message: object to serialize; its descriptor is asserted to be the
 *          NestObj descriptor.
 * out:     destination buffer, passed through unchanged.
 * Returns the value produced by protobuf_c_message_pack().
 */
size_t hello__nest_obj__pack(const Hello__NestObj *message, uint8_t *out)
{
    const ProtobufCMessage *generic = (const ProtobufCMessage *)message;

    assert(message->base.descriptor == &hello__nest_obj__descriptor);
    return protobuf_c_message_pack(generic, out);
}
/*
 * Typed wrapper around protobuf_c_message_pack_to_buffer() for
 * Hello__NestObj. The descriptor is asserted first; `buffer` is forwarded
 * as-is and the generic routine's return value is returned.
 */
size_t hello__nest_obj__pack_to_buffer(const Hello__NestObj *message,
                                       ProtobufCBuffer *buffer)
{
    const ProtobufCMessage *generic = (const ProtobufCMessage *)message;

    assert(message->base.descriptor == &hello__nest_obj__descriptor);
    return protobuf_c_message_pack_to_buffer(generic, buffer);
}
/*
 * Typed wrapper around protobuf_c_message_unpack() using the NestObj
 * descriptor.
 *
 * allocator, len, data: forwarded unchanged to protobuf_c_message_unpack().
 * Returns that call's result cast to Hello__NestObj *.
 */
Hello__NestObj *hello__nest_obj__unpack(ProtobufCAllocator *allocator,
                                        size_t len,
                                        const uint8_t *data)
{
    ProtobufCMessage *decoded =
        protobuf_c_message_unpack(&hello__nest_obj__descriptor,
                                  allocator, len, data);

    return (Hello__NestObj *)decoded;
}
/*
 * Release a Hello__NestObj through protobuf_c_message_free_unpacked().
 *
 * message:   object to release; NULL is accepted and ignored, so callers can
 *            pass the result of a failed unpack without crashing in the
 *            descriptor assertion below.
 * allocator: forwarded unchanged to protobuf_c_message_free_unpacked().
 */
void hello__nest_obj__free_unpacked(Hello__NestObj *message,
                                    ProtobufCAllocator *allocator)
{
    if (!message) {
        return;
    }
    assert(message->base.descriptor == &hello__nest_obj__descriptor);
    protobuf_c_message_free_unpacked((ProtobufCMessage *)message, allocator);
}
/*
 * Reset *message to the default state described by HELLO__USER_INFO__INIT.
 *
 * message: object to initialize; must not be NULL (it is written through
 *          unconditionally).
 */
void hello__user_info__init(Hello__UserInfo *message)
{
    /* The template is only ever read, so it is declared const. */
    static const Hello__UserInfo init_value = HELLO__USER_INFO__INIT;

    *message = init_value;
}
/*
 * Typed wrapper around protobuf_c_message_get_packed_size() for
 * Hello__UserInfo. Asserts that the message carries the UserInfo descriptor
 * before delegating, and returns whatever the generic routine returns.
 */
size_t hello__user_info__get_packed_size(const Hello__UserInfo *message)
{
    const ProtobufCMessage *generic = (const ProtobufCMessage *)message;

    assert(message->base.descriptor == &hello__user_info__descriptor);
    return protobuf_c_message_get_packed_size(generic);
}
/*
 * Typed wrapper around protobuf_c_message_pack() for Hello__UserInfo.
 *
 * message: object to serialize; its descriptor is asserted to be the
 *          UserInfo descriptor.
 * out:     destination buffer, passed through unchanged.
 * Returns the value produced by protobuf_c_message_pack().
 */
size_t hello__user_info__pack(const Hello__UserInfo *message, uint8_t *out)
{
    const ProtobufCMessage *generic = (const ProtobufCMessage *)message;

    assert(message->base.descriptor == &hello__user_info__descriptor);
    return protobuf_c_message_pack(generic, out);
}
/*
 * Typed wrapper around protobuf_c_message_pack_to_buffer() for
 * Hello__UserInfo. The descriptor is asserted first; `buffer` is forwarded
 * as-is and the generic routine's return value is returned.
 */
size_t hello__user_info__pack_to_buffer(const Hello__UserInfo *message,
                                        ProtobufCBuffer *buffer)
{
    const ProtobufCMessage *generic = (const ProtobufCMessage *)message;

    assert(message->base.descriptor == &hello__user_info__descriptor);
    return protobuf_c_message_pack_to_buffer(generic, buffer);
}
/*
 * Typed wrapper around protobuf_c_message_unpack() using the UserInfo
 * descriptor.
 *
 * allocator, len, data: forwarded unchanged to protobuf_c_message_unpack().
 * Returns that call's result cast to Hello__UserInfo *.
 */
Hello__UserInfo *hello__user_info__unpack(ProtobufCAllocator *allocator,
                                          size_t len,
                                          const uint8_t *data)
{
    ProtobufCMessage *decoded =
        protobuf_c_message_unpack(&hello__user_info__descriptor,
                                  allocator, len, data);

    return (Hello__UserInfo *)decoded;
}
/*
 * Release a Hello__UserInfo through protobuf_c_message_free_unpacked().
 *
 * message:   object to release; NULL is accepted and ignored, so callers can
 *            pass the result of a failed unpack without crashing in the
 *            descriptor assertion below.
 * allocator: forwarded unchanged to protobuf_c_message_free_unpacked().
 */
void hello__user_info__free_unpacked(Hello__UserInfo *message,
                                     ProtobufCAllocator *allocator)
{
    if (!message) {
        return;
    }
    assert(message->base.descriptor == &hello__user_info__descriptor);
    protobuf_c_message_free_unpacked((ProtobufCMessage *)message, allocator);
}
/*
 * Field descriptor table for hello.NestObj: one entry per field, listed as
 * zilong1, zilong2, zilong3 (field numbers 6, 7, 8). Each entry records the
 * field name, its number, label, wire type and the offsets of the matching
 * members inside Hello__NestObj.
 */
static const ProtobufCFieldDescriptor hello__nest_obj__field_descriptors[3] =
{
{
"zilong1",
6,
PROTOBUF_C_LABEL_OPTIONAL,
PROTOBUF_C_TYPE_INT32,
offsetof(Hello__NestObj, has_zilong1), /* quantifier: the has_zilong1 flag */
offsetof(Hello__NestObj, zilong1),
NULL,
NULL,
0, /* flags */
0,NULL,NULL /* reserved1,reserved2, etc */
},
{
"zilong2",
7,
PROTOBUF_C_LABEL_OPTIONAL,
PROTOBUF_C_TYPE_STRING,
0, /* quantifier_offset */
offsetof(Hello__NestObj, zilong2),
NULL,
NULL,
0, /* flags */
0,NULL,NULL /* reserved1,reserved2, etc */
},
{
"zilong3",
8,
PROTOBUF_C_LABEL_OPTIONAL,
PROTOBUF_C_TYPE_INT32,
offsetof(Hello__NestObj, has_zilong3), /* quantifier: the has_zilong3 flag */
offsetof(Hello__NestObj, zilong3),
NULL,
NULL,
0, /* flags */
0,NULL,NULL /* reserved1,reserved2, etc */
},
};
/*
 * Indices into hello__nest_obj__field_descriptors, listed in field-name
 * order (zilong1, zilong2, zilong3).
 */
static const unsigned hello__nest_obj__field_indices_by_name[] = {
0, /* field[0] = zilong1 */
1, /* field[1] = zilong2 */
2, /* field[2] = zilong3 */
};
/*
 * Field-number range table for hello.NestObj (one range plus a final entry).
 * The first entry pairs field number 6 with descriptor index 0.
 * NOTE(review): the last entry { 0, 3 } looks like a terminator carrying the
 * field count -- confirm against the ProtobufCIntRange documentation.
 */
static const ProtobufCIntRange hello__nest_obj__number_ranges[1 + 1] =
{
{ 6, 0 },
{ 0, 3 }
};
/*
 * Message descriptor for hello.NestObj. It ties together the message names,
 * the size of Hello__NestObj, the three-entry field table, the by-name index
 * table, the number-range table and the init function.
 */
const ProtobufCMessageDescriptor hello__nest_obj__descriptor =
{
PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC,
"hello.NestObj",
"NestObj",
"Hello__NestObj",
"hello",
sizeof(Hello__NestObj),
3, /* number of entries in the field table */
hello__nest_obj__field_descriptors,
hello__nest_obj__field_indices_by_name,
1, hello__nest_obj__number_ranges,
(ProtobufCMessageInit) hello__nest_obj__init,
NULL,NULL,NULL /* reserved[123] */
};
/*
 * Field descriptor table for hello.UserInfo: one entry per field, listed as
 * id, age, name, raw_data, zilong (field numbers 1..5). Each entry records
 * the field name, its number, label, wire type and the offsets of the
 * matching members inside Hello__UserInfo.
 */
static const ProtobufCFieldDescriptor hello__user_info__field_descriptors[5] =
{
{
"id",
1,
PROTOBUF_C_LABEL_OPTIONAL,
PROTOBUF_C_TYPE_INT32,
offsetof(Hello__UserInfo, has_id), /* quantifier: the has_id flag */
offsetof(Hello__UserInfo, id),
NULL,
NULL,
0, /* flags */
0,NULL,NULL /* reserved1,reserved2, etc */
},
{
"age",
2,
PROTOBUF_C_LABEL_OPTIONAL,
PROTOBUF_C_TYPE_INT32,
offsetof(Hello__UserInfo, has_age), /* quantifier: the has_age flag */
offsetof(Hello__UserInfo, age),
NULL,
NULL,
0, /* flags */
0,NULL,NULL /* reserved1,reserved2, etc */
},
{
"name",
3,
PROTOBUF_C_LABEL_OPTIONAL,
PROTOBUF_C_TYPE_STRING,
0, /* quantifier_offset */
offsetof(Hello__UserInfo, name),
NULL,
NULL,
0, /* flags */
0,NULL,NULL /* reserved1,reserved2, etc */
},
{
"raw_data",
4,
PROTOBUF_C_LABEL_OPTIONAL,
PROTOBUF_C_TYPE_BYTES,
offsetof(Hello__UserInfo, has_raw_data), /* quantifier: the has_raw_data flag */
offsetof(Hello__UserInfo, raw_data),
NULL,
NULL,
0, /* flags */
0,NULL,NULL /* reserved1,reserved2, etc */
},
{
"zilong",
5,
PROTOBUF_C_LABEL_OPTIONAL,
PROTOBUF_C_TYPE_MESSAGE,
0, /* quantifier_offset */
offsetof(Hello__UserInfo, zilong),
&hello__nest_obj__descriptor, /* descriptor of the nested message type */
NULL,
0, /* flags */
0,NULL,NULL /* reserved1,reserved2, etc */
},
};
/*
 * Indices into hello__user_info__field_descriptors, listed in field-name
 * order (age, id, name, raw_data, zilong) -- hence index 1 comes before 0.
 */
static const unsigned hello__user_info__field_indices_by_name[] = {
1, /* field[1] = age */
0, /* field[0] = id */
2, /* field[2] = name */
3, /* field[3] = raw_data */
4, /* field[4] = zilong */
};
/*
 * Field-number range table for hello.UserInfo (one range plus a final entry).
 * The first entry pairs field number 1 with descriptor index 0.
 * NOTE(review): the last entry { 0, 5 } looks like a terminator carrying the
 * field count -- confirm against the ProtobufCIntRange documentation.
 */
static const ProtobufCIntRange hello__user_info__number_ranges[1 + 1] =
{
{ 1, 0 },
{ 0, 5 }
};
/*
 * Message descriptor for hello.UserInfo. It ties together the message names,
 * the size of Hello__UserInfo, the five-entry field table, the by-name index
 * table, the number-range table and the init function.
 */
const ProtobufCMessageDescriptor hello__user_info__descriptor =
{
PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC,
"hello.UserInfo",
"UserInfo",
"Hello__UserInfo",
"hello",
sizeof(Hello__UserInfo),
5, /* number of entries in the field table */
hello__user_info__field_descriptors,
hello__user_info__field_indices_by_name,
1, hello__user_info__number_ranges,
(ProtobufCMessageInit) hello__user_info__init,
NULL,NULL,NULL /* reserved[123] */
};
/* Generated by the protocol buffer compiler. DO NOT EDIT! */
/* Generated from: zilong.proto */
#ifndef PROTOBUF_C_zilong_2eproto__INCLUDED
#define PROTOBUF_C_zilong_2eproto__INCLUDED
#include <protobuf-c/protobuf-c.h>
PROTOBUF_C__BEGIN_DECLS
#if PROTOBUF_C_VERSION_NUMBER < 1000000
# error This file was generated by a newer version of protoc-c which is incompatible with your libprotobuf-c headers. Please update your headers.
#elif 1002001 < PROTOBUF_C_MIN_COMPILER_VERSION
# error This file was generated by an older version of protoc-c which is incompatible with your libprotobuf-c headers. Please regenerate this file with a newer version of protoc-c.
#endif
/* Forward typedefs so the message structs can refer to each other by name. */
typedef struct _Hello__NestObj Hello__NestObj;
typedef struct _Hello__UserInfo Hello__UserInfo;
/* --- enums --- */
/* --- messages --- */
/*
 * In-memory form of the NestObj message. Each int32 value is paired with a
 * has_* boolean; the string member has no such flag.
 */
struct _Hello__NestObj
{
/* Common message header; holds the descriptor pointer. */
ProtobufCMessage base;
/* Flag paired with zilong1. */
protobuf_c_boolean has_zilong1;
int32_t zilong1;
char *zilong2;
/* Flag paired with zilong3. */
protobuf_c_boolean has_zilong3;
int32_t zilong3;
};
/*
 * Static initializer for Hello__NestObj: base bound to the NestObj
 * descriptor, both flag/value pairs zero, string pointer NULL.
 */
#define HELLO__NEST_OBJ__INIT \
{ PROTOBUF_C_MESSAGE_INIT (&hello__nest_obj__descriptor) \
, 0,0, NULL, 0,0 }
/*
 * In-memory form of the UserInfo message. The int32 and bytes members are
 * each paired with a has_* boolean; the string and the nested message are
 * plain pointers.
 */
struct _Hello__UserInfo
{
/* Common message header; holds the descriptor pointer. */
ProtobufCMessage base;
/* Flag paired with id. */
protobuf_c_boolean has_id;
int32_t id;
/* Flag paired with age. */
protobuf_c_boolean has_age;
int32_t age;
char *name;
/* Flag paired with raw_data. */
protobuf_c_boolean has_raw_data;
ProtobufCBinaryData raw_data;
/* Pointer to the nested NestObj; NULL in the default initializer. */
Hello__NestObj *zilong;
};
/*
 * Static initializer for Hello__UserInfo: base bound to the UserInfo
 * descriptor, all flags and values zero, raw_data {0,NULL}, and the name
 * and nested-message pointers NULL.
 */
#define HELLO__USER_INFO__INIT \
{ PROTOBUF_C_MESSAGE_INIT (&hello__user_info__descriptor) \
, 0,0, 0,0, NULL, 0,{0,NULL}, NULL }
/* Hello__NestObj methods */
/* Set *message to the HELLO__NEST_OBJ__INIT defaults. */
void hello__nest_obj__init
(Hello__NestObj *message);
/* Size query; forwards to protobuf_c_message_get_packed_size(). */
size_t hello__nest_obj__get_packed_size
(const Hello__NestObj *message);
/* Serialize into `out`; forwards to protobuf_c_message_pack(). */
size_t hello__nest_obj__pack
(const Hello__NestObj *message,
uint8_t *out);
/* Serialize into `buffer`; forwards to protobuf_c_message_pack_to_buffer(). */
size_t hello__nest_obj__pack_to_buffer
(const Hello__NestObj *message,
ProtobufCBuffer *buffer);
/* Decode `len` bytes at `data`; forwards to protobuf_c_message_unpack(). */
Hello__NestObj *
hello__nest_obj__unpack
(ProtobufCAllocator *allocator,
size_t len,
const uint8_t *data);
/* Release a message; forwards to protobuf_c_message_free_unpacked(). */
void hello__nest_obj__free_unpacked
(Hello__NestObj *message,
ProtobufCAllocator *allocator);
/* Hello__UserInfo methods */
/* Set *message to the HELLO__USER_INFO__INIT defaults. */
void hello__user_info__init
(Hello__UserInfo *message);
/* Size query; forwards to protobuf_c_message_get_packed_size(). */
size_t hello__user_info__get_packed_size
(const Hello__UserInfo *message);
/* Serialize into `out`; forwards to protobuf_c_message_pack(). */
size_t hello__user_info__pack
(const Hello__UserInfo *message,
uint8_t *out);
/* Serialize into `buffer`; forwards to protobuf_c_message_pack_to_buffer(). */
size_t hello__user_info__pack_to_buffer
(const Hello__UserInfo *message,
ProtobufCBuffer *buffer);
/* Decode `len` bytes at `data`; forwards to protobuf_c_message_unpack(). */
Hello__UserInfo *
hello__user_info__unpack
(ProtobufCAllocator *allocator,
size_t len,
const uint8_t *data);
/* Release a message; forwards to protobuf_c_message_free_unpacked(). */
void hello__user_info__free_unpacked
(Hello__UserInfo *message,
ProtobufCAllocator *allocator);
/* --- per-message closures --- */
/* Callback type: receives a const NestObj message and an opaque user pointer. */
typedef void (*Hello__NestObj_Closure)
(const Hello__NestObj *message,
void *closure_data);
/* Callback type: receives a const UserInfo message and an opaque user pointer. */
typedef void (*Hello__UserInfo_Closure)
(const Hello__UserInfo *message,
void *closure_data);
/* --- services --- */
/* --- descriptors --- */
/* Message descriptors; referenced by the *_INIT macros in this header. */
extern const ProtobufCMessageDescriptor hello__nest_obj__descriptor;
extern const ProtobufCMessageDescriptor hello__user_info__descriptor;
PROTOBUF_C__END_DECLS
#endif /* PROTOBUF_C_zilong_2eproto__INCLUDED */
测试用例:
编译
protoc-c --c_out=./ zilong.proto
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/caozilong/Workspace/proto-c/install/lib
gcc zilong.pb-c.c main.c -I/home/caozilong/Workspace/proto-c/install/include -L/home/caozilong/Workspace/proto-c/install/lib -lprotobuf-c
运行验证:
不明白为什么只有字符串才会序列化和反序列化成功,其他的数字类型为0,暂时到这里吧,有时间再查。
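上面数字类型输出为0的问题,后来一次偶然发现了根因:生成代码时用的 protoc-c 是系统环境里通过 apt-get 安装的工具,而 GCC 链接的库是由另一份 protobuf-c 源码编译出来的 SDK,两者版本不一致,生成的代码与运行库不匹配。从上面生成的头文件也能看出端倪:旧版 protoc-c 为数值和 bytes 字段生成了 has_xxx 标志,测试代码没有把这些标志置 1,打包时这些字段很可能就被跳过了,而字符串字段只判断指针是否为空,所以只有字符串是正常的。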
知道了原因,解决方法就简单了,用源码编译出的protoc-c重新编译zilong.proto即可
/home/caozilong/Workspace/proto-c/install/bin/protoc-c --c_out=./ zilong.proto
此时我们加重测试用例,将nest_obj也填充上。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include "zilong.pb-c.h"
static size_t pack_data(unsigned char *out)
{
unsigned char data[256];
memset(data, 0x00, 256);
int i = 0;
for(i = 0; i < 256; i ++)
data[i] = i;
static Hello__UserInfo usr_obj;
static Hello__NestObj nest_obj;
memset(&usr_obj, 0x00, sizeof(Hello__UserInfo));
memset(&nest_obj, 0x00, sizeof(Hello__NestObj));
hello__user_info__init(&usr_obj);
hello__nest_obj__init(&nest_obj);
usr_obj.name = "zilongc";
usr_obj.id = 12;
usr_obj.age = 37;
usr_obj.raw_data.data = data;
usr_obj.raw_data.len = 256;
usr_obj.zilong = &nest_obj;
usr_obj.zilong->zilong1 = 0x5a5a5a5a;
usr_obj.zilong->zilong2 = "zilongcao";
usr_obj.zilong->zilong3 = 0xa5a5a5a5;
return hello__user_info__pack(&usr_obj, out);
}
/*
 * Decode a serialized Hello__UserInfo and print every field to stdout.
 *
 * len:  number of encoded bytes at `data`.
 * data: encoded message.
 * Returns 0 in all cases (kept for compatibility with existing callers); a
 * decode failure is reported on stderr.
 */
static size_t unpack_data(size_t len, const unsigned char *data)
{
    Hello__UserInfo *msg = hello__user_info__unpack(NULL, len, data);
    size_t i;

    /* The unpack result must be checked before any field is touched. */
    if (msg == NULL) {
        fprintf(stderr, "failed to unpack %zu bytes.\n", len);
        return 0;
    }

    /* String members may be NULL; never hand NULL to %s. */
    printf("name %s.\n", msg->name != NULL ? msg->name : "(null)");
    printf("id %d.\n", (int)msg->id);
    printf("age %d.\n", (int)msg->age);

    /* The nested message is a pointer and is NULL when it was not encoded. */
    if (msg->zilong != NULL) {
        printf("zlong1 %x.\n", (unsigned int)msg->zilong->zilong1);
        printf("zlong2 %s.\n",
               msg->zilong->zilong2 != NULL ? msg->zilong->zilong2 : "(null)");
        printf("zlong3 %x.\n", (unsigned int)msg->zilong->zilong3);
    } else {
        fprintf(stderr, "nested zilong message is absent.\n");
    }

    /* size_t index matches the type of raw_data.len. */
    for (i = 0; i < msg->raw_data.len; i++) {
        printf("[%3zu]->%3d ", i, msg->raw_data.data[i]);
    }
    printf("\n");

    hello__user_info__free_unpacked(msg, NULL);
    return 0;
}
/*
 * Round-trip demo: serialize the sample message into a local buffer, print
 * the packed size, then decode and dump it again.
 */
int main(void)
{
    unsigned char buff[1024] = {0};
    size_t pack_size = pack_data(buff);

    /* size_t must be printed with %zu, not %ld. */
    printf("%s line %d, packsize %zu.\n", __func__, __LINE__, pack_size);
    unpack_data(pack_size, buff);
    return 0;
}
重新编译
gcc zilong.pb-c.c main.c -I/home/caozilong/Workspace/proto-c/install/include -L/home/caozilong/Workspace/proto-c/install/lib -lprotobuf-c -static
测试发现,反序列化的数据,每个域都是对的。
至此,我了解了ONNX,PB之类的格式的一些原理。
通过GDB将序列化的数据导出来。
得到序列化后的数据内容
对于嵌套MESSAGE的处理逻辑,重点关注下面的函数实现:可以看到prefixed_message_pack本身是在protobuf_c_message_pack的环境中运行的,但它内部又调用了protobuf_c_message_pack,这是一种递归调用,递归能够退出依赖于你定义的proto中不存在结构体的循环嵌套。这里rv_packed_size是子MESSAGE的长度,由于打包格式是 LEN DATA,所以计算出LEN之后,需要将DATA(共rv_packed_size字节)向后搬动,目的是为了给LEN域留出空间。
很自然想到,如果MESSAGE的指针为空怎么办?为了避免做无谓的序列化操作,代码中用field_is_zeroish做了检测,当遇到指针为NULL时直接退出。
又很自然的想到,如果刻意制造一个循环指向,看protobuf能否处理,还是会被戏耍? 说干就干,修改proto ,在NestObj中增加一个对UserInfo的指向。
代码也很好修改,增加33行的代码即可。
重新生成proto c代码之后,编译程序,运行CRASH,由于我们引入了递归,怀疑是爆栈导致的。
为了确认,我们用GDB抓取CRASH时的现场:
可以看到,死机时候,调用堆栈深达160多层,并且protobuf_c_message_pack被调用了很多次,确认是堆栈溢出导致。
看来谷歌的工程师并未对这种循环引用增加检测处理,这样我们很容易“戏耍” protobuf,导致其堆栈溢出。