1、背景需求
通信协议的设计需要考虑后续跨语言的支持(如Java、Python、C),因此需要一种序列化、反序列化的库
2、相关知识
Google Protocol Buffer(protobuf)提供了一种适用于RPC系统、持续数据存储系统的混合语言数据标准,是一种可用于通信协议、数据存储等领域的语言无关、平台无关、可扩展的序列化结构数据格式。官方提供C++、Java、Python三种语言的API,C语言可通过第三方项目protobuf-c获得支持。
2.1 protobuf的优点
1、通过数据结构的定义,能够生成结构相关的接口代码;
2、兼容性好,支持对现有数据结构添加新成员;
3、协议文本字段自动压缩,使用二进制传输;
详细的介绍可见参考文献[1],附上一张序列化技术相关的性能对比图:
关于proto2还是proto3的选择可参考文献[4];考虑到兼容性问题,本文采用proto2语法进行定义[5];
2.3 安装
在C语言中使用时,需要安装 protobuf、protobuf-c 两个软件包(本文使用 protobuf-all-3.5.1.tar.gz、protobuf-c-1.3.0.tar.gz )。
安装方式:./configure && make && make install
使用 proto2 语法定义一个用户结构 user.proto,包含协议版本号 version(默认0x010000)、魔数 magic(默认0xfb709394)、用户名、电话、状态、邮箱(可选)信息;
syntax = "proto2";
option optimize_for = SPEED;

message User {
    required uint32 version = 1 [ default = 0x010000 ];
    required uint32 magic = 2 [ default = 0xfb709394 ];
    required string name = 3;
    required string phone = 4;
    enum Status {
        IDLE = 1;
        BUSY = 2;
    };
    required Status stat = 5 [ default = IDLE ];
    optional string email = 6;
}
执行命令 ``protoc-c --c_out=. user.proto`` 后将生成user.pb-c.c、user.pb-c.h两个文件,编译的时候需要加上 -lprotobuf-c 选项。
简单看下生成的文件接口,主要就是:
结构体: struct _User
相关接口:user__init、user__pack、user__unpack、user__free_unpacked
/* Generated by the protocol buffer compiler. DO NOT EDIT! */
/* Generated from: user.proto */

#ifndef PROTOBUF_C_user_2eproto__INCLUDED
#define PROTOBUF_C_user_2eproto__INCLUDED

#include <protobuf-c/protobuf-c.h>

PROTOBUF_C__BEGIN_DECLS

#if PROTOBUF_C_VERSION_NUMBER < 1000000
# error This file was generated by a newer version of protoc-c which is incompatible with your libprotobuf-c headers. Please update your headers.
#elif 1003000 < PROTOBUF_C_MIN_COMPILER_VERSION
# error This file was generated by an older version of protoc-c which is incompatible with your libprotobuf-c headers. Please regenerate this file with a newer version of protoc-c.
#endif

typedef struct _User User;

/* --- enums --- */

typedef enum _User__Status {
  USER__STATUS__IDLE = 1,
  USER__STATUS__BUSY = 2
    PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE(USER__STATUS)
} User__Status;

/* --- messages --- */

struct _User
{
  ProtobufCMessage base;
  uint32_t version;
  uint32_t magic;
  char *name;
  char *phone;
  User__Status stat;
  char *email;
};
#define USER__INIT \
 { PROTOBUF_C_MESSAGE_INIT (&user__descriptor) \
    , 65536u, 4218459028u, NULL, NULL, USER__STATUS__IDLE, NULL }

/* User methods */
void user__init (User *message);
size_t user__get_packed_size (const User *message);
size_t user__pack (const User *message, uint8_t *out);
size_t user__pack_to_buffer (const User *message, ProtobufCBuffer *buffer);
User * user__unpack (ProtobufCAllocator *allocator, size_t len, const uint8_t *data);
void user__free_unpacked (User *message, ProtobufCAllocator *allocator);

/* --- per-message closures --- */

typedef void (*User_Closure) (const User *message, void *closure_data);

/* --- services --- */

/* --- descriptors --- */

extern const ProtobufCMessageDescriptor user__descriptor;
extern const ProtobufCEnumDescriptor user__status__descriptor;

PROTOBUF_C__END_DECLS

#endif /* PROTOBUF_C_user_2eproto__INCLUDED */
使用实例,这里相对就方便多了,序列化:
/*
 * Fill in a sample User message and serialize it into `buffer`.
 *
 * The function is not told the capacity of `buffer`; the caller must supply
 * storage large enough for the encoded message.
 *
 * Returns the result of user__pack(), which main() reports as the packet size.
 */
static size_t __do_pack(u8 *buffer)
{
    /* USER__INIT (user.pb-c.h) carries the .proto defaults for
     * version, magic and stat; the string fields start out NULL. */
    User msg = USER__INIT;

    msg.stat  = USER__STATUS__IDLE;
    msg.email = "[email protected]";
    msg.phone = "010-1234-5678";
    msg.name  = "zhangsan";

    size_t packed = user__pack(&msg, buffer);
    return packed;
}
反序列化,注意 xx__unpack 接口会在内部申请空间并返回指针,使用完成后需调用 xx__free_unpacked 进行释放:
/*
 * Decode a serialized User message from `buffer` (`len` bytes) and print it.
 *
 * user__unpack() returns a newly allocated message (NULL selects the default
 * allocator), so every path that obtained one releases it again with
 * user__free_unpacked().
 *
 * Returns SUCCESS when the message decodes and carries the expected
 * MAGIC/VERSION values, FAILURE otherwise.
 */
static int __do_unpack(const u8 *buffer, size_t len)
{
    int rc = FAILURE;

    User *pusr = user__unpack(NULL, len, buffer);
    if (!pusr) {
        printf("user__unpack failed\n");
        return FAILURE;
    }

    /* Decoded data is external input: check it explicitly instead of with
     * assert(), which is compiled out under NDEBUG and would otherwise abort
     * without releasing the message. */
    if (pusr->magic != MAGIC || pusr->version != VERSION) {
        printf("unexpected magic/version: 0x%x/0x%x\n",
               (unsigned)pusr->magic, (unsigned)pusr->version);
        goto out;
    }

    /* email is an optional field and USER__INIT leaves it NULL, so guard it
     * before handing it to %s. */
    printf("Unpack: %s %s %s\n", pusr->name, pusr->phone,
           pusr->email ? pusr->email : "(none)");
    rc = SUCCESS;

out:
    user__free_unpacked(pusr, NULL);
    return rc;
}
int main(int argc, char *argv[])
{
u8 buffer[1024] = {0};
size_t size = __do_pack(buffer);
printf("Packet size: %zd\n", size);
__do_unpack(buffer, size);
exit(EXIT_SUCCESS);
}
执行结果为:
Packet size: 55
Unpack: zhangsan 010-1234-5678 [email protected]
使用gdb打印序列化后的buffer内容
(gdb) b __do_pack
Breakpoint 1 at 0x4008f4: file user.cc, line 24.
(gdb) r
Starting program: /home/liujinfeng/git/filesync/tests/test_redis/user
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/libthread_db.so.1".
Breakpoint 1, __do_pack (buffer=0x7fffffffe230 "") at user.cc:24
24 user__init(&user);
(gdb) finish
Run till exit from #0 __do_pack (buffer=0x7fffffffe230 "") at user.cc:24
0x0000000000400a38 in main (argc=1, argv=0x7fffffffe728) at user.cc:53
53 size_t size = __do_pack(buffer);
Value returned is $1 = 55
(gdb) p buffer
$2 = "\b\200\200\004\020\224\247\302\333\017\032\bzhangsan\"\r010-1234-5678(\001\062\[email protected]", '\000'
可以看到数值部分采用了变长编码(Varint)进行压缩(ZigZag 仅用于 sint32/sint64 等有符号类型),但字符串部分并没有进行压缩。
如果传入的size与实际打包长度(55)不一致,无论偏小(如30)还是偏大(如60),user__unpack都会返回NULL,这一点需要注意。
(gdb) p __do_unpack(buffer, 30)
user__unpack failed
$6 = -1
(gdb) p __do_unpack(buffer, 60)
user__unpack failed
$7 = -1
4、结论
protobuf的哲学在于定义结构标准,使用工具生成代码接口,达到跨语言的目的;
协议内容方面,数值字段能够得到有效压缩,但字符串不做压缩处理,可以考虑结合libz(zlib)进行压缩;
参考文章:
[1] https://www.ibm.com/developerworks/cn/linux/l-cn-gpb/index.html
[2] https://code.google.com/archive/p/thrift-protobuf-compare/wikis/Benchmarking.wiki
[3] https://blog.csdn.net/kid_2412/article/details/52502567
[4] https://solicomo.com/network-dev/protobuf-proto3-vs-proto2.html
[5] https://blog.csdn.net/zhaozheng7758/article/details/6749047