很早之前就用过Google的Protobuf做数据编码,但一直没有深入理解其中的原理。最近做了一次通讯抓包,发现其中有很多Protobuf编码的数据包,于是决定分析一下这些数据包及其编码方式。
首先来简单介绍一下Protobuf的使用,这里以windows下java开发为例,几个步骤:编写*.proto ->使用google提供的protoc.exe生成*.java->项目中导入protobuf的.jar包进行开发即可。先看这里的*.proto文件:
package com;
// Envelope message: carries one header payload and one body payload.
// In the accompanying Java code, msghead is filled with a serialized
// CMsgHead and msgbody with a serialized CMsgReg, each converted to a
// string via ByteString.toStringUtf8().
// NOTE(review): protobuf `string` fields are meant for UTF-8 text; a `bytes`
// field would carry arbitrary serialized data losslessly. Changing the type
// alters the generated Java accessors, so it is left as-is here.
message CMsg {
    required string msghead = 1;  // serialized CMsgHead (see note above)
    required string msgbody = 2;  // serialized CMsgReg (see note above)
}
// Message header. Every field is required, so a CMsgHead cannot be built or
// parsed unless all six fields are present.
// Field meanings below are inferred from the names only -- TODO confirm.
message CMsgHead {
    required int32  msglen      = 1;  // presumably the message length
    required int32  msgtype     = 2;  // presumably a message type code
    required int32  msgseq      = 3;  // presumably a sequence number
    required int32  termversion = 4;  // presumably the terminal version
    required int32  msgres      = 5;  // presumably a result/response code
    required string termid      = 6;  // presumably the terminal identifier
}
// Message body used by the demo client and server. All fields are optional,
// so receivers should test presence (hasXxx() in the generated Java code)
// before relying on a value.
message CMsgReg {
    optional int32  area   = 1;
    optional int32  region = 2;
    optional int32  shop   = 3;
    optional int32  ret    = 4;
    optional string termid = 5;
}
使用protoc.exe生成java文件,命令如下(假设上面的定义保存为Msg.proto,并且与protoc.exe位于同一目录):
protoc.exe --java_out=. Msg.proto
将生成的Msg.java及protobuf-java-2.3.0.jar导入项目中进行开发,这里写一个服务器端ProtoServer及客户端ProtoClient:
package com;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.ServerSocket;
import java.net.Socket;
import com.Msg.CMsg;
import com.Msg.CMsgHead;
import com.Msg.CMsgReg;
/**
 * Blocking, single-threaded demo server for the Protobuf walkthrough.
 *
 * <p>For every accepted connection it reads one {@link CMsg} from the client,
 * prints the decoded {@link CMsgHead} and {@link CMsgReg}, writes one
 * {@link CMsg} back and closes the connection.
 *
 * <p>The wire protocol has no length prefix, so each message is taken from a
 * single {@code read()} call into a 1024-byte buffer. A message that is larger
 * than the buffer, or that TCP delivers in several segments, will not parse.
 */
public class ProtoServer implements Runnable {

    /** TCP port the server listens on. */
    private static final int PORT = 12345;

    /** Size of the receive buffer; also the largest message that can be read. */
    private static final int BUFFER_SIZE = 1024;

    /**
     * Charset used to turn the string-typed envelope fields back into bytes.
     * It must match {@code ByteString.toStringUtf8()} on the sending side; the
     * platform default charset used by a bare {@code getBytes()} may differ.
     */
    private static final java.nio.charset.Charset UTF_8 =
            java.nio.charset.Charset.forName("UTF-8");

    /**
     * Accepts clients forever, serving them one at a time. A failure while
     * serving one client is logged and does not stop the accept loop; a
     * failure of the listening socket itself ends the method.
     */
    @Override
    public void run() {
        ServerSocket serverSocket = null;
        try {
            System.out.println("beign:");
            serverSocket = new ServerSocket(PORT);
            while (true) {
                System.out.println("等待接收用户连接:");
                // Wait for the next client request.
                Socket client = serverSocket.accept();
                try {
                    handleClient(client);
                } catch (Exception ex) {
                    System.out.println(ex.getMessage());
                    ex.printStackTrace();
                } finally {
                    // A failing close must not take the whole server down.
                    try {
                        client.close();
                    } catch (IOException closeError) {
                        closeError.printStackTrace();
                    }
                    System.out.println("close");
                }
            }
        } catch (IOException e) {
            System.out.println(e.getMessage());
            e.printStackTrace();
        } finally {
            // Release the listening port when the loop ends abnormally.
            if (serverSocket != null) {
                try {
                    serverSocket.close();
                } catch (IOException closeError) {
                    closeError.printStackTrace();
                }
            }
        }
    }

    /**
     * Serves one client: reads a request, prints its content and replies.
     *
     * @param client the connected client socket; closed by the caller
     * @throws IOException if reading fails or the received bytes are not a
     *         valid message
     */
    private void handleClient(Socket client) throws IOException {
        InputStream inputstream = client.getInputStream();
        DataOutputStream dataOutputStream = new DataOutputStream(
                client.getOutputStream());

        byte[] buffer = new byte[BUFFER_SIZE];
        int count = inputstream.read(buffer);
        if (count < 0) {
            // read() returns -1 at end of stream; allocating new byte[-1]
            // would throw NegativeArraySizeException.
            System.out.println("client closed the connection without sending data");
            return;
        }
        byte[] payload = new byte[count];
        System.arraycopy(buffer, 0, payload, 0, count);

        CMsg msg = CMsg.parseFrom(payload);

        CMsgHead head = CMsgHead.parseFrom(msg.getMsghead().getBytes(UTF_8));
        System.out.println("==len===" + head.getMsglen());
        System.out.println("==res===" + head.getMsgres());
        System.out.println("==seq===" + head.getMsgseq());
        System.out.println("==type===" + head.getMsgtype());
        System.out.println("==Termid===" + head.getTermid());
        System.out.println("==Termversion===" + head.getTermversion());

        CMsgReg body = CMsgReg.parseFrom(msg.getMsgbody().getBytes(UTF_8));
        System.out.println("==area==" + body.getArea());
        System.out.println("==Region==" + body.getRegion());
        System.out.println("==shop==" + body.getShop());

        sendProtoBufBack(dataOutputStream);
        inputstream.close();
    }

    /**
     * Builds the fixed reply message.
     *
     * <p>The header and body are serialized and stored in the string-typed
     * envelope fields through {@code toStringUtf8()}. This only round-trips
     * when the serialized bytes happen to be valid UTF-8, as they are for the
     * small values used here.
     *
     * @return the serialized {@link CMsg} reply
     */
    private byte[] getProtoBufBack() {
        // head
        CMsgHead head = CMsgHead.newBuilder().setMsglen(10).setMsgtype(21)
                .setMsgseq(32).setTermversion(43).setMsgres(54)
                .setTermid("Server:head").build();
        // body
        CMsgReg body = CMsgReg.newBuilder().setArea(11).setRegion(22)
                .setShop(33).setRet(44).setTermid("Server:body").build();
        // Msg
        CMsg msg = CMsg.newBuilder()
                .setMsghead(head.toByteString().toStringUtf8())
                .setMsgbody(body.toByteString().toStringUtf8()).build();
        return msg.toByteArray();
    }

    /**
     * Writes the reply to the client and flushes it. No length prefix is sent;
     * the client is expected to read the reply in one go.
     *
     * @param dataOutputStream stream connected to the client
     */
    private void sendProtoBufBack(DataOutputStream dataOutputStream) {
        byte[] backBytes = getProtoBufBack();
        try {
            dataOutputStream.write(backBytes, 0, backBytes.length);
            dataOutputStream.flush();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Starts the server on its own thread.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Thread desktopServerThread = new Thread(new ProtoServer());
        desktopServerThread.start();
    }
}
package com;
import java.io.InputStream;
import java.net.Socket;
import com.Msg.CMsg;
import com.Msg.CMsgHead;
import com.Msg.CMsgReg;
/**
 * Demo client for the Protobuf walkthrough: connects to the server, sends one
 * {@link CMsg}, reads one {@link CMsg} reply and prints its content.
 */
public class ProtoClient {

    /** Size of the receive buffer; also the largest reply that can be read. */
    private static final int BUFFER_SIZE = 1024;

    /**
     * Charset used to turn the string-typed envelope fields back into bytes.
     * It must match {@code ByteString.toStringUtf8()} on the sending side; the
     * platform default charset used by a bare {@code getBytes()} may differ.
     */
    private static final java.nio.charset.Charset UTF_8 =
            java.nio.charset.Charset.forName("UTF-8");

    /**
     * Runs one request/response exchange.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        ProtoClient pc = new ProtoClient();
        System.out.println("beign:");
        pc.runget();
    }

    /**
     * Connects to the server, sends a fixed message and prints the reply.
     * Any failure is printed; the socket is closed in all cases.
     */
    public void runget() {
        Socket socket = null;
        try {
            //socket = new Socket("localhost", 12345);
            socket = new Socket("192.168.85.152", 12345);
            // head
            CMsgHead head = CMsgHead.newBuilder().setMsglen(5).setMsgtype(1)
                    .setMsgseq(3).setTermversion(41).setMsgres(5)
                    .setTermid("Client:head").build();
            // body
            CMsgReg body = CMsgReg.newBuilder().setArea(11).setRegion(22)
                    .setShop(33).setRet(44).setTermid("Clent:body").build();
            // Msg: header and body travel as strings, which only round-trips
            // while their serialized bytes are valid UTF-8.
            CMsg msg = CMsg.newBuilder()
                    .setMsghead(head.toByteString().toStringUtf8())
                    .setMsgbody(body.toByteString().toStringUtf8()).build();

            // Send the request to the server.
            System.out.println("sendMsg...");
            msg.writeTo(socket.getOutputStream());

            // Receive the server's reply.
            InputStream input = socket.getInputStream();
            System.out.println("recvMsg:");
            byte[] by = recvMsg(input);
            if (by == null) {
                // recvMsg has already reported why nothing was read.
                System.out.println("no reply received from server");
            } else {
                printMsg(CMsg.parseFrom(by));
            }
            input.close();
        } catch (Exception e) {
            System.out.println(e.toString());
        } finally {
            // Close the socket even when sending or parsing failed.
            if (socket != null) {
                try {
                    socket.close();
                } catch (java.io.IOException closeError) {
                    System.out.println(closeError.toString());
                }
            }
        }
    }

    /**
     * Decodes the header and body carried by an envelope and prints every
     * field that is present.
     *
     * @param g the received envelope message
     */
    public void printMsg(CMsg g) {
        try {
            StringBuilder sb = new StringBuilder();

            CMsgHead h = CMsgHead.parseFrom(g.getMsghead().getBytes(UTF_8));
            if (h.hasMsglen()) {
                sb.append("==msglen===").append(h.getMsglen()).append("\n");
            }
            if (h.hasMsgres()) {
                sb.append("==msgres===").append(h.getMsgres()).append("\n");
            }
            if (h.hasMsgseq()) {
                sb.append("==msgseq===").append(h.getMsgseq()).append("\n");
            }
            if (h.hasMsgtype()) {
                sb.append("==msgtype===").append(h.getMsgtype()).append("\n");
            }
            if (h.hasTermid()) {
                sb.append("==termid===").append(h.getTermid()).append("\n");
            }
            if (h.hasTermversion()) {
                sb.append("==termversion===").append(h.getTermversion()).append("\n");
            }

            CMsgReg bo = CMsgReg.parseFrom(g.getMsgbody().getBytes(UTF_8));
            if (bo.hasArea()) {
                sb.append("==area==").append(bo.getArea()).append("\n");
            }
            if (bo.hasRegion()) {
                sb.append("==region==").append(bo.getRegion()).append("\n");
            }
            if (bo.hasShop()) {
                sb.append("==shop==").append(bo.getShop()).append("\n");
            }
            if (bo.hasRet()) {
                sb.append("==ret==").append(bo.getRet()).append("\n");
            }
            if (bo.hasTermid()) {
                sb.append("==termid==").append(bo.getTermid()).append("\n");
            }
            System.out.println(sb.toString());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads one reply from the stream with a single {@code read()} call.
     *
     * <p>The protocol has no length prefix, so a reply larger than the buffer
     * or delivered in several TCP segments is returned only in part.
     *
     * @param inpustream stream connected to the server
     * @return the bytes received, or {@code null} if the stream was already at
     *         end of stream or reading failed
     */
    public byte[] recvMsg(InputStream inpustream) {
        try {
            byte[] buffer = new byte[BUFFER_SIZE];
            int count = inpustream.read(buffer);
            if (count < 0) {
                // read() returns -1 at end of stream; allocating new byte[-1]
                // would throw NegativeArraySizeException.
                System.out.println("server closed the connection without sending data");
                return null;
            }
            byte[] received = new byte[count];
            System.arraycopy(buffer, 0, received, 0, count);
            return received;
        } catch (Exception e) {
            System.out.println(e.toString());
            return null;
        }
    }
}
运行结果:
在上面socket通信过程中我使用了wireshark对其进行抓包,结果分析如下图
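由上图我们可以很清楚的看到,protobuf编码其实类似tlv(tag length value)编码,其内部就是(tag, length, value)的组合,其中tag由(field_number<<3)|wire_type计算得出,field_number由我们在proto文件中定义,wire_type由protobuf根据proto中定义的字段类型决定(例如int32对应wire_type 0即Varint,string对应wire_type 2即length-delimited)。以CMsg的msghead字段为例,field_number为1、类型为string,因此tag为(1<<3)|2=0x0A。需要注意的是,只有wire_type为2的字段(如string、bytes、嵌套的message)才带有length,Varint类型的字段只有(tag, value)而没有length。length长度采用一种叫做Varint 的数字表示方法,它是一种紧凑的表示数字的方法,用一个或多个字节来表示一个数字,值越小的数字使用越少的字节数,具体细节可以谷歌Varint。总之Protobuf 序列化后所生成的二进制消息非常紧凑,这得益于 Protobuf 采用了上面的 Encoding 方法。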
参考文献:http://www.ibm.com/developerworks/cn/linux/l-cn-gpb/
源码下载:http://download.csdn.net/detail/wangqiuyun/8294015
转载请注明:http://blog.csdn.net/wangqiuyun/article/details/42119835