做TCP/IP协议栈数据包重组时遇到一些HTTP编码问题,主要是chunked传输编码和gzip内容编码。
先看chunk:
RFC2616中对Chunked的定义:
Chunked-Body = *chunk
last-chunk
trailer
CRLF
chunk = chunk-size [ chunk-extension ] CRLF
chunk-data CRLF
chunk-size = 1*HEX
last-chunk = 1*("0") [ chunk-extension ] CRLF
chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
chunk-ext-name = token
chunk-ext-val = token | quoted-string
chunk-data = chunk-size(OCTET)
trailer = *(entity-header CRLF)
以下是解码过程的伪代码:
length := 0//用来记录解码后的数据体长度
read chunk-size, chunk-extension (if any) and CRLF//第一次读取块大小
while (chunk-size > 0) {//一直循环,直到读取的块大小为0
read chunk-data and CRLF//读取块数据体,以CRLF(回车换行)结束
append chunk-data to entity-body//添加块数据体到解码后实体数据
length := length + chunk-size//更新解码后的实体长度
read chunk-size and CRLF//读取新的块大小
}
read entity-header//以下代码读取全部的头标记
while (entity-header not empty) {
append entity-header to existing header fields
read entity-header
}
Content-Length := length//头标记中添加内容长度
Remove "chunked" from Transfer-Encoding//从头标记Transfer-Encoding中移除"chunked"
伪码的逻辑有点混乱,研究了下,自己写了C语言的解码代码:
//////////////////////////////////////
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <string.h>

/*
 * Read one "chunk-size [chunk-extension] CRLF" line from fp.
 *
 * Returns 1 and stores the parsed size in *size on success.  Returns 0 when
 * the line is missing, lacks a CRLF terminator (truncated or longer than the
 * local buffer), does not start with a hex digit, or overflows.
 */
static int unchunk_read_size (FILE *fp, unsigned long *size)
{
  char line[256];
  unsigned long value = 0;
  size_t ndigits = 0;
  char next;

  if (fgets (line, sizeof (line), fp) == NULL)
    return 0;
  if (strstr (line, "\r\n") == NULL)
    return 0;

  /* Parse the digits by hand: strtol would also accept signs, leading
     blanks and a "0x" prefix, none of which are valid in a chunk size. */
  while (isxdigit ((unsigned char) line[ndigits]))
    {
      int c = (unsigned char) line[ndigits];
      unsigned long digit = isdigit (c)
        ? (unsigned long) (c - '0')
        : (unsigned long) (tolower (c) - 'a') + 10;

      if (value > (ULONG_MAX - digit) / 16)
        return 0;
      value = value * 16 + digit;
      ndigits++;
    }
  if (ndigits == 0)
    return 0;

  /* Only the line terminator, blanks or a chunk extension may follow. */
  next = line[ndigits];
  if (next != '\r' && next != ';' && next != ' ' && next != '\t')
    return 0;

  *size = value;
  return 1;
}

/*
 * Copy exactly count bytes from in to out through a fixed-size buffer, so an
 * attacker-chosen chunk size never drives a memory allocation.
 *
 * Returns 1 when all bytes were copied, 0 when the input ended early (the
 * bytes that were available are still written), -1 on a write error.
 */
static int unchunk_copy (FILE *in, FILE *out, unsigned long count)
{
  char buf[4096];

  while (count > 0)
    {
      size_t want = count < sizeof (buf) ? (size_t) count : sizeof (buf);
      size_t got = fread (buf, 1, want, in);

      if (got > 0 && fwrite (buf, 1, got, out) != got)
        return -1;
      if (got < want)
        return 0;
      count -= got;
    }
  return 1;
}

/*
 * Decode a file that holds an HTTP "chunked" body.
 *
 * The output name is filename cut at its last ".trunk"; the decoded data is
 * written there and the chunked original is removed.
 *
 * Returns the output name on success.  That pointer refers to a static
 * buffer: it is overwritten by the next call and the function is therefore
 * not reentrant.  Returns filename itself, leaving the input untouched, when
 * filename is NULL, has no ".trunk" part, cannot be opened, does not start
 * with a valid chunk-size line, or when writing the output fails.
 *
 * Decoding is best effort: if the input is truncated or malformed after the
 * first chunk header, everything decoded so far is kept and the output name
 * is still returned.  Trailer headers after the last chunk are ignored.
 */
char * unchunk (char *filename)
{
  static const char suffix[] = ".trunk";
  static char newfile[1024];
  const char *cut = NULL;
  const char *scan;
  FILE *fp;
  FILE *fp_unchunk;
  unsigned long chunk_size = 0;
  size_t stem_len;
  int write_failed = 0;

  if (filename == NULL)
    return filename;

  /* Locate the LAST ".trunk" so a directory component that happens to
     contain the marker does not truncate the path too early. */
  for (scan = strstr (filename, suffix); scan != NULL;
       scan = strstr (scan + 1, suffix))
    cut = scan;
  if (cut == NULL)
    return filename;

  stem_len = (size_t) (cut - filename);
  if (stem_len == 0 || stem_len >= sizeof (newfile))
    return filename;

  fp = fopen (filename, "rb");
  if (fp == NULL)
    return filename;

  /* Validate the first header before creating any output file. */
  if (!unchunk_read_size (fp, &chunk_size))
    {
      fclose (fp);
      return filename;
    }

  memcpy (newfile, filename, stem_len);
  newfile[stem_len] = '\0';

  fp_unchunk = fopen (newfile, "wb");
  if (fp_unchunk == NULL)
    {
      fclose (fp);
      return filename;
    }
  printf ("%s\n", newfile);

  while (chunk_size > 0)
    {
      int rc = unchunk_copy (fp, fp_unchunk, chunk_size);

      if (rc < 0)
        {
          write_failed = 1;
          break;
        }
      if (rc == 0)
        break;
      /* Every chunk-data block is followed by its own CRLF. */
      if (fgetc (fp) != '\r' || fgetc (fp) != '\n')
        break;
      if (!unchunk_read_size (fp, &chunk_size))
        break;
    }

  fclose (fp);
  if (fclose (fp_unchunk) != 0)
    write_failed = 1;

  if (write_failed)
    {
      /* Never delete the source when the decoded copy is unreliable. */
      remove (newfile);
      return filename;
    }

  remove (filename);
  return newfile;
}
//////////////////////////////////////
接下来看gzip的解码
gzip的解码相对更加简单一些,两种方法实现:
一种是直接调用系统gzip命令解压,没有技术含量;
另一种是使用zlib库,通用性更高,但是要使用zlib库,开发过程稍复杂,下面给出gzip文件的解压C代码:
/////////////////////////////////
//调用系统gzip命令的代码(无难度)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Write src into dst as one single-quoted POSIX shell word, turning every
 * embedded ' into '\'' so the shell can never interpret part of the name.
 *
 * Returns 0 on success, -1 when the quoted form does not fit in cap bytes.
 */
static int ungzip_shell_quote (char *dst, size_t cap, const char *src)
{
  size_t used = 0;

  if (cap < 3)
    return -1;
  dst[used++] = '\'';
  for (; *src != '\0'; src++)
    {
      if (*src == '\'')
        {
          /* 4 bytes now, plus the closing quote and the terminator. */
          if (used + 4 + 2 > cap)
            return -1;
          memcpy (dst + used, "'\\''", 4);
          used += 4;
        }
      else
        {
          if (used + 1 + 2 > cap)
            return -1;
          dst[used++] = *src;
        }
    }
  dst[used++] = '\'';
  dst[used] = '\0';
  return 0;
}

/*
 * Decompress a gzip file in place by running the system "gzip -d" command.
 *
 * gzip only accepts names with a known suffix, so a file whose name does not
 * END in ".gz" is first renamed to "<filename>.gz"; decompression then
 * recreates it under the original name.  A name that already ends in ".gz"
 * is decompressed directly, so the result loses that suffix.
 *
 * Failures (NULL or empty name, over-long name, rename failure, command that
 * could not be started) are reported on stderr; nothing is returned.
 */
void ungzip (char *filename)
{
  static const char suffix[] = ".gz";
  const size_t suffix_len = sizeof (suffix) - 1;
  char gzname[1024];
  char quoted[4200];
  char cmdbuf[4300];
  size_t len;
  int n;

  if (filename == NULL || *filename == '\0')
    return;
  len = strlen (filename);

  if (len > suffix_len && strcmp (filename + len - suffix_len, suffix) == 0)
    {
      n = snprintf (gzname, sizeof (gzname), "%s", filename);
      if (n < 0 || (size_t) n >= sizeof (gzname))
        {
          fprintf (stderr, "ungzip: file name too long\n");
          return;
        }
    }
  else
    {
      n = snprintf (gzname, sizeof (gzname), "%s%s", filename, suffix);
      if (n < 0 || (size_t) n >= sizeof (gzname))
        {
          fprintf (stderr, "ungzip: file name too long\n");
          return;
        }
      /* rename() replaces the former "mv" shell command. */
      if (rename (filename, gzname) != 0)
        {
          perror ("ungzip: rename");
          return;
        }
    }

  if (ungzip_shell_quote (quoted, sizeof (quoted), gzname) != 0)
    {
      fprintf (stderr, "ungzip: file name too long\n");
      return;
    }

  /* "--" keeps a name that starts with '-' from being read as an option. */
  n = snprintf (cmdbuf, sizeof (cmdbuf), "gzip -d -- %s", quoted);
  if (n < 0 || (size_t) n >= sizeof (cmdbuf))
    {
      fprintf (stderr, "ungzip: command too long\n");
      return;
    }
  if (system (cmdbuf) == -1)
    perror ("ungzip: system");
}
// 使用zlib库的代码
#include <stdio.h>
#include "zlib/zlib.h"

/* Read-buffer size in bytes; the snippet used CHUNK without defining it.
   NOTE(review): 16384 is an assumed default - define CHUNK before this
   point to override it. */
#ifndef CHUNK
#define CHUNK 16384
#endif

/*
 * Decompress the gzip file src into the file dst using zlib's gz* API.
 *
 * src: path of the gzip-compressed input.
 * dst: path of the output file; it is created or truncated.
 *
 * Nothing is returned.  Open, read and write failures are reported on
 * stderr; after a read or write failure dst holds only the data written up
 * to that point.
 */
void uncompresstorrent(char *src, char *dst)
{
    /* gzFile is already a handle type, so it is not declared as a pointer. */
    gzFile gzfp;
    FILE *fp;
    char in[CHUNK];
    int retlen;

    gzfp = gzopen(src, "rb");
    if (gzfp == NULL) {
        fprintf(stderr, "uncompresstorrent: cannot open %s\n", src);
        return;
    }

    fp = fopen(dst, "wb");
    if (fp == NULL) {
        fprintf(stderr, "uncompresstorrent: cannot create %s\n", dst);
        gzclose(gzfp);
        return;
    }

    /* gzread yields a byte count, 0 at end of file and -1 on error; looping
       only while it is positive keeps -1 from reaching fwrite as a size. */
    while ((retlen = gzread(gzfp, in, CHUNK)) > 0) {
        if (fwrite(in, 1, (size_t)retlen, fp) != (size_t)retlen) {
            fprintf(stderr, "uncompresstorrent: write to %s failed\n", dst);
            break;
        }
    }
    if (retlen < 0) {
        int errnum = 0;
        const char *msg = gzerror(gzfp, &errnum);

        fprintf(stderr, "uncompresstorrent: read from %s failed: %s\n",
                src, msg != NULL ? msg : "unknown error");
    }

    gzclose(gzfp);
    if (fclose(fp) != 0)
        fprintf(stderr, "uncompresstorrent: closing %s failed\n", dst);
}
(编译时需要带 -Lzlib -lz 参数)
/////////////////////////////////