JPEG 在文件中以 Segment 的形式组织,它具有以下特点:
(2)FFEn: APPn (Application,应用程序保留标记)
FFE0: APP0,Application,应用程序保留标记 0
(3) FFDB: DQT,Define Quantization Table,定义量化表
length: 67 byte (2 byte)
第一个FFDB定义第一张量化表
00 43 00 + qt_table(64字节)
前两个字节 00 43 表示这一部分长 67 字节(0x43 = 67),第三个字节 00 的高 4 位表示精度、低 4 位表示索引号 0,qt_table 长 64 字节,对应量化表的 8*8 个值
第二个FFDB定义第二张量化表,不同的是上张表的索引号为 00,这张表的索引号为 01,在后面的 SOF0 部分中我们将会知道上张表对应亮度量化表,这张表对应色度量化表,对这张图来说就这两张量化表。
(4)FFC0: SOF0 ,Start of Frame, 基线离散余弦变换
(5) FFC4: DHT,Define Huffman Table,定义 Huffman 树表
以表1为例:
(6) FFDA: 标记代码 SOS,Start of Scan,扫描开始
3.1 读取文件
3.2 解析 Segment Marker
3.2.1 解析 SOI
3.2.2 解析 APP0
检查标识“JFIF”及版本
得到一些参数
3.2.3 解析 DQT
得到量化表长度(可能包含多张量化表)
得到量化表的精度
得到及检查量化表的序号(只能是 0 —— 3);得到量化表内容(64 个数据)
3.2.4 解析 SOF0
得到每个 sample 的比特数、长宽、颜色分量数
得到每个颜色分量的 ID、水平采样因子、垂直采样因子、使用的量化表
序号(与 DQT 中序号对应)
3.2.5 解析 DHT
得到 Huffman 表的类型(AC、DC)、序号
依据数据重建 Huffman 表
3.2.6 解析 SOS
得到解析每个颜色分量的 DC、AC 值所使用的 Huffman 表序号(与 DHT
中序号对应)
3.3 依据每个分量的水平、垂直采样因子计算 MCU 的大小,并得到每个 MCU 中 8*8
宏块的个数
3.4 对每个 MCU 解码(依照各分量水平、垂直采样因子对 MCU 中每个分量宏块解
码)
3.4.1 对每个宏块进行 Huffman 解码,得到 DCT 系数
3.4.2 对每个宏块的 DCT 系数进行 IDCT,得到 Y、Cb、Cr
3.4.3 遇到 Segment Marker RST 时,清空之前的 DC DCT 系数
3.5 解析到 EOI,解码结束
3.6 将 Y、Cb、Cr 转化为需要的色彩空间并保存。
该系统依照分层思想进行设计,为了更好地描述系统中各模块的关系,在 tinyjpeg-internal.h 文件中定义了如下 3 个结构体:
struct huffman_table // One Huffman decoding table
{
/* Fast lookup table: HUFFMAN_HASH_NBITS bits of input give the symbol directly;
* if the stored value is < 0, the symbol has to be looked up in the slow table instead */
short int lookup[HUFFMAN_HASH_SIZE]; // symbol found for each HUFFMAN_HASH_NBITS-bit prefix (see above)
/* code size: gives the number of bits a symbol is encoded with */
unsigned char code_size[HUFFMAN_HASH_SIZE]; // code length in bits, per the comment above
/* some place to store values that are not encoded in the lookup table
* FIXME: Calculate if 256 value is enough to store all values
*/
uint16_t slowtable[16-HUFFMAN_HASH_NBITS][256];
};
struct component // Decoding state kept for one colour component
{
unsigned int Hfactor; // horizontal sampling factor
unsigned int Vfactor; // vertical sampling factor
float *Q_table; /* Pointer to the quantisation table to use */
struct huffman_table *AC_table; // Huffman table used for this component's AC coefficients
struct huffman_table *DC_table; // Huffman table used for this component's DC coefficients
short int previous_DC; /* Previous DC coefficient */
short int DCT[64]; /* DCT coef */
#if SANITY_CHECK
unsigned int cid; // component ID; this member exists only when SANITY_CHECK is enabled
#endif
};
struct jdec_private // Decoder context: input stream position, tables and per-component state
{
/* Public variables */
uint8_t *components[COMPONENTS];
unsigned int width, height; /* Size of the image */
unsigned int flags;
/* Private variables */
const unsigned char *stream_begin, *stream_end;
unsigned int stream_length;
const unsigned char *stream; /* Pointer to the current stream */
unsigned int reservoir, nbits_in_reservoir; /* bit buffer and the number of bits it currently holds */
struct component component_infos[COMPONENTS];
float Q_tables[COMPONENTS][64]; /* quantization tables */
struct huffman_table HTDC[HUFFMAN_TABLES]; /* DC huffman tables */
struct huffman_table HTAC[HUFFMAN_TABLES]; /* AC huffman tables */
int default_huffman_table_initialized;
int restart_interval;
int restarts_to_go; /* MCUs left in this restart interval */
int last_rst_marker_seen; /* Rst marker is incremented each time */
/* Temp space used after the IDCT to store each component */
uint8_t Y[64*4], Cr[64], Cb[64];
jmp_buf jump_state;
/* Internal pointer used for colorspace conversion, do not modify it !!! */
uint8_t *plane[COMPONENTS];
/* Added by yangyulan: buffers for the DC image and the AC image */
int *dcimg, *acimg;
unsigned char *dcimg_ch, *acimg_ch;
/* End of yangyulan's additions for the DC image and the AC image */
};
TRACE 类似于一个开关,用于追踪代码的执行流程:当 TRACE=1 时,程序会输出执行过程中的中间结果。程序执行完毕后,可以在 trace_jpeg.txt 中看到图片的信息。
#if TRACE /* compiled in only when TRACE is non-zero */
/* Open the trace file for writing; a failure is only reported, not handled. */
p_trace = fopen(TRACEFILE, "w");
if (!p_trace) {
  printf("trace file open error!");
}
#endif
//解析量化表
static int parse_DQT(struct jdec_private *priv, const unsigned char *stream)
{
int qi;
float *table;//定义指向量化表的指针
const unsigned char *dqt_block_end;
FILE *DQTfile;
#if TRACE
fprintf(p_trace,"> DQT marker\n");
fflush(p_trace);
#endif
dqt_block_end = stream + be16_to_cpu(stream); // 得到量化表长度(可能
包含多张量化表)
stream += 2; /* Skip length */
while (stream < dqt_block_end) // 检查是否还有表
{
qi = *stream++;
if (qi>>4)
snprintf(error_string, sizeof(error_string),"16 bits quantization table
is not supported\n"); // 得到量化表的精度(高四位)
if (qi>4)
snprintf(error_string, sizeof(error_string),"No more 4 quantization
table is supported (got %d)\n", qi); // 得到量化表序号(低四位)
table = priv->Q_tables[qi];
build_quantization_table(table, stream); // 得到量化表内容
stream += 64;
}
/*
 * Expand the 64 one-byte entries of a quantization table into the float table
 * used by the decoder.
 *
 * qtable    - output; 64 floats are written, row after row
 * ref_table - the 64 table bytes as they appear in the DQT segment
 *
 * The entry for row i, column j is the byte selected through zigzag[]
 * multiplied by aanscalefactor[i] and aanscalefactor[j].
 */
static void build_quantization_table(float *qtable, const unsigned char *ref_table)
{
  /* Scale factor applied once per row and once per column */
  static const double aanscalefactor[8] = {
    1.0, 1.387039845, 1.306562965, 1.175875602,
    1.0, 0.785694958, 0.541196100, 0.275899379
  };
  int row, col;

  for (row = 0; row < 8; row++) {
    for (col = 0; col < 8; col++) {
      /* zigzag[] gives, for this row-major position, the index of its value in ref_table */
      const unsigned char src = zigzag[row * 8 + col];

      qtable[row * 8 + col] = ref_table[src] * aanscalefactor[row] * aanscalefactor[col];
    }
  }
}
static const unsigned char zigzag[64] =
{ // zig-zag ordering: entry k (a row-major position in the 8x8 block) holds that position's index in the zig-zag scan
0, 1, 5, 6, 14, 15, 27, 28,
2, 4, 7, 13, 16, 26, 29, 42,
3, 8, 12, 17, 25, 30, 41, 43,
9, 11, 18, 24, 31, 40, 44, 53,
10, 19, 23, 32, 39, 45, 52, 54,
20, 22, 33, 38, 46, 51, 55, 60,
21, 34, 37, 47, 50, 56, 59, 61,
35, 36, 48, 49, 57, 58, 62, 63
};
解析 SOF0
/* Read the frame header: picture size, then one 3-byte record per colour component. */
height = be16_to_cpu(stream+3); /* picture height */
width = be16_to_cpu(stream+5); /* picture width */
nr_components = stream[7]; /* number of colour components (Y, Cb, Cr: 3 in total) */
stream += 8;
for (i=0; i<nr_components; i++) {
  cid = *stream++; /* component ID */
  sampling_factor = *stream++; /* horizontal and vertical sampling factors, one nibble each */
  Q_table = *stream++; /* index of the quantization table this component uses */
  c = &priv->component_infos[i];
#if SANITY_CHECK
  c->cid = cid; /* struct component only has this member when SANITY_CHECK is enabled */
#endif
  if (Q_table >= COMPONENTS) {
    snprintf(error_string, sizeof(error_string),"Bad Quantization table index (got %d, max allowed %d)\n", Q_table, COMPONENTS-1);
    /* Stop here so the bad index is never used on priv->Q_tables.
     * NOTE(review): assumes the enclosing function returns int and reports failure
     * with a negative value, like the other parse_* routines - confirm. */
    return -1;
  }
  c->Vfactor = sampling_factor&0xf; /* vertical sampling factor (low nibble) */
  c->Hfactor = sampling_factor>>4; /* horizontal sampling factor (high nibble) */
  c->Q_table = priv->Q_tables[Q_table];
}
priv->width = width;
priv->height = height;
//解析huffman表
static int parse_DHT(struct jdec_private *priv, const unsigned char *stream)
{
unsigned int count, i;
unsigned char huff_bits[17];//数组中有16字节,分别存储长度1至16的码字的个数
int length, index;
//**********************************************************
FILE *HuffFile;
HuffFile = fopen("huffmanFile.txt", "a");
//**********************************************************
length = be16_to_cpu(stream) - 2;
stream += 2; /* Skip length */
#if TRACE
fprintf(p_trace,"> DHT marker (length=%d)\n", length);
fflush(p_trace);
#endif
while (length>0) {
index = *stream++;
/* We need to calculate the number of bytes 'vals' will takes */
huff_bits[0] = 0;
count = 0;
for (i=1; i<17; i++) {
huff_bits[i] = *stream++;//分别写码长1到16的码字的个数
count += huff_bits[i];//总码字数也就是huffman表长度
}
#if SANITY_CHECK
if (count >= HUFFMAN_BITS_SIZE)
snprintf(error_string, sizeof(error_string),"No more than %d bytes is allowed to describe a huffman table", HUFFMAN_BITS_SIZE);
if ( (index &0xf) >= HUFFMAN_TABLES)
snprintf(error_string, sizeof(error_string),"No more than %d Huffman tables is supported (got %d)\n", HUFFMAN_TABLES, index&0xf);
#if TRACE
fprintf(p_trace,"Huffman table %s[%d] length=%d\n", (index&0xf0)?"AC":"DC", index&0xf, count);//index低位代表是几号表
fflush(p_trace);
//*****************************************************************************************************
fprintf(HuffFile, "Huffman table %s[%d] length=%d\r\n", (index & 0xf0) ? "AC" : "DC", index & 0xf, count);
fflush(HuffFile);
//*****************************************************************************************************
#endif
#endif
//重建huffman表
if (index & 0xf0 )//index高位为1则是AC表
build_huffman_table(huff_bits, stream, &priv->HTAC[index&0xf]);
else//高位为0则是DC表
build_huffman_table(huff_bits, stream, &priv->HTDC[index&0xf]);
length -= 1;
length -= 16;
length -= count;
stream += count;
}
#if TRACE
fprintf(p_trace,"< DHT marker\n");
fflush(p_trace);
#endif
return 0;
}
/*
 * Parse an SOS (Start of Scan) segment.
 *
 * priv   - decoder context
 * stream - points at the two-byte segment length; stream[2] is the number of
 *          colour components, followed by two bytes per component (component
 *          ID, then a byte whose high nibble is the DC table number and whose
 *          low nibble is the AC table number)
 *
 * For every component the DC_table / AC_table pointers are set to the selected
 * entries of priv->HTDC / priv->HTAC. priv->stream is left 3 bytes past the
 * last component record.
 *
 * Returns 0 on success. When SANITY_CHECK is enabled and a check fails, a
 * message is stored in error_string and -1 is returned, so that no array is
 * indexed with the rejected value.
 */
static int parse_SOS(struct jdec_private *priv, const unsigned char *stream)
{
  unsigned int i, cid, table;
  unsigned int nr_components = stream[2]; /* number of colour components */

#if TRACE
  fprintf(p_trace,"> SOS marker\n");
  fflush(p_trace);
#endif
#if SANITY_CHECK
  if (nr_components != 3) {
    snprintf(error_string, sizeof(error_string),"We only support YCbCr image\n");
    return -1;
  }
#endif
  stream += 3;
  for (i=0;i<nr_components;i++) {
    cid = *stream++; /* first byte: component ID */
    table = *stream++; /* second byte: Huffman table numbers */
#if SANITY_CHECK
    /* NOTE(review): the bound is hard-coded to 4 while HTAC/HTDC are sized by
     * HUFFMAN_TABLES - confirm the two agree. */
    if ((table&0xf)>=4) {
      snprintf(error_string, sizeof(error_string),"We do not support more than 2 AC Huffman table\n");
      return -1;
    }
    if ((table>>4)>=4) {
      snprintf(error_string, sizeof(error_string),"We do not support more than 2 DC Huffman table\n");
      return -1;
    }
    if (cid != priv->component_infos[i].cid) {
      snprintf(error_string, sizeof(error_string),"SOS cid order (%d:%d) isn't compatible with the SOF marker (%d:%d)\n",
               i, cid, i, priv->component_infos[i].cid);
      return -1;
    }
#if TRACE
    fprintf(p_trace,"ComponentId:%d tableAC:%d tableDC:%d\n", cid, table&0xf, table>>4);
    fflush(p_trace);
#endif
#endif
    priv->component_infos[i].AC_table = &priv->HTAC[table&0xf]; /* low nibble: AC table number */
    priv->component_infos[i].DC_table = &priv->HTDC[table>>4]; /* high nibble: DC table number */
  }
  priv->stream = stream+3; /* skip the 3 bytes that follow the component records */
#if TRACE
  fprintf(p_trace,"< SOS marker\n");
  fflush(p_trace);
#endif
  return 0;
}
依据每个分量的水平、垂直采样因子计算 MCU 的大小,并得到每个 MCU 中 8*8
宏块的个数
/* Choose the MCU decoder, the colour-space converter and the MCU size from the Y sampling factors. */
xstride_by_mcu = ystride_by_mcu = 8;// default: MCU is 8 wide and 8 high (the 1x1 / 4:4:4 case)
if ((priv->component_infos[cY].Hfactor | priv->component_infos[cY].Vfactor) == 1) {// both Y sampling factors are 1
decode_MCU = decode_mcu_table[0];// entry 0: 1x1 sampling (original note: one Y block per MCU)
convert_to_pixfmt = colorspace_array_conv[0];
#if TRACE
fprintf(p_trace,"Use decode 1x1 sampling\n");
fflush(p_trace);
#endif
} else if (priv->component_infos[cY].Hfactor == 1) {// horizontal factor is 1, so the vertical one is not 1 (2 expected)
decode_MCU = decode_mcu_table[1];// entry 1: 1x2 sampling (original note: two Y blocks per MCU)
convert_to_pixfmt = colorspace_array_conv[1];
ystride_by_mcu = 16;// MCU is 8 wide, 16 high
#if TRACE
fprintf(p_trace,"Use decode 1x2 sampling (not supported)\n");
fflush(p_trace);
#endif
} else if (priv->component_infos[cY].Vfactor == 2) {// horizontal factor is not 1 and vertical factor is 2
decode_MCU = decode_mcu_table[3];// entry 3: 2x2 sampling (original note: four Y blocks per MCU)
convert_to_pixfmt = colorspace_array_conv[3];
xstride_by_mcu = 16;
ystride_by_mcu = 16;// MCU is 16 wide, 16 high
#if TRACE
fprintf(p_trace,"Use decode 2x2 sampling\n");
fflush(p_trace);
#endif
} else {// remaining case: horizontal factor is not 1 and vertical factor is not 2 (treated as 2x1)
decode_MCU = decode_mcu_table[2];// entry 2: 2x1 sampling (original note: two Y blocks per MCU)
convert_to_pixfmt = colorspace_array_conv[2];
xstride_by_mcu = 16;// MCU is 16 wide, 8 high
#if TRACE
fprintf(p_trace,"Use decode 2x1 sampling\n");
fflush(p_trace);
#endif
}
对每个 MCU 解码(依照各分量水平、垂直采样因子对 MCU 中每个分量宏块解
码)
例如:
/*
 * Decode one MCU for 1x1 sampling (4:4:4): for Y, then Cb, then Cr, Huffman-decode
 * one 8x8 block and run the IDCT on it, storing the result in priv->Y, priv->Cb
 * and priv->Cr respectively.
 */
static void decode_MCU_1x1_3planes(struct jdec_private *priv)
{
  /* Components in the order their blocks are decoded, each with its output buffer. */
  const int order[3] = { cY, cCb, cCr };
  uint8_t *const output[3] = { priv->Y, priv->Cb, priv->Cr };
  int k;

  for (k = 0; k < 3; k++) {
    process_Huffman_data_unit(priv, order[k]); /* Huffman-decode one 8x8 block */
    IDCT(&priv->component_infos[order[k]], output[k], 8); /* IDCT of the decoded coefficients */
  }
}
/*
 * Huffman-decode one 8x8 block of the given component.
 *
 * The DC value is read as a difference and added to the component's
 * previous_DC. AC values are read as (zero run, size) pairs until the
 * end-of-block code or until 64 coefficients are filled. The decoded
 * coefficients are then copied into the component's DCT[] array, entry j
 * taken from position zigzag[j] of the decoded sequence.
 */
static void process_Huffman_data_unit(struct jdec_private *priv, int component)
{
  struct component *comp = &priv->component_infos[component];
  short int coef[64] = {0}; /* coefficients in decoding order, all zero to start with */
  unsigned int symbol;
  unsigned char nbits, run;
  unsigned char pos;

  /* DC coefficient: the symbol is the number of bits holding the difference. */
  symbol = get_next_huffman_code(priv, comp->DC_table);
  if (symbol != 0) {
    get_nbits(priv->reservoir, priv->nbits_in_reservoir, priv->stream, symbol, coef[0]);
    coef[0] += comp->previous_DC; /* DC is coded as a difference: restore the actual value */
    comp->previous_DC = coef[0];
  } else {
    coef[0] = comp->previous_DC;
  }

  /* AC coefficients */
  for (pos = 1; pos < 64; ) {
    symbol = get_next_huffman_code(priv, comp->AC_table);
    nbits = symbol & 0xF; /* low nibble: size in bits of the value */
    run = symbol >> 4;    /* high nibble: number of zeros to skip first */

    if (nbits != 0) {
      pos += run; /* skip the run of zeros */
      if (__unlikely(pos >= 64)) {
        snprintf(error_string, sizeof(error_string), "Bad huffman data (buffer overflow)");
        break;
      }
      get_nbits(priv->reservoir, priv->nbits_in_reservoir, priv->stream, nbits, coef[pos]);
      pos++;
      continue;
    }

    /* Size 0: run-length codes without a value */
    if (run == 0)
      break; /* end of block */
    if (run == 0xF)
      pos += 16; /* sixteen zeros */
  }

  /* Store the block, reordered through zigzag[]. */
  for (pos = 0; pos < 64; pos++)
    comp->DCT[pos] = coef[zigzag[pos]];
}
在parse_DQT函数最后添加以下代码,输出量化表:
//************************************************
DQTfile = fopen("DQTfile.txt", "a");
fputs("量化表\n", DQTfile);
for (int i = 0; i < 8; i++)
{
for (int j = 0; j < 8; j++)
{
fprintf(DQTfile, "%f ", *table);
table++;
}
fputs("\n", DQTfile);
}
//************************************************
量化表:
修改函数parse_DHT中的trace,得到Huffman表:
#if TRACE
/* Write the table summary to the trace file and to HuffFile. */
fprintf(p_trace,"Huffman table %s[%d] length=%d\n", (index&0xf0)?"AC":"DC", index&0xf, count);// the low nibble of index is the table number
fflush(p_trace);
//*****************************************************************************************************
fprintf(HuffFile, "Huffman table %s[%d] length=%d\r\n", (index & 0xf0) ? "AC" : "DC", index & 0xf, count);
fflush(HuffFile);
//*****************************************************************************************************
#endif
在函数tinyjpeg_decode中声明文件指针DC_File和AC_File.
//==================================================================
// Output files for the DC image and the AC image, opened for binary writing.
// NOTE(review): the fopen results are not checked here.
FILE *DC_File = fopen("DCimage.yuv", "wb");
FILE *AC_File = fopen("ACimage.yuv", "wb");
//================================================================
在函数末尾添加如下代码。注意:依据 DCT 能量守恒定理,经 DCT 变换后 DC 系数的取值最大可达 256*8,因此需要对 DC 系数做归一化处理,使其范围限制在 [0,255],AC 系数同理。
//--------------------------------------------------------------------------------------
unsigned char DCimg = 0;
unsigned char ACimg = 0;
//读出DC AC系数并做归一化处理
DCimg = (unsigned char)((priv->component_infos->DCT[0] + 512.0) / 4 + 0.5);
ACimg = (unsigned char)(priv->component_infos->DCT[1] + 128);
//写文件
fwrite(&DCimg, 1, 1, DC_File);
fwrite(&ACimg, 1, 1, AC_File);
//-----------------------------------------------------------------------------------------