JPEG编解码原理与转换

JPEG是一种常用的有损编码方法,具体编码过程如下:
JPEG编解码原理与转换_第1张图片
具体思路是这样的:
1、将输入图片先做一个零偏置电平下移,也就是将每个像素值减去128(对8位采样即2^7),将原来的无符号数转换为有符号数,将值域从0~255向下搬移到-128~127,这样做的目的是提高编码效率。

2、做8*8的DCT变换。也就是将整个图片,分成多个8*8共64个像素的宏块,那么对于图片宽高不为8的倍数的图片,做边缘填充处理。分为8*8的宏块后,以宏块为单位,对其做DCT变换,所谓DCT变换实际上是一种无损的变换方式,变换后,矩阵左上角的值表示该宏块的直流DC成分,其余为交流AC成分,并且趋势由左上角到右下角为低频到高频的转变,这么做的好处是将一个区域中复杂的细节关系做了一个有规律的梳理,便于后续的处理与分析。
JPEG编解码原理与转换_第2张图片

3、做过DCT变换后,由图可以观察到,从左上角到右下角细节越来越明显。而事实上,人眼对于图片的高频区细节并不敏感,也就是说当我们对于DCT变换后的图像做量化时,我们可以将高频区不敏感部分的量化步长设置的较大,将低频区较敏感的量化步长设置的相对较小,也就是所谓的低频细量化高频粗量化。
4、针对DC参数编码。事实上因为相邻宏块的DC值变化基本很小,因此可以针对DC值用DPCM编码,也就是对当前宏块与前一个宏块DC值的差值进行编码。
5、针对AC参数编码。对于AC交流值来说,规律是由左上角到右下角量化步长越来越大,也就是说右下角有很大部分会出现量化后变成0的情况。因此采取之字形扫描后然后再游程编码的手段,之字形扫描的具体扫描顺序是这样的:
JPEG编解码原理与转换_第3张图片
在这样排序后,再做游程编码,简单来说就是可以将连续的0压缩,从而提高压缩率。

以上步骤都做完后,就该使用Huffman编码了,举一个Huffman码表的例子:
JPEG编解码原理与转换_第4张图片
例如该序列为一个Huffman码表,其中黑色部分为Marker,红色部分表示了该表是一个编码DC的0号表,蓝色部分一共有16个字节,表示从码长为1位到16位的码各有多少个。由这一部分可以看出来,2位码有三个,3位码有一个,4位码有一个,5位码有一个,6位码有一个,7位码有一个,8位码有一个,9位码有一个。后面的绿色部分表示了对应各码长位数的权重,权重表示了解码时需要再读入的位数,通过这个读入的位数查表可以获得真正的码值,将表列为更通俗易懂的表则如下,编码手段为同等位数的码字逐项加1,相邻有码长加一变化的,采取码字加1补0的操作:
JPEG编解码原理与转换_第5张图片

了解了大概的编码过程之后,接下来需要了解一下JPEG的文件格式:
JPEG中每个部分称为一个segment,每个segment的开头称为marker,每个marker都是0xFF后接该部分标识符,然后是两字节的marker长度,然后是内容。分别有以下几种关键文件格式:
JPEG编解码原理与转换_第6张图片

具体每个segment对应marker如下:

/*
 * Marker codes of the JPEG segments handled by the decoder.
 * Each value is the byte that follows the 0xFF prefix in the stream.
 * Entries are listed in ascending numeric order.
 */
enum std_markers {
   /* 0xC0 - 0xCF: frame and table definitions */
   SOF  = 0xC0, /* Start of Frame (size information) */
   DHT  = 0xC4, /* Huffman Table */

   /* 0xD0 - 0xD7: reset marker range */
   RST  = 0xD0, /* First reset marker of the range (d0 -> ..) */
   RST7 = 0xD7, /* Last reset marker of the range (.. -> d7) */

   /* 0xD8 - 0xDF: image delimiters and scan-level segments */
   SOI  = 0xD8, /* Start of Image */
   EOI  = 0xD9, /* End of Image */
   SOS  = 0xDA, /* Start of Scan */
   DQT  = 0xDB, /* Define Quantization Table */
   DRI  = 0xDD, /* Define Restart Interval */

   /* 0xE0: application segment */
   APP0 = 0xE0, /* APP0 segment marker */
};

具体使用的结构体如下:
huffman表结构体

/*
 * Decoding tables for one Huffman table (DC or AC), filled by
 * build_huffman_table().
 *
 * Short codes (at most HUFFMAN_HASH_NBITS bits) are resolved through the
 * direct `lookup` array; longer codes are stored as (code, value) pairs in
 * `slowtable`, one row per code length.
 */
struct huffman_table
{
     
  /* Fast look up table, using HUFFMAN_HASH_NBITS bits we can have directly the symbol,
   * if the symbol is <0, then we need to look into the tree table.
   * build_huffman_table() presets every byte to 0xff before filling it, so
   * the entries it does not fill keep that all-ones pattern. */
  short int lookup[HUFFMAN_HASH_SIZE];
  /* code size: give the number of bits of a symbol is encoded.
   * Indexed by the decoded value (see build_huffman_table()). */
  unsigned char code_size[HUFFMAN_HASH_SIZE];
  /* some place to store value that is not encoded in the lookup table 
   * FIXME: Calculate if 256 value is enough to store all values
   *
   * Row index is (code length - HUFFMAN_HASH_NBITS - 1). Each row is a
   * sequence of (code, value) pairs terminated by a 0 in the code slot.
   */
  uint16_t slowtable[16-HUFFMAN_HASH_NBITS][256];
};

关于8*8宏块中的解码信息:

/*
 * Per-component decoding state (one entry per image component in
 * jdec_private.component_infos).
 */
struct component 
{
     
  /* Horizontal / vertical sampling factors; parse_SOF() takes them from the
   * high and low nibble of the component's sampling byte. */
  unsigned int Hfactor;
  unsigned int Vfactor;
  float *Q_table;  /* Pointer to the quantisation table to use */
  /* Huffman tables selected for this component by parse_SOS() */
  struct huffman_table *AC_table;
  struct huffman_table *DC_table;
  short int previous_DC; /* Previous DC coefficient */
  short int DCT[64];  /* DCT coef */
#if SANITY_CHECK
  /* Component id recorded by parse_SOF() and cross-checked in parse_SOS() */
  unsigned int cid;
#endif
};

关于JPEG数据流的结构体,包含图片长宽、码表等等信息:

/*
 * Complete state of one JPEG decoding session: the input stream cursor,
 * the tables read from the file header and the output plane buffers.
 */
struct jdec_private
{
  /* Public variables */
  uint8_t *components[COMPONENTS];   /* Base address of each output plane */
  unsigned int width, height; /* Size of the image (set by parse_SOF) */
  unsigned int flags;

  /* Private variables */
  /* Bounds of the data following the SOI marker (set by tinyjpeg_parse_header) */
  const unsigned char *stream_begin, *stream_end;
  unsigned int stream_length;

  const unsigned char *stream; /* Pointer to the current stream */
  /* Bit buffer and the number of bits it currently holds */
  unsigned int reservoir, nbits_in_reservoir;

  struct component component_infos[COMPONENTS];
  float Q_tables[COMPONENTS][64];  /* quantization tables */
  struct huffman_table HTDC[HUFFMAN_TABLES]; /* DC huffman tables   */
  struct huffman_table HTAC[HUFFMAN_TABLES]; /* AC huffman tables   */
  int default_huffman_table_initialized;
  int restart_interval;
  int restarts_to_go;    /* MCUs left in this restart interval */
  int last_rst_marker_seen;   /* Rst marker is incremented each time */

  /* Temp space used after the IDCT to store each components */
  uint8_t Y[64*4], Cr[64], Cb[64];

  jmp_buf jump_state;
  /* Internal Pointer use for colorspace conversion, do not modify it !!! */
  uint8_t *plane[COMPONENTS];   /* the terminating ';' was missing here */
};

接下来是转换图片函数,包含从JPEG文件头中提取信息并可选择性的转换为各种格式:

/*
 * Load the JPEG file `infilename` into memory, decode it with tinyjpeg and
 * save the picture to `outfilename`.
 *
 * output_format selects both the decoder output layout and the writer:
 *   TINYJPEG_FMT_RGB24 / TINYJPEG_FMT_BGR24 -> write_tga()
 *   TINYJPEG_FMT_YUV420P                    -> write_yuv()
 *   TINYJPEG_FMT_GREY                       -> write_pgm()
 * Any other value decodes the image but writes nothing.
 *
 * Every failure is reported through exitmessage(); the function itself
 * always returns 0.
 * NOTE(review): this assumes exitmessage() terminates the program, as the
 * existing NULL checks already do - confirm.
 */
int convert_one_image(const char *infilename, const char *outfilename, int output_format)
{
  FILE *fp;
  unsigned int length_of_file;
  unsigned int width, height;
  unsigned char *buf;
  struct jdec_private *jdec;
  unsigned char *components[3];
  size_t items_read;

  /* Load the Jpeg into memory */
  fp = fopen(infilename, "rb");
  if (fp == NULL)
    exitmessage("Cannot open filename\n");
  length_of_file = filesize(fp);
  /* 4 spare bytes are allocated past the file data - presumably slack for
   * the decoder's read-ahead; TODO confirm */
  buf = (unsigned char *)malloc(length_of_file + 4);
  if (buf == NULL)
    exitmessage("Not enough memory for loading file\n");
  /* The file is read as one item, so anything other than 1 means the buffer
   * does not hold the complete file (this includes an empty file). */
  items_read = fread(buf, length_of_file, 1, fp);
  fclose(fp);
  if (items_read != 1)
    exitmessage("Cannot read the whole file\n");

  /* Decompress it */
  jdec = tinyjpeg_init();
  if (jdec == NULL)
    exitmessage("Not enough memory to alloc the structure need for decompressing\n");
  if (tinyjpeg_parse_header(jdec, buf, length_of_file)<0)
    exitmessage(tinyjpeg_get_errorstring(jdec));

  /* Get the size of the image */
  tinyjpeg_get_size(jdec, &width, &height);
  snprintf(error_string, sizeof(error_string),"Decoding JPEG image...\n");
  if (tinyjpeg_decode(jdec, output_format) < 0)
    exitmessage(tinyjpeg_get_errorstring(jdec));

  /* 
   * Get address for each plane (not only max 3 planes is supported), and
   * depending of the output mode, only some components will be filled 
   * RGB: 1 plane, YUV420P: 3 planes, GREY: 1 plane
   */
  tinyjpeg_get_components(jdec, components);

  /* Save it */
  switch (output_format)
   {
    case TINYJPEG_FMT_RGB24:
    case TINYJPEG_FMT_BGR24:
      write_tga(outfilename, output_format, width, height, components);
      break;
    case TINYJPEG_FMT_YUV420P:
      write_yuv(outfilename, width, height, components);
      break;
    case TINYJPEG_FMT_GREY:
      write_pgm(outfilename, width, height, components);
      break;
   }

  /* Only called this if the buffers were allocated by tinyjpeg_decode() */
  tinyjpeg_free(jdec);
  /* else called just free(jdec); */
  free(buf);
  return 0;
}

接下来是上面转换函数中所用到的所有函数:
解析JPEG文件头函数,其中定位了文件头标识符后的开始位置、长度、以及结束位置:

/*
 * Check the JPEG signature and parse the header segments of a file that is
 * already loaded in memory.
 *
 * priv: decoder state; stream_begin / stream_length / stream_end are set to
 *       cover the data that follows the 2-byte SOI marker.
 * buf:  start of the file data.
 * size: number of bytes available at buf.
 *
 * Returns the result of parse_JFIF(), or -1 (with error_string set) when the
 * buffer is too short or does not start with 0xFF SOI.
 */
int tinyjpeg_parse_header(struct jdec_private *priv, const unsigned char *buf, unsigned int size)
{
  /* Identify the file. size is checked first so that buf[0]/buf[1] are never
   * read out of range and "size-2" below cannot wrap around. */
  if (size < 2 || buf[0] != 0xFF || buf[1] != SOI) {
    snprintf(error_string, sizeof(error_string),"Not a JPG file ?\n");
    return -1;
  }

  priv->stream_begin = buf+2;
  priv->stream_length = size-2;
  priv->stream_end = priv->stream_begin + priv->stream_length;

  return parse_JFIF(priv, priv->stream_begin);
}

解析DQT,其中包括读取量化精度、读取量化表等等:

/*
 * Parse a DQT (Define Quantization Table) segment.
 *
 * stream points at the 2-byte segment length. The segment may carry several
 * tables; each one is a 1-byte header followed by 64 one-byte entries. The
 * header's high nibble must be 0 (tables with 16-bit entries are rejected)
 * and the header value selects the slot of priv->Q_tables that
 * build_quantization_table() fills.
 *
 * Returns 0 on success. With SANITY_CHECK enabled, returns -1 and sets
 * error_string for 16-bit tables, an out-of-range table index or a table
 * that is cut short by the end of the segment.
 */
static int parse_DQT(struct jdec_private *priv, const unsigned char *stream)
{
  int qi;
  float *table;
  const unsigned char *dqt_block_end;

#if TRACE
  fprintf(p_trace,"> DQT marker\n");
  fflush(p_trace);
#endif
  dqt_block_end = stream + be16_to_cpu(stream);
  stream += 2; /* Skip length */

  while (stream < dqt_block_end)
   {
     qi = *stream++;
#if SANITY_CHECK
     if (qi>>4) {
       snprintf(error_string, sizeof(error_string),"16 bits quantization table is not supported\n");
       return -1;
     }
     /* Q_tables has COMPONENTS rows, so that is the real upper bound */
     if (qi >= COMPONENTS) {
       snprintf(error_string, sizeof(error_string),"Bad Quantization table index (got %d, max allowed %d)\n", qi, COMPONENTS-1);
       return -1;
     }
     /* The 64 entries must lie inside the segment */
     if (dqt_block_end - stream < 64) {
       snprintf(error_string, sizeof(error_string),"Truncated quantization table\n");
       return -1;
     }
#endif
     table = priv->Q_tables[qi];
     build_quantization_table(table, stream);
     stream += 64;
   }
#if TRACE
  fprintf(p_trace,"< DQT marker\n");
  fflush(p_trace);
#endif
  return 0;
}

计算建立量化表,根据规定好的量化因子计算量化表:

/*
 * Expand one quantization table from the file into the float table used by
 * the decoder.
 *
 * qtable:    output, 64 floats written in row-major order.
 * ref_table: the 64 one-byte entries as stored in the DQT segment; entry k
 *            of the output reads ref_table[zigzag[k]].
 *
 * Each output entry is the reference value multiplied by the scale factor of
 * its row and of its column (factors taken from libjpeg's float AA&N IDCT:
 * scalefactor[0] = 1, scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7).
 * The product itself is stored; no reciprocal and no extra factor of 8 is
 * applied in this function.
 */
static void build_quantization_table(float *qtable, const unsigned char *ref_table)
{
  static const double aan_scale[8] = {
     1.0, 1.387039845, 1.306562965, 1.175875602,
     1.0, 0.785694958, 0.541196100, 0.275899379
  };
  int k;

  /* k walks the 8x8 block row by row: row = k / 8, column = k % 8 */
  for (k = 0; k < 64; k++)
    qtable[k] = ref_table[zigzag[k]] * aan_scale[k >> 3] * aan_scale[k & 7];
}

定义Huffman表,其中包括建立交流表和直流表:

/*
 * Parse a DHT (Define Huffman Table) segment.
 *
 * stream points at the 2-byte segment length. The segment may carry several
 * tables, each laid out as:
 *   1 byte   table header: a non-zero high nibble selects an AC table,
 *            zero a DC table; the low nibble is the table number
 *   16 bytes number of codes of length 1..16
 *   N bytes  the values, N being the sum of the 16 counts
 * Every table is expanded into priv->HTAC[] or priv->HTDC[] by
 * build_huffman_table().
 *
 * Returns 0 on success. With SANITY_CHECK enabled, returns -1 and sets
 * error_string when a table describes too many values or uses a table
 * number outside HUFFMAN_TABLES.
 */
static int parse_DHT(struct jdec_private *priv, const unsigned char *stream)
{
  unsigned int count, i;
  unsigned char huff_bits[17];
  int length, index;

  length = be16_to_cpu(stream) - 2;
  stream += 2; /* Skip length */
#if TRACE
  fprintf(p_trace,"> DHT marker (length=%d)\n", length);
  fflush(p_trace);
#endif

  while (length>0) {
     index = *stream++;

     /* We need to calculate the number of bytes 'vals' will takes */
     huff_bits[0] = 0;
     count = 0;
     for (i=1; i<17; i++) {
       huff_bits[i] = *stream++;
       count += huff_bits[i];
     }
#if SANITY_CHECK
     /* Stop here: build_huffman_table() uses fixed-size temporary arrays
      * sized from HUFFMAN_BITS_SIZE */
     if (count >= HUFFMAN_BITS_SIZE) {
       snprintf(error_string, sizeof(error_string),"No more than %d bytes is allowed to describe a huffman table", HUFFMAN_BITS_SIZE);
       return -1;
     }
     if ( (index &0xf) >= HUFFMAN_TABLES) {
       snprintf(error_string, sizeof(error_string),"No more than %d Huffman tables is supported (got %d)\n", HUFFMAN_TABLES, index&0xf);
       return -1;
     }
#if TRACE
     fprintf(p_trace,"Huffman table %s[%d] length=%d\n", (index&0xf0)?"AC":"DC", index&0xf, count);
     fflush(p_trace);
#endif
#endif

     if (index & 0xf0 )
       build_huffman_table(huff_bits, stream, &priv->HTAC[index&0xf]);
     else
       build_huffman_table(huff_bits, stream, &priv->HTDC[index&0xf]);

     /* Account for the header byte, the 16 counts and the values */
     length -= 1;
     length -= 16;
     length -= count;
     stream += count;
  }
#if TRACE
  fprintf(p_trace,"< DHT marker\n");
  fflush(p_trace);
#endif
  return 0;
}

生成Huffman表中数值:

/*
 * Expand the compact description of a Huffman table into the decoding
 * tables of `table`.
 *
 * bits:  bits[i] (i = 1..16) is the number of codes that are i bits long.
 * vals:  the decoded values, in the same order as the generated codes.
 * table: output; lookup, code_size and slowtable are (re)filled.
 *
 * Codes of at most HUFFMAN_HASH_NBITS bits go into table->lookup, longer
 * ones are appended to the table->slowtable row of their length.
 */
static void build_huffman_table(const unsigned char *bits, const unsigned char *vals, struct huffman_table *table)
{
     
  unsigned int i, j, code, code_size, val, nbits;
  unsigned char huffsize[HUFFMAN_BITS_SIZE+1], *hz;
  unsigned int huffcode[HUFFMAN_BITS_SIZE+1], *hc;
  int next_free_entry;
  /*
   * Build a temp array 
   *   huffsize[X] => numbers of bits to write vals[X]
   * The list is terminated by a 0 entry.
   */
  hz = huffsize;
  for (i=1; i<=16; i++)
   {
     
     for (j=1; j<=bits[i]; j++)
       *hz++ = i;
   }
  *hz = 0;
  /* Preset every byte of the lookup table to 0xff and mark each slowtable
   * row as empty (a 0 in the first slot). */
  memset(table->lookup, 0xff, sizeof(table->lookup));
  for (i=0; i<(16-HUFFMAN_HASH_NBITS); i++)
    table->slowtable[i][0] = 0;
  /* Build a temp array
   *   huffcode[X] => code used to write vals[X]
   * Codes of the same length are consecutive integers; each time the length
   * grows by one bit the running code is shifted left by one.
   */
  code = 0;
  hc = huffcode;
  hz = huffsize;
  nbits = *hz;
  while (*hz)
   {
     
     while (*hz == nbits)
      {
     
 *hc++ = code++;
 hz++;
      }
     code <<= 1;
     nbits++;
   }
  /*
   * Build the lookup table, and the slowtable if needed.
   */
  next_free_entry = -1;
  for (i=0; huffsize[i]; i++)
   {
     
     val = vals[i];
     code = huffcode[i];
     code_size = huffsize[i];
 #if TRACE
     fprintf(p_trace,"val=%2.2x code=%8.8x codesize=%2.2d\n", val, code, code_size);
  fflush(p_trace);
    #endif
     /* Remember the code length of this value */
     table->code_size[val] = code_size;
     if (code_size <= HUFFMAN_HASH_NBITS)
      {
     
 /*
  * Good: val can be put in the lookup table, so fill all value of this
  * column with value val 
  * The code is left-aligned to HUFFMAN_HASH_NBITS bits, and the `repeat`
  * consecutive entries starting there all receive val.
  */
 int repeat = 1UL<<(HUFFMAN_HASH_NBITS - code_size);
 code <<= HUFFMAN_HASH_NBITS - code_size;
 while ( repeat-- )
   table->lookup[code++] = val;
      }
     else
      {
     
 /* Perhaps sorting the array will be an optimization */
 /* Walk the row for this code length to its terminating 0, then append
  * the (code, val) pair followed by a new terminator. */
 uint16_t *slowtable = table->slowtable[code_size-HUFFMAN_HASH_NBITS-1];
 while(slowtable[0])
   slowtable+=2;
 slowtable[0] = code;
 slowtable[1] = val;
 slowtable[2] = 0;
 /* TODO: NEED TO CHECK FOR AN OVERFLOW OF THE TABLE */
      }
   }
}

解析SOS:

/*
 * Parse a SOS (Start of Scan) segment.
 *
 * stream points at the 2-byte segment length; stream[2] is the number of
 * components in the scan. Each component entry is two bytes: the component
 * id and a table selector whose high nibble is the DC Huffman table number
 * and whose low nibble is the AC Huffman table number. The selected tables
 * are stored in priv->component_infos[i].
 *
 * On success priv->stream is set 3 bytes past the component list, i.e. past
 * the rest of the scan header, and 0 is returned.
 *
 * With SANITY_CHECK enabled, returns -1 and sets error_string when the scan
 * does not have exactly 3 components, a table number is outside
 * HUFFMAN_TABLES, or the component ids do not match the order recorded by
 * parse_SOF().
 */
static int parse_SOS(struct jdec_private *priv, const unsigned char *stream)
{
  unsigned int i, cid, table;
  unsigned int nr_components = stream[2];

#if TRACE
  fprintf(p_trace,"> SOS marker\n");
  fflush(p_trace);
#endif

#if SANITY_CHECK
  /* Also keeps the loop below inside component_infos[] */
  if (nr_components != 3) {
    snprintf(error_string, sizeof(error_string),"We only support YCbCr image\n");
    return -1;
  }
#endif

  stream += 3;
  for (i=0;i<nr_components;i++) {
     cid = *stream++;
     table = *stream++;
#if SANITY_CHECK
     /* HTAC / HTDC both hold HUFFMAN_TABLES entries */
     if ((table&0xf) >= HUFFMAN_TABLES) {
       snprintf(error_string, sizeof(error_string),"We do not support more than 2 AC Huffman table\n");
       return -1;
     }
     if ((table>>4) >= HUFFMAN_TABLES) {
       snprintf(error_string, sizeof(error_string),"We do not support more than 2 DC Huffman table\n");
       return -1;
     }
     if (cid != priv->component_infos[i].cid) {
       snprintf(error_string, sizeof(error_string),"SOS cid order (%d:%d) isn't compatible with the SOF marker (%d:%d)\n",
           i, cid, i, priv->component_infos[i].cid);
       return -1;
     }
#if TRACE
     fprintf(p_trace,"ComponentId:%d  tableAC:%d tableDC:%d\n", cid, table&0xf, table>>4);
     fflush(p_trace);
#endif
#endif
     priv->component_infos[i].AC_table = &priv->HTAC[table&0xf];
     priv->component_infos[i].DC_table = &priv->HTDC[table>>4];
  }
  priv->stream = stream+3;
#if TRACE
  fprintf(p_trace,"< SOS marker\n");
  fflush(p_trace);
#endif
  return 0;
}

解析SOF:

/*
 * Parse a SOF (Start of Frame) segment.
 *
 * stream points at the 2-byte segment length. Layout read here:
 *   stream[2]    sample precision
 *   stream[3..4] image height (big endian)
 *   stream[5..6] image width  (big endian)
 *   stream[7]    number of components
 *   then 3 bytes per component: id, sampling factors (high nibble
 *   horizontal, low nibble vertical), quantization table index
 *
 * Fills priv->component_infos[] and priv->width / priv->height.
 *
 * Returns 0 on success. With SANITY_CHECK enabled, returns -1 and sets
 * error_string for a precision other than 8, suspicious dimensions, a
 * component count other than 3, dimensions that are not multiples of 16, or
 * a quantization table index outside priv->Q_tables.
 */
static int parse_SOF(struct jdec_private *priv, const unsigned char *stream)
{
  int i, width, height, nr_components, cid, sampling_factor;
  int Q_table;
  struct component *c;

#if TRACE
  fprintf(p_trace,"> SOF marker\n");
  fflush(p_trace);
#endif
  print_SOF(stream);

  height = be16_to_cpu(stream+3);
  width  = be16_to_cpu(stream+5);
  nr_components = stream[7];
#if SANITY_CHECK
  if (stream[2] != 8) {
    snprintf(error_string, sizeof(error_string),"Precision other than 8 is not supported\n");
    return -1;
  }
  if (width>JPEG_MAX_WIDTH || height>JPEG_MAX_HEIGHT) {
    snprintf(error_string, sizeof(error_string),"Width and Height (%dx%d) seems suspicious\n", width, height);
    return -1;
  }
  /* Also keeps the loop below inside component_infos[] */
  if (nr_components != 3) {
    snprintf(error_string, sizeof(error_string),"We only support YUV images\n");
    return -1;
  }
  if (height%16) {
    snprintf(error_string, sizeof(error_string),"Height need to be a multiple of 16 (current height is %d)\n", height);
    return -1;
  }
  if (width%16) {
    snprintf(error_string, sizeof(error_string),"Width need to be a multiple of 16 (current Width is %d)\n", width);
    return -1;
  }
#endif
  stream += 8;
  for (i=0; i<nr_components; i++) {
     cid = *stream++;
     sampling_factor = *stream++;
     Q_table = *stream++;
     c = &priv->component_infos[i];
#if SANITY_CHECK
     c->cid = cid;
     /* Q_tables has COMPONENTS rows; reject the index before it is used */
     if (Q_table >= COMPONENTS) {
       snprintf(error_string, sizeof(error_string),"Bad Quantization table index (got %d, max allowed %d)\n", Q_table, COMPONENTS-1);
       return -1;
     }
#endif
     c->Vfactor = sampling_factor&0xf;
     c->Hfactor = sampling_factor>>4;
     c->Q_table = priv->Q_tables[Q_table];
#if TRACE
     /* Print horizontal x vertical (the vertical factor used to be a second
      * copy of Hfactor) */
     fprintf(p_trace,"Component:%d  factor:%dx%d  Quantization table:%d\n",
           cid, c->Hfactor, c->Vfactor, Q_table );
     fflush(p_trace);
#endif
  }
  priv->width = width;
  priv->height = height;
#if TRACE
  fprintf(p_trace,"< SOF marker\n");
  fflush(p_trace);
#endif
  return 0;
}

具体函数过于庞大不完全发出,尝试输出转成功的YUV格式文件:
JPEG编解码原理与转换_第7张图片

可以看到,转换成功。

再在代码中添加一个FILE*变量定义一个.txt文件存储量化表,再在build_quantization_table函数中添加代码循环输出量化表,添加较为简单,结果如下:
JPEG编解码原理与转换_第8张图片
可以看到,该JPEG文件中有两个量化表。需要注意的是,量化表并不是按DC/AC来区分的:每张表的64个系数同时包含了DC和AC的量化步长,而每个分量使用哪一张表是在SOF中为各分量指定的,通常第一张表用于亮度分量Y,第二张表用于色度分量Cb、Cr

接下来,我们来输出图片的DC值和AC值,需将代码中tinyjpeg_decode函数修改为如下,其中dcBuffer为存储DC值的变量,acBuffer为存储AC值的变量,uvBuff中数据因为输出是灰度图所以皆为128,dcFile和acFile都是FILE*全局变量。

  /*
   * Fragment of the tinyjpeg_decode() main loop, modified to dump one DC and
   * one AC sample per decoded MCU. The enclosing function and the
   * declarations of x, y, count, dcBuffer, acBuffer, uvBuff, dcFile and
   * acFile are not part of this excerpt.
   */
  for (y=0; y < priv->height/ystride_by_mcu; y++)
   {
     
     //trace("Decoding row %d\n", y);
     /* Point each output plane at the start of the current row of MCUs */
     priv->plane[0] = priv->components[0] + (y * bytes_per_blocklines[0]);
     priv->plane[1] = priv->components[1] + (y * bytes_per_blocklines[1]);
     priv->plane[2] = priv->components[2] + (y * bytes_per_blocklines[2]);
     for (x=0; x < priv->width; x+=xstride_by_mcu)
      {
     
     decode_MCU(priv);
        /* Samples are taken from component_infos[0] right after the MCU is decoded */
        dcBuffer = (unsigned char)((priv->component_infos->DCT[0] + 512.0) / 4 + 0.5);  // DCT[0] is the DC coefficient; its range is taken as -512~512 and mapped to 0~255
        acBuffer = (unsigned char)(priv->component_infos->DCT[1] + 128);   // DCT[1] is picked as the AC sample to observe; +128 offsets it for easier viewing
        fwrite(&dcBuffer, 1, 1, dcFile);
        fwrite(&acBuffer, 1, 1, acFile);
        count++;
     convert_to_pixfmt(priv);
     /* Advance every plane pointer to the next MCU of this row */
     priv->plane[0] += bytes_per_mcu[0];
     priv->plane[1] += bytes_per_mcu[1];
     priv->plane[2] += bytes_per_mcu[2];
     /* Restart-interval handling: when the counter reaches 0, rewind the
      * stream by the whole bytes still held in the bit reservoir, resync and
      * look for the next RST marker */
     if (priv->restarts_to_go>0)
     {
     
         priv->restarts_to_go--;
          if (priv->restarts_to_go == 0)
          {
     
              priv->stream -= (priv->nbits_in_reservoir/8);
              resync(priv);
              if (find_next_rst_marker(priv) < 0)
             return -1;
          }
      }
      }
   }
   /* Append count/4*2 bytes of uvBuff to each file, i.e. half as many bytes
    * as samples written above - presumably the two quarter-size chroma
    * planes so the dump can be opened as a YUV 4:2:0 image; TODO confirm */
   for (int i = 0; i < count / 4 * 2; i++) {
     
      fwrite(&uvBuff, sizeof(unsigned char), 1, dcFile);
      fwrite(&uvBuff, sizeof(unsigned char), 1, acFile);
  }

输出结果如下,因为原图像是1024*1024,DC为每个8*8的块一个,同理,AC也是每个8*8的块选择一个,因此输出格式选择128*128:
JPEG编解码原理与转换_第9张图片
JPEG编解码原理与转换_第10张图片
QMF分析如下:
JPEG编解码原理与转换_第11张图片

你可能感兴趣的:(JPEG编解码原理与转换)