Base64编码解码与实现

Base64是一种很常见的编码规范,其定义为:Base64内容传送编码被设计用来把任意序列的8位字节描述为一种不易被人直接识别的形式(The Base64 Content-Transfer-Encoding is designed to represent arbitrary sequences of octets in a form that need not be humanly readable.)。其作用是将二进制序列转换为可打印的ASCII字符序列,常用在需要通过文本来传输二进制数据的协议中,如HTTP和SMTP等。

    Base64编码规则:对于待编码数据,以3个字节为单位,依次取6位,前两位补0形成8位编码,由于3*8=4*6,3个字节的输入会编码成4个字节的输出。如果最后剩下的数据不足3个字节,则在末尾用0比特补齐,完全由补位产生的输出字符用'='表示,因此编码后输出的文本末尾可能会出现1或2个'='。

    为了保证所输出的编码为可读字符,Base64制定了一个编码表,以便进行统一转换。编码表的大小为2^6=64,这也是Base64名称的由来。

 

           Base64编码表

   码值   码   码值  码  码值  码  码值  码
      0         A       17       R       34     i       51     z
      1         B       18       S       35     j       52     0
      2         C       19       T       36     k      53     1
      3         D       20       U       37     l       54     2
      4         E       21        V       38     m     55    3
      5         F        22       W      39     n      56    4
      6         G        23       X       40     o      57     5
      7         H        24       Y       41     p      58     6
      8         I          25       Z       42     q      59     7
      9         J         26       a       43      r      60     8
      10        K         27       b       44     s      61    9
      11       L         28       c       45      t       62    +
      12       M         29      d       46      u      63    /
      13       N         30      e       47      v
      14       O         31       f        48     w      (pad) =
      15       P         32       g       49 x
      16       Q         33       h       50 y 

编码详解


1. 不加后补位的字符串“abC”


01100001 01100010 01000011
00011000 00010110 00001001 00000011

24       22       9        3

查表可以得到编码值为:“YWJD”。

 

2. 加后补位的字符串“ab”:

 

01100001 01100010
00011000 00010110 00001000 00000000

24       22       8        -
 

由于不够24个比特位,所以我们要在末尾补8个0比特以凑够24位。“-”表示完全由补位产生的6位分组,编码后应为“=”,所以可以得到编码后的字符串为“YWI=”。

 

3. 加后补位的字符串“a”:

 

01100001

00011000 00010000 00000000 00000000

24       16       -        -
 

同样,编码后的字符串为“YQ==”,只是这里出现了两个“=”。

 

算法实现:

 

// Decode one 4-character Base64 group into 3 raw bytes.
//
//   dest - receives exactly 3 bytes
//   src  - the 4 characters of the group (src[0]..src[3] are read)
//
// '=' contributes six zero bits. Any other character outside the Base64
// alphabet is skipped without contributing bits, so the remaining sextets
// end up right-aligned in the 24-bit result.
void decodeBlock(unsigned char *dest, char *src)
{
  unsigned int acc = 0;
  int pos = 0;

  while(pos < 4) {
    char c = src[pos++];
    unsigned int sextet;

    if(c >= 'A' && c <= 'Z')
      sextet = (unsigned int)(c - 'A');
    else if(c >= 'a' && c <= 'z')
      sextet = (unsigned int)(c - 'a' + 26);
    else if(c >= '0' && c <= '9')
      sextet = (unsigned int)(c - '0' + 52);
    else if(c == '+')
      sextet = 62;
    else if(c == '/')
      sextet = 63;
    else if(c == '=')
      sextet = 0;
    else
      continue; // not a Base64 character: no shift, no bits

    // The low six bits are empty after the shift, so OR equals addition here.
    acc = (acc << 6) | sextet;
  }

  // Split the 24-bit accumulator into bytes, most significant first.
  dest[0] = (unsigned char)((acc >> 16) & 0xFF);
  dest[1] = (unsigned char)((acc >> 8) & 0xFF);
  dest[2] = (unsigned char)(acc & 0xFF);
}


 

// Decode the NUL-terminated Base64 string src into dest.
//
// Scanning stops at the first '=' or at the terminating NUL; the run of '='
// that follows is treated as padding. Only complete 4-character groups are
// decoded (each through decodeBlock()); a trailing partial group is ignored.
//
//   dest - receives the decoded bytes (no terminator is appended)
//   src  - Base64 text to decode
//   len  - if non-NULL, receives the number of bytes written to dest
//
// If dest or src is NULL, or src does not contain a complete group, nothing
// is written to dest and *len is set to 0.
//
// NOTE: the length of dest buffer must be larger than (strlen(src)*3)/4+3
void base64Decode(unsigned char *dest, char *src, int *len)
{
  int length = 0;      // characters before the first '=' (or NUL)
  int equalsTerm = 0;  // number of '=' directly following them
  int numBlocks;
  int padInLast;
  int tailBytes;
  int i;
  unsigned char lastBlock[3];

  if(len)
    *len = 0;
  if(!dest || !src)
    return;

  while(src[length] && (src[length] != '='))
    length++;
  while(src[length + equalsTerm] == '=')
    equalsTerm++;

  numBlocks = (length + equalsTerm) / 4;

  // Nothing to decode: do not touch src beyond its terminator or dest at all.
  if(numBlocks == 0)
    return;

  // Count only the padding that actually lies inside the last decoded group,
  // and never let it exceed the 3 bytes that group can produce. This keeps
  // *len non-negative and equal to the number of bytes really written.
  padInLast = numBlocks * 4 - length;
  if(padInLast < 0)
    padInLast = 0;
  if(padInLast > 3)
    padInLast = 3;
  tailBytes = 3 - padInLast;

  if(len)
    *len = (numBlocks - 1) * 3 + tailBytes;

  // All groups except the last one decode straight into dest.
  for(i = 0; i < numBlocks - 1; i++) {
    decodeBlock(dest, src);
    dest += 3;
    src += 4;
  }

  // The last group goes through a scratch buffer so that bytes produced
  // only by padding are not copied to dest.
  decodeBlock(lastBlock, src);
  for(i = 0; i < tailBytes; i++)
    dest[i] = lastBlock[i];
}


 

// Base64 alphabet: the character at index v encodes the 6-bit value v.
// Read-only lookup table, hence const.
static const char table64[]=
  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

 

// encode the data from inbuf and store the encoded data into outbuf

// return the length of encoded data

// NOTE: do not forget to free the outbuf allocated here
int base64Encode(const void *inbuf, int inlen, char **outbuf)
{
  unsigned char ibuf[3];
  unsigned char obuf[4];
  int i;
  int inputparts;
  char *output;
  char *base64data;

  char *indata = (char *)inbuf;

  if(0 == inlen)
    inlen = strlen(indata);

  base64data = output = (char*)malloc(inlen*4/3+4);
  if(NULL == output)
    return -1;

  while(inlen > 0) {
    for (i = inputparts = 0; i < 3; i++) {
      if(inlen > 0) {
        inputparts++;
        ibuf[i] = *indata;
        indata++;
        inlen--;
      }
      else
        ibuf[i] = 0;
    }

    obuf [0] = (ibuf [0] & 0xFC) >> 2;
    obuf [1] = ((ibuf [0] & 0x03) << 4) | ((ibuf [1] & 0xF0) >> 4);
    obuf [2] = ((ibuf [1] & 0x0F) << 2) | ((ibuf [2] & 0xC0) >> 6);
    obuf [3] = ibuf [2] & 0x3F;

    switch(inputparts) {
    case 1: /* only one byte read, two '=' needed */
      sprintf(output, "%c%c==",
              table64[obuf[0]],
              table64[obuf[1]]);
      break;
    case 2: /* two bytes read, one '=' needed */
      sprintf(output, "%c%c%c=",
              table64[obuf[0]],
              table64[obuf[1]],
              table64[obuf[2]]);
      break;
    default:
      sprintf(output, "%c%c%c%c",
              table64[obuf[0]],
              table64[obuf[1]],
              table64[obuf[2]],
              table64[obuf[3]] );
      break;
    }
    output += 4;
  }
  *output=0;
  *outbuf = base64data;

  return strlen(base64data);
}


 

-----------------------------

部分资料来自互联网,转载请注明作者及出处,谢谢!

你可能感兴趣的:(base64,职场,字符编码,休闲)