Base64是一种很常见的编码规范,其定义为:Base64内容传送编码被设计用来把任意序列的8位字节描述为一种不易被人直接识别的形式(The Base64 Content-Transfer-Encoding is designed to represent arbitrary sequences of octets in a form that need not be humanly readable.)。其作用是将二进制序列转换为可打印的ASCII字符序列,常用在需要通过文本来传输二进制数据的协议中,如HTTP和SMTP等。
Base64编码规则:对于待编码数据,以3个字节为单位,依次取6位,前两位补0形成8位编码,由于3*8=4*6,3个字节的输入会编码成4个字节的输出。如果剩下的字符不足3个字节,则用0填充,输出字符使用'=',因此编码后输出的文本末尾可能会出现1或2个'='。
为了保证所输出的编码为可读字符,Base64制定了一个编码表,以便进行统一转换。编码表的大小为2^6=64,这也是Base64名称的由来。
Base64编码表
码值 码 码值 码 码值 码 码值 码
0 A 17 R 34 i 51 z
1 B 18 S 35 j 52 0
2 C 19 T 36 k 53 1
3 D 20 U 37 l 54 2
4 E 21 V 38 m 55 3
5 F 22 W 39 n 56 4
6 G 23 X 40 o 57 5
7 H 24 Y 41 p 58 6
8 I 25 Z 42 q 59 7
9 J 26 a 43 r 60 8
10 K 27 b 44 s 61 9
11 L 28 c 45 t 62 +
12 M 29 d 46 u 63 /
13 N 30 e 47 v
14 O 31 f 48 w (pad) =
15 P 32 g 49 x
16 Q 33 h 50 y
编码详解
1. 不加后补位的字符串“abC”
01100001 01100010 01000011
00011000 00010110 00001001 00000011
24 22 9 3
查表可以得到编码值为:“YWJD”。
2. 加后补位的字符串“ab”:
01100001 01100010
00011000 00010110 00001000 00000000
24 22 8 -
由于不够24个比特位,所以我们要加8个值为0的比特位以凑够24个。“-”表示增加的补位,编码后应为“=”,所以可以得到编码后的字符串为“YWI=”。
3. 加后补位的字符串“a”:
01100001
00011000 00010000 00000000 00000000
24 16 - -
同样,编码后的字符串为“YQ==”,只是这里出现了两个“=”。
算法实现:
// Decode one Base64 group: reads the 4 characters src[0..3] and stores the
// resulting 3 bytes in dest[0..2].
//   dest - receives exactly 3 bytes (most significant byte first)
//   src  - 4 characters taken from the Base64 alphabet or '='
// A '=' contributes six zero bits. Any other character outside the alphabet
// is skipped entirely (it does not even shift the accumulator), so the
// output is only meaningful for well-formed groups.
void decodeBlock(unsigned char *dest, char *src)
{
    unsigned int bits = 0;
    int pos;

    for (pos = 0; pos < 4; pos++) {
        const char c = src[pos];
        int sextet;

        if (c >= 'A' && c <= 'Z')
            sextet = c - 'A';
        else if (c >= 'a' && c <= 'z')
            sextet = c - 'a' + 26;
        else if (c >= '0' && c <= '9')
            sextet = c - '0' + 52;
        else if (c == '+')
            sextet = 62;
        else if (c == '/')
            sextet = 63;
        else if (c == '=')
            sextet = 0;
        else
            continue;

        // The low six bits are empty after the shift, so OR equals addition.
        bits = (bits << 6) | (unsigned int)sextet;
    }

    // Split the 24-bit accumulator into three bytes, high byte first.
    dest[0] = (unsigned char)((bits >> 16) & 0xFF);
    dest[1] = (unsigned char)((bits >> 8) & 0xFF);
    dest[2] = (unsigned char)(bits & 0xFF);
}
// Decode the NUL-terminated Base64 text in src into dest.
//   dest - output buffer; it is NOT NUL-terminated by this function
//   src  - Base64 text; scanning stops at the first '=' run or at the NUL
//   len  - optional (may be NULL); receives the number of bytes written
// NOTE: the length of dest buffer must be larger than (strlen(src)*3)/4+3
// Only complete 4-character groups are decoded; trailing characters that do
// not fill a group are ignored. Input with no complete group (including the
// empty string) or a NULL dest/src decodes to nothing and reports length 0.
void base64Decode(unsigned char *dest, char *src, int *len)
{
    int dataChars = 0;
    int padChars = 0;
    int numBlocks;
    int tailBytes;
    int i;
    unsigned char lastBlock[3];

    if (!dest || !src) {
        if (len)
            *len = 0;
        return;
    }

    while (src[dataChars] != '\0' && src[dataChars] != '=')
        dataChars++;
    while (src[dataChars + padChars] == '=')
        padChars++;

    numBlocks = (dataChars + padChars) / 4;

    // Without this guard the final decodeBlock() call below would read four
    // characters past a short string and garbage would be copied to dest.
    if (numBlocks == 0) {
        if (len)
            *len = 0;
        return;
    }

    // Each '=' removes one byte from the last group; clamp so that malformed
    // input with more than three '=' can never produce a negative count.
    tailBytes = 3 - padChars;
    if (tailBytes < 0)
        tailBytes = 0;

    if (len)
        *len = (numBlocks - 1) * 3 + tailBytes;

    // All groups except the last decode straight into the caller's buffer.
    for (i = 0; i < numBlocks - 1; i++) {
        decodeBlock(dest, src);
        dest += 3;
        src += 4;
    }

    // The last group goes through a scratch buffer so that only the bytes
    // not covered by padding are stored.
    decodeBlock(lastBlock, src);
    for (i = 0; i < tailBytes; i++)
        dest[i] = lastBlock[i];
}
// The Base64 alphabet: index = 6-bit value, element = output character.
static const char table64[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

// Encode inlen bytes from inbuf as NUL-terminated Base64 text.
//   inbuf  - data to encode
//   inlen  - number of bytes; 0 means "inbuf is a C string, use strlen()"
//   outbuf - receives a malloc()ed buffer holding the encoded text
// Returns the length of the encoded text (excluding the terminator), or -1
// when an argument is invalid (NULL pointer, negative length), the result
// would not fit in an int, or memory cannot be allocated. On failure
// *outbuf is set to NULL (when outbuf itself is not NULL).
// NOTE: do not forget to free the outbuf allocated here
int base64Encode(const void *inbuf, int inlen, char **outbuf)
{
    const unsigned char *in = (const unsigned char *)inbuf;
    // Largest int value, derived without <limits.h> (which this file does
    // not include); used to keep the return value representable.
    const size_t intMax = (size_t)(((unsigned int)-1) >> 1);
    size_t remaining;
    size_t groups;
    char *encoded;
    char *out;

    if (NULL == outbuf)
        return -1;
    *outbuf = NULL;

    if (NULL == inbuf || inlen < 0)
        return -1;

    remaining = (size_t)inlen;
    if (0 == remaining)
        remaining = strlen((const char *)inbuf);

    // Every started 3-byte group yields 4 output characters. The size is
    // computed in size_t and bounded so that neither the allocation size
    // nor the int return value can overflow.
    groups = (remaining + 2) / 3;
    if (groups > intMax / 4)
        return -1;

    encoded = (char *)malloc(groups * 4 + 1);
    if (NULL == encoded)
        return -1;
    out = encoded;

    // Full 3-byte groups.
    while (remaining >= 3) {
        out[0] = table64[in[0] >> 2];
        out[1] = table64[((in[0] & 0x03) << 4) | (in[1] >> 4)];
        out[2] = table64[((in[1] & 0x0F) << 2) | (in[2] >> 6)];
        out[3] = table64[in[2] & 0x3F];
        in += 3;
        out += 4;
        remaining -= 3;
    }

    // Trailing 1 or 2 bytes: missing input bits are zero, missing output
    // characters become '='.
    if (remaining > 0) {
        const unsigned int b0 = in[0];
        const unsigned int b1 = (remaining == 2) ? in[1] : 0u;

        out[0] = table64[b0 >> 2];
        out[1] = table64[((b0 & 0x03) << 4) | (b1 >> 4)];
        out[2] = (remaining == 2) ? table64[(b1 & 0x0F) << 2] : '=';
        out[3] = '=';
        out += 4;
    }

    *out = '\0';
    *outbuf = encoded;
    return (int)(out - encoded);
}
-----------------------------
部分资料来自互联网,转载请注明作者及出处,谢谢!