Base64是网络上最常见的用于传输8Bit字节码的编码方式之一。Base64就是一种基于64个可打印字符来表示二进制数据的方法。
Base64包含小写字母a-z、大写字母A-Z、数字0-9、符号“+”、“/”一共64个字符的字符集。任何符号都可以转换成这个字符集中的字符,这个转换过程就叫做base64编码。
Base64编码是从二进制到字符的过程,可用于在HTTP环境下传递较长的标识信息。采用base64编码具有不可读性,需要解码后才能阅读。
Base64要求每三个8bit字节转化为四个6bit字节(3 * 8 = 4 * 6 = 24bit),6bit高位补0得到8bit。所以理论上,转化后的字符串比原来长了1/3(每3个字节变为4个字节)。
例如,原来的数据是0x010203,转换为二进制为00000001,00000010,00000011。将其按6bit划分,000000/01,0000/0010,00/000011。高位补0,得到如下0000 0000,0001 0000,0000 1000,0000 0011。转换后的数据为0x00100803。
转换后,通过一个码表转化,就得到了最终的base64编码。码表只是起到一个查表的作用。
索引 |
对应字符 |
索引 |
对应字符 |
索引 |
对应字符 |
索引 |
对应字符 |
0 |
A |
17 |
R |
34 |
i |
51 |
z |
1 |
B |
18 |
S |
35 |
j |
52 |
0 |
2 |
C |
19 |
T |
36 |
k |
53 |
1 |
3 |
D |
20 |
U |
37 |
l |
54 |
2 |
4 |
E |
21 |
V |
38 |
m |
55 |
3 |
5 |
F |
22 |
W |
39 |
n |
56 |
4 |
6 |
G |
23 |
X |
40 |
o |
57 |
5 |
7 |
H |
24 |
Y |
41 |
p |
58 |
6 |
8 |
I |
25 |
Z |
42 |
q |
59 |
7 |
9 |
J |
26 |
a |
43 |
r |
60 |
8 |
10 |
K |
27 |
b |
44 |
s |
61 |
9 |
11 |
L |
28 |
c |
45 |
t |
62 |
+ |
12 |
M |
29 |
d |
46 |
u |
63 |
/ |
13 |
N |
30 |
e |
47 |
v |
||
14 |
O |
31 |
f |
48 |
w |
||
15 |
P |
32 |
g |
49 |
x |
||
16 |
Q |
33 |
h |
50 |
y |
上述0x010203转换后得到0x00100803,即0、16、8、3。查表得到A、Q、I、D。
AQID就是base64转化的最终结果。即0x010203 -> AQID。
这里有个问题,就是如果要转化的字节数量不是3的倍数该怎么办。
例如原数据为0x01020305。二进制为0000 0001,0000 0010,0000 0011,0000 0101。按6bit切分为0000 00/01,0000/0010,00/00 0011/,0000 01/01。
补齐6bit,结果为0000 0000,0001 0000,0000 1000,0000 0011,0000 0001,0001 0000。转化十进制为0、16、8、3、1、16。查表转化结果为AQIDBQ==。
注:如果最后剩余不足6bit,需要低位补0,直到满足6bit,最后才高位补0。
注:Base64编码都是以3个字节为基础单位进行转换,编码转换后的数据都是4的倍数。如果原数据大小不是3的倍数,则会在编码转换后的数据末尾增加一个或两个“=”(最多两个),保证编码输出的数据大小为4的倍数。
原数据 | 1 | 2 | 3 | 5 | ||||||||||||||||||||||||||||||||||||||||||||
二进制 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | ||||||||||||||||
二进制(高2位补0) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | ||||||||||||
补0后十进制值 | 0x00(0) | 0x10(16) | 0x08(8) | 0x03(3) | 0x01(1) | 0x10(16) | ||||||||||||||||||||||||||||||||||||||||||
base64编码 | A | Q | I | D | B | Q | = | = |
解码的过程与编码刚好相反,这里同样以0x01020305为例。刚才我们得到的结果是AQIDBQ==,去掉补充的==后,结果为AQIDBQ,十六进制为0x41,0x51,0x49,0x44,0x42,0x51,转换为十进制为: 65,81,73,68,66,81。依然使用查表的方式,查下表得到0,16,8,3,1,16。
转换为二进制为0000 0000,0001 0000,0000 1000,0000 0011,0000 0001,0001 0000。全部去掉高2位为00 0000,01 0000,00 1000,00 0011,00 0001,01 0000。按8bit重新拼接为0000 0001,0000 0010,0000 0011,0000 0101,末尾剩余的0000不足8bit,直接丢弃。转化为十六进制0x01020305。
注:解码时,是以4个6bit为一组,如果最后凑不齐4个6bit则直接丢弃数据。
索引 |
对应数字 |
索引 |
对应数字 |
索引 |
对应数字 |
索引 |
对应数字 |
43 |
62 |
69 |
4 |
86 |
21 |
109 |
38 |
47 |
63 |
70 |
5 |
87 |
22 |
110 |
39 |
48 |
52 |
71 |
6 |
88 |
23 |
111 |
40 |
49 |
53 |
72 |
7 |
89 |
24 |
112 |
41 |
50 |
54 |
73 |
8 |
90 |
25 |
113 |
42 |
51 |
55 |
74 |
9 |
97 |
26 |
114 |
43 |
52 |
56 |
75 |
10 |
98 |
27 |
115 |
44 |
53 |
57 |
76 |
11 |
99 |
28 |
116 |
45 |
54 |
58 |
77 |
12 |
100 |
29 |
117 |
46 |
55 |
59 |
78 |
13 |
101 |
30 |
118 |
47 |
56 |
60 |
79 |
14 |
102 |
31 |
119 |
48 |
57 |
61 |
80 |
15 |
103 |
32 |
120 |
49 |
61 |
64 |
81 |
16 |
104 |
33 |
121 |
50 |
65 |
0 |
82 |
17 |
105 |
34 |
122 |
51 |
66 |
1 |
83 |
18 |
106 |
35 |
||
67 |
2 |
84 |
19 |
107 |
36 |
||
68 |
3 |
85 |
20 |
108 |
37 |
base64编码 | A(65) | Q(81) | I(73) | D(68) | B(66) | Q(81) | = | = | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
查表值 | 0 | 16 | 8 | 3 | 1 | 16 | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
二进制 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | ||||||||||||||||
取消高2位 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | ||||||||||||||||||||||||||||
原始数据 | 0x01 | 0x02 | 0x03 | 0x05 | 弃 |
/* Encoding table: maps a 6-bit value (0..63) to its Base64 character. */
static const unsigned char base64_enc_map[64] =
{
    /*  0..25: upper-case letters */
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    /* 26..51: lower-case letters */
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    /* 52..61: digits */
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    /* 62, 63: the two symbols */
    '+', '/'
};
/*
 * Decoding table, indexed by a 7-bit character code (0..127).
 *   0..63  value of a Base64 digit
 *   64     the padding character '='
 *   127    character that is not part of the Base64 alphabet
 */
static const unsigned char base64_dec_map[128] =
{
    /*   0.. 15 */
    127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
    /*  16.. 31 */
    127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
    /*  32.. 47: '+' is 43, '/' is 47 */
    127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,  62, 127, 127, 127,  63,
    /*  48.. 63: '0'..'9' are 48..57, '=' is 61 */
     52,  53,  54,  55,  56,  57,  58,  59,  60,  61, 127, 127, 127,  64, 127, 127,
    /*  64.. 79: 'A' is 65 */
    127,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
    /*  80.. 95: 'Z' is 90 */
     15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25, 127, 127, 127, 127, 127,
    /*  96..111: 'a' is 97 */
    127,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
    /* 112..127: 'z' is 122 */
     41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51, 127, 127, 127, 127, 127
};
/* Largest value a size_t can hold (SIZE_T_MAX is not a standard macro). */
#define BASE64_SIZE_T_MAX                       ( (size_t) -1 )

/* Error codes returned by the Base64 encode/decode functions. */
#define MBEDTLS_ERR_BASE64_BUFFER_TOO_SMALL     ( -0x002A )  /**< Output buffer too small. */
#define MBEDTLS_ERR_BASE64_INVALID_CHARACTER    ( -0x002C )  /**< Invalid character in input. */
/*
 * Encode a buffer into base64 format.
 *
 * dst   output buffer; may be NULL to query the required size
 * dlen  capacity of dst in bytes
 * olen  on success: number of characters written, not counting the
 *       terminating NUL byte;
 *       on MBEDTLS_ERR_BASE64_BUFFER_TOO_SMALL: the required buffer size
 *       including the NUL byte, or BASE64_SIZE_T_MAX when that size cannot
 *       be represented in a size_t
 * src   input bytes
 * slen  number of input bytes
 *
 * Returns 0 on success or MBEDTLS_ERR_BASE64_BUFFER_TOO_SMALL.
 * When slen is 0 the function reports *olen = 0 and does not touch dst.
 */
int mbedtls_base64_encode( unsigned char *dst, size_t dlen, size_t *olen,
                   const unsigned char *src, size_t slen )
{
    size_t quads, need, whole, pos;
    unsigned int b0, b1, b2;
    unsigned char *out;

    if( slen == 0 )
    {
        *olen = 0;
        return( 0 );
    }

    /* Number of 4-character output groups: slen / 3, rounded up. */
    quads = slen / 3;
    if( slen % 3 != 0 )
        quads++;

    /* Reject lengths for which 4 * quads + 1 would overflow size_t. */
    if( quads > ( BASE64_SIZE_T_MAX - 1 ) / 4 )
    {
        *olen = BASE64_SIZE_T_MAX;
        return( MBEDTLS_ERR_BASE64_BUFFER_TOO_SMALL );
    }

    /* Encoded characters plus one byte for the terminating NUL. */
    need = 4 * quads + 1;
    if( dst == NULL || dlen < need )
    {
        *olen = need;
        return( MBEDTLS_ERR_BASE64_BUFFER_TOO_SMALL );
    }

    out = dst;
    whole = slen - ( slen % 3 );

    /* Complete 3-byte groups: each yields four 6-bit digits. */
    for( pos = 0; pos < whole; pos += 3 )
    {
        b0 = src[pos];
        b1 = src[pos + 1];
        b2 = src[pos + 2];

        *out++ = base64_enc_map[( b0 >> 2 ) & 0x3F];
        *out++ = base64_enc_map[( ( ( b0 & 3 ) << 4 ) | ( b1 >> 4 ) ) & 0x3F];
        *out++ = base64_enc_map[( ( ( b1 & 15 ) << 2 ) | ( b2 >> 6 ) ) & 0x3F];
        *out++ = base64_enc_map[b2 & 0x3F];
    }

    /* Leftover 1 or 2 bytes: zero-fill the missing low bits, pad with '='. */
    switch( slen - whole )
    {
        case 2:
            b0 = src[whole];
            b1 = src[whole + 1];
            *out++ = base64_enc_map[( b0 >> 2 ) & 0x3F];
            *out++ = base64_enc_map[( ( ( b0 & 3 ) << 4 ) | ( b1 >> 4 ) ) & 0x3F];
            *out++ = base64_enc_map[( ( b1 & 15 ) << 2 ) & 0x3F];
            *out++ = '=';
            break;

        case 1:
            b0 = src[whole];
            *out++ = base64_enc_map[( b0 >> 2 ) & 0x3F];
            *out++ = base64_enc_map[( ( b0 & 3 ) << 4 ) & 0x3F];
            *out++ = '=';
            *out++ = '=';
            break;

        default:
            break;
    }

    *olen = (size_t)( out - dst );
    *out = 0;

    return( 0 );
}
/*
 * Decode a base64-formatted buffer.
 *
 * dst   output buffer; may be NULL to query the size computed from the input
 * dlen  capacity of dst in bytes
 * olen  on success: number of bytes written;
 *       on MBEDTLS_ERR_BASE64_BUFFER_TOO_SMALL: the size computed from the
 *       input; left untouched on MBEDTLS_ERR_BASE64_INVALID_CHARACTER
 * src   input characters
 * slen  number of input characters
 *
 * Accepted input: Base64 digits, at most two '=' with no digit after them,
 * "\r\n" or "\n" line breaks, and spaces only directly before a line break
 * or at the end of the buffer.
 *
 * Returns 0 on success, MBEDTLS_ERR_BASE64_INVALID_CHARACTER for any other
 * input, or MBEDTLS_ERR_BASE64_BUFFER_TOO_SMALL when dst is NULL or dlen is
 * smaller than the computed size.  A trailing group of fewer than four
 * characters produces no output.
 */
int mbedtls_base64_decode( unsigned char *dst, size_t dlen, size_t *olen,
                   const unsigned char *src, size_t slen )
{
    size_t pos = 0;          /* read position in src */
    size_t digits = 0;       /* digits and '=' characters seen */
    size_t needed;           /* output size derived from the counts */
    size_t k;
    size_t in_group = 0;     /* characters collected in the current group */
    uint32_t pad_count = 0;  /* '=' characters seen */
    uint32_t spaces;         /* spaces skipped just before the current char */
    uint32_t keep = 3;       /* bytes to emit per group, reduced by padding */
    uint32_t group = 0;      /* accumulated 6-bit values */
    unsigned char c, v;
    unsigned char *out;

    /* Pass 1: validate the input and count the significant characters. */
    while( pos < slen )
    {
        /* Skip a run of spaces, remembering how many were skipped. */
        spaces = 0;
        for( ; pos < slen && src[pos] == ' '; ++pos )
            ++spaces;

        /* Spaces at the very end of the buffer are fine. */
        if( pos == slen )
            break;

        c = src[pos];

        /* Line breaks: "\n", or '\r' immediately followed by '\n'. */
        if( c == '\n' ||
            ( c == '\r' && ( slen - pos ) >= 2 && src[pos + 1] == '\n' ) )
        {
            pos++;
            continue;
        }

        /* Spaces followed by anything other than a line break. */
        if( spaces != 0 )
            return( MBEDTLS_ERR_BASE64_INVALID_CHARACTER );

        if( c > 127 )
            return( MBEDTLS_ERR_BASE64_INVALID_CHARACTER );

        v = base64_dec_map[c];
        if( v == 127 )
            return( MBEDTLS_ERR_BASE64_INVALID_CHARACTER );

        if( c == '=' )
        {
            /* No more than two padding characters. */
            if( ++pad_count > 2 )
                return( MBEDTLS_ERR_BASE64_INVALID_CHARACTER );
        }

        /* A real digit must not follow padding. */
        if( v < 64 && pad_count != 0 )
            return( MBEDTLS_ERR_BASE64_INVALID_CHARACTER );

        digits++;
        pos++;
    }

    if( digits == 0 )
    {
        *olen = 0;
        return( 0 );
    }

    /* Equivalent to ( ( digits * 6 ) + 7 ) >> 3, written so that the
     * multiplication cannot overflow. */
    needed = ( 6 * ( digits >> 3 ) ) + ( ( 6 * ( digits & 0x7 ) + 7 ) >> 3 );
    needed -= pad_count;

    if( dst == NULL || dlen < needed )
    {
        *olen = needed;
        return( MBEDTLS_ERR_BASE64_BUFFER_TOO_SMALL );
    }

    /* Pass 2: convert every complete group of four characters. */
    out = dst;
    for( k = 0; k < pos; k++ )
    {
        c = src[k];
        if( c == ' ' || c == '\r' || c == '\n' )
            continue;

        v = base64_dec_map[c];
        if( v == 64 )
            keep--;                     /* each '=' drops one output byte */

        group = ( group << 6 ) | ( v & 0x3F );

        if( ++in_group != 4 )
            continue;

        in_group = 0;
        if( keep > 0 ) *out++ = (unsigned char)( group >> 16 );
        if( keep > 1 ) *out++ = (unsigned char)( group >> 8 );
        if( keep > 2 ) *out++ = (unsigned char)( group );
    }

    *olen = (size_t)( out - dst );

    return( 0 );
}