Java编程实现支持中文的Base64编码转换
网上有很多源代码可以实现Base64编码的转换,但是主要是对中文转换的时候有问题。
后来在网上找了很多资料,发现原来Java字符串内部使用UTF-16编码,而这些Base64算法都是针对单字节的英文字符、按字节处理的。因此,在对中文字符串转换的过程中会出现截取错误。后来在源代码的基础上进行了小的调整(编码前先把字符串按UTF-8转换为字节数组,解码后再按UTF-8还原为字符串),最终可以正确实现中文的Base64编码转换。
下面贴出一个实现代码:
1
package
com.aostarit.idm;
2
3 import java.io.UnsupportedEncodingException;
4 import java.util.Arrays;
5
6 /** */ /**
7 * A very fast and memory efficient class to encode and decode to and from
8 * BASE64 in full accordance with RFC 2045.<br>
9 * <br>
10 * On Windows XP sp1 with 1.4.2_04 and later ;), this encoder and decoder is
11 * about 10 times faster on small arrays (10 - 1000 bytes) and 2-3 times as fast
12 * on larger arrays (10000 - 1000000 bytes) compared to
13 * <code>sun.misc.Encoder()/Decoder()</code>.<br>
14 * <br>
15 *
16 * On byte arrays the encoder is about 20% faster than Jakarta Commons Base64
17 * Codec for encode and about 50% faster for decoding large arrays. This
18 * implementation is about twice as fast on very small arrays (< 30 bytes). If
19 * source/destination is a <code>String</code> this version is about three
20 * times as fast due to the fact that the Commons Codec result has to be recoded
21 * to a <code>String</code> from <code>byte[]</code>, which is very
22 * expensive.<br>
23 * <br>
24 *
25 * This encode/decode algorithm doesn't create any temporary arrays as many
26 * other codecs do, it only allocates the resulting array. This produces less
27 * garbage and it is possible to handle arrays twice as large as algorithms that
28 * create a temporary array. (E.g. Jakarta Commons Codec). It is unknown whether
29 * Sun's <code>sun.misc.Encoder()/Decoder()</code> produce temporary arrays
30 * but since performance is quite low it probably does.<br>
31 * <br>
32 *
33 * The encoder produces the same output as the Sun one except that the Sun's
34 * encoder appends a trailing line separator if the last character isn't a pad.
35 * Unclear why but it only adds to the length and is probably a side effect.
36 * Both are in conformance with RFC 2045 though.<br>
37 * Commons codec seems to always add a trailing line separator.<br>
38 * <br>
39 *
40 * <b>Note!</b> The encode/decode method pairs (types) come in three versions
41 * with the <b>exact</b> same algorithm and thus a lot of code redundancy. This
42 * is to not create any temporary arrays for transcoding to/from different
43 * format types. The methods not used can simply be commented out.<br>
44 * <br>
45 *
46 * There is also a "fast" version of all decode methods that works the same way
47 * as the normal ones, but has a few demands on the decoded input. Normally
48 * though, these fast versions should be used if the source of the input is known
49 * and it hasn't been tampered with.<br>
50 * <br>
51 *
52 * If you find the code useful or you find a bug, please send me a note at
53 * base64 @ miginfocom . com.
54 *
55 * Licence (BSD): ==============
56 *
57 * Copyright (c) 2004, Mikael Grev, MiG InfoCom AB. (base64 @ miginfocom . com)
58 * All rights reserved.
59 *
60 * Redistribution and use in source and binary forms, with or without
61 * modification, are permitted provided that the following conditions are met:
62 * Redistributions of source code must retain the above copyright notice, this
63 * list of conditions and the following disclaimer. Redistributions in binary
64 * form must reproduce the above copyright notice, this list of conditions and
65 * the following disclaimer in the documentation and/or other materials provided
66 * with the distribution. Neither the name of the MiG InfoCom AB nor the names
67 * of its contributors may be used to endorse or promote products derived from
68 * this software without specific prior written permission.
69 *
70 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
71 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
72 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
73 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
74 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
75 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
76 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
77 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
78 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
79 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
80 * POSSIBILITY OF SUCH DAMAGE.
81 *
82 * @version 2.2
83 * @author Mikael Grev Date: 2004-aug-02 Time: 11:31:11
84 */
85
public class Base64 {
    /** The Base64 alphabet of RFC 2045; the array index is the 6-bit value a character encodes. */
    private static final char[] CA = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
            .toCharArray();

    /**
     * Reverse lookup table for character codes 0-255: the 6-bit value of an
     * alphabet character, 0 for the pad character '=', and -1 for anything
     * else.
     */
    private static final int[] IA = new int[256];
    static {
        Arrays.fill(IA, -1);
        for (int i = 0, iS = CA.length; i < iS; i++)
            IA[CA[i]] = i;
        IA['='] = 0;
    }

    /**
     * Bounds-safe lookup in {@link #IA}. Character codes outside the table
     * (for instance any non-Latin-1 character such as a Chinese character) are
     * reported as illegal instead of raising an
     * <code>ArrayIndexOutOfBoundsException</code>.
     *
     * @param c
     *            The character code to look up.
     * @return The 6-bit value (0 for '='), or -1 if <code>c</code> is not a
     *         Base64 character.
     */
    private static int sextet(int c) {
        return c >= 0 && c < IA.length ? IA[c] : -1;
    }

    // ****************************************************************************************
    // * char[] version
    // ****************************************************************************************

    /**
     * Encodes a raw byte array into a BASE64 <code>char[]</code>
     * representation in accordance with RFC 2045.
     *
     * @param sArr
     *            The bytes to convert. If <code>null</code> or length 0 an
     *            empty array will be returned.
     * @param lineSep
     *            Optional "\r\n" after 76 characters, unless end of file.<br>
     *            No line separator will be in breach of RFC 2045 which
     *            specifies max 76 per line but will be a little faster.
     * @return A BASE64 encoded array. Never <code>null</code>.
     */
    public final static char[] encodeToChar(byte[] sArr, boolean lineSep) {
        // Check special case
        int sLen = sArr != null ? sArr.length : 0;
        if (sLen == 0)
            return new char[0];

        int eLen = (sLen / 3) * 3; // Length of even 24-bits.
        int cCnt = ((sLen - 1) / 3 + 1) << 2; // Returned character count
        // Length of returned array: two extra chars per completed 76-char line.
        int dLen = cCnt + (lineSep ? (cCnt - 1) / 76 << 1 : 0);
        char[] dArr = new char[dLen];

        // Encode even 24-bits
        for (int s = 0, d = 0, cc = 0; s < eLen;) {
            // Copy next three bytes into lower 24 bits of int, paying
            // attention to sign.
            int i = (sArr[s++] & 0xff) << 16 | (sArr[s++] & 0xff) << 8
                    | (sArr[s++] & 0xff);

            // Encode the int into four chars
            dArr[d++] = CA[(i >>> 18) & 0x3f];
            dArr[d++] = CA[(i >>> 12) & 0x3f];
            dArr[d++] = CA[(i >>> 6) & 0x3f];
            dArr[d++] = CA[i & 0x3f];

            // Add optional line separator after 19 groups (76 chars), but
            // never as the very last thing in the output.
            if (lineSep && ++cc == 19 && d < dLen - 2) {
                dArr[d++] = '\r';
                dArr[d++] = '\n';
                cc = 0;
            }
        }

        // Pad and encode last bits if source isn't even 24 bits.
        int left = sLen - eLen; // 0 - 2.
        if (left > 0) {
            // Build an 18-bit word: first byte in bits 10-17, optional second
            // byte in bits 2-9, so it splits into three 6-bit values.
            int i = ((sArr[eLen] & 0xff) << 10)
                    | (left == 2 ? ((sArr[sLen - 1] & 0xff) << 2) : 0);

            // Set last four chars
            dArr[dLen - 4] = CA[i >> 12];
            dArr[dLen - 3] = CA[(i >>> 6) & 0x3f];
            dArr[dLen - 2] = left == 2 ? CA[i & 0x3f] : '=';
            dArr[dLen - 1] = '=';
        }
        return dArr;
    }

    /**
     * Decodes a BASE64 encoded char array. All illegal characters, including
     * characters above code 255, are ignored, so arrays with and without line
     * separators can be handled.
     *
     * @param sArr
     *            The source array. <code>null</code> or length 0 will return
     *            an empty array.
     * @return The decoded array of bytes. May be of length 0. Will be
     *         <code>null</code> if the number of legal characters (including
     *         '=') isn't divisible by 4. (I.e. definitely corrupted).
     */
    public final static byte[] decode(char[] sArr) {
        // Check special case
        int sLen = sArr != null ? sArr.length : 0;
        if (sLen == 0)
            return new byte[0];

        // Count illegal characters (including '\r', '\n') to know what size
        // the returned array will be, so we don't have to reallocate & copy
        // it later.
        int sepCnt = 0;
        for (int i = 0; i < sLen; i++)
            if (sextet(sArr[i]) < 0)
                sepCnt++;

        // Check so that legal chars (including '=') are evenly divisible by 4
        // as specified in RFC 2045.
        if ((sLen - sepCnt) % 4 != 0)
            return null;

        // Count '=' at the end. The scan walks back over every trailing char
        // whose table value is <= 0 (illegal chars, '=' and 'A') and counts
        // only the '=' characters.
        int pad = 0;
        for (int i = sLen; i > 1 && sextet(sArr[--i]) <= 0;)
            if (sArr[i] == '=')
                pad++;

        int len = ((sLen - sepCnt) * 6 >> 3) - pad;

        byte[] dArr = new byte[len]; // Preallocate byte[] of exact length

        for (int s = 0, d = 0; d < len;) {
            // Assemble three bytes into an int from four "valid" characters.
            int i = 0;
            for (int j = 0; j < 4; j++) { // j only advances on a valid char
                int c = sextet(sArr[s++]);
                if (c >= 0)
                    i |= c << (18 - j * 6);
                else
                    j--;
            }
            // Add the bytes
            dArr[d++] = (byte) (i >> 16);
            if (d < len) {
                dArr[d++] = (byte) (i >> 8);
                if (d < len)
                    dArr[d++] = (byte) i;
            }
        }
        return dArr;
    }

    /**
     * Decodes a BASE64 encoded char array that is known to be reasonably well
     * formatted. The preconditions are:<br>
     * + The array must have a line length of 76 chars OR no line separators
     * at all (one line).<br>
     * + Line separator must be "\r\n", as specified in RFC 2045.<br>
     * + The array must not contain illegal characters within the encoded
     * string.<br>
     * + The array CAN have illegal characters at the beginning and end, those
     * will be trimmed.<br>
     * Input that consists only of padding decodes to an empty array.
     *
     * @param sArr
     *            The source array. Length 0 will return an empty array.
     *            <code>null</code> will throw an exception.
     * @return The decoded array of bytes. May be of length 0.
     */
    public final static byte[] decodeFast(char[] sArr) {
        // Check special case
        int sLen = sArr.length;
        if (sLen == 0)
            return new byte[0];

        int sIx = 0, eIx = sLen - 1; // Start and end index after trimming.

        // Trim illegal chars from start
        while (sIx < eIx && sextet(sArr[sIx]) < 0)
            sIx++;

        // Trim illegal chars from end
        while (eIx > 0 && sextet(sArr[eIx]) < 0)
            eIx--;

        // Get the padding count (=) (0, 1 or 2). The eIx > 0 guard keeps a
        // lone '=' from reading index -1.
        int pad = sArr[eIx] == '=' ? (eIx > 0 && sArr[eIx - 1] == '=' ? 2 : 1) : 0;
        int cCnt = eIx - sIx + 1; // Content count including separators
        // Line-wrapped input is detected by a '\r' at absolute index 76.
        int sepCnt = sLen > 76 ? (sArr[76] == '\r' ? cCnt / 78 : 0) << 1 : 0;

        int len = ((cCnt - sepCnt) * 6 >> 3) - pad; // Number of decoded bytes
        if (len <= 0)
            return new byte[0]; // Nothing to decode (e.g. padding only).
        byte[] dArr = new byte[len]; // Preallocate byte[] of exact length

        // Decode all but the last 0 - 2 bytes.
        int d = 0;
        for (int cc = 0, eLen = (len / 3) * 3; d < eLen;) {
            // Assemble three bytes into an int from four "valid" characters.
            int i = sextet(sArr[sIx++]) << 18 | sextet(sArr[sIx++]) << 12
                    | sextet(sArr[sIx++]) << 6 | sextet(sArr[sIx++]);

            // Add the bytes
            dArr[d++] = (byte) (i >> 16);
            dArr[d++] = (byte) (i >> 8);
            dArr[d++] = (byte) i;

            // If line separator, jump over it.
            if (sepCnt > 0 && ++cc == 19) {
                sIx += 2;
                cc = 0;
            }
        }

        if (d < len) {
            // Decode the remaining characters (excluding '=') into 1-2 bytes
            int i = 0;
            for (int j = 0; sIx <= eIx - pad; j++)
                i |= sextet(sArr[sIx++]) << (18 - j * 6);

            for (int r = 16; d < len; r -= 8)
                dArr[d++] = (byte) (i >> r);
        }

        return dArr;
    }

    // ****************************************************************************************
    // * byte[] version
    // ****************************************************************************************

    /**
     * Encodes a raw byte array into a BASE64 <code>byte[]</code>
     * representation in accordance with RFC 2045.
     *
     * @param sArr
     *            The bytes to convert. If <code>null</code> or length 0 an
     *            empty array will be returned.
     * @param lineSep
     *            Optional "\r\n" after 76 characters, unless end of file.<br>
     *            No line separator will be in breach of RFC 2045 which
     *            specifies max 76 per line but will be a little faster.
     * @return A BASE64 encoded array. Never <code>null</code>.
     */
    public final static byte[] encodeToByte(byte[] sArr, boolean lineSep) {
        // Check special case
        int sLen = sArr != null ? sArr.length : 0;
        if (sLen == 0)
            return new byte[0];

        int eLen = (sLen / 3) * 3; // Length of even 24-bits.
        int cCnt = ((sLen - 1) / 3 + 1) << 2; // Returned character count
        // Length of returned array: two extra bytes per completed 76-char line.
        int dLen = cCnt + (lineSep ? (cCnt - 1) / 76 << 1 : 0);
        byte[] dArr = new byte[dLen];

        // Encode even 24-bits
        for (int s = 0, d = 0, cc = 0; s < eLen;) {
            // Copy next three bytes into lower 24 bits of int, paying
            // attention to sign.
            int i = (sArr[s++] & 0xff) << 16 | (sArr[s++] & 0xff) << 8
                    | (sArr[s++] & 0xff);

            // Encode the int into four chars
            dArr[d++] = (byte) CA[(i >>> 18) & 0x3f];
            dArr[d++] = (byte) CA[(i >>> 12) & 0x3f];
            dArr[d++] = (byte) CA[(i >>> 6) & 0x3f];
            dArr[d++] = (byte) CA[i & 0x3f];

            // Add optional line separator after 19 groups (76 chars), but
            // never as the very last thing in the output.
            if (lineSep && ++cc == 19 && d < dLen - 2) {
                dArr[d++] = '\r';
                dArr[d++] = '\n';
                cc = 0;
            }
        }

        // Pad and encode last bits if source isn't an even 24 bits.
        int left = sLen - eLen; // 0 - 2.
        if (left > 0) {
            // Build an 18-bit word: first byte in bits 10-17, optional second
            // byte in bits 2-9, so it splits into three 6-bit values.
            int i = ((sArr[eLen] & 0xff) << 10)
                    | (left == 2 ? ((sArr[sLen - 1] & 0xff) << 2) : 0);

            // Set last four chars
            dArr[dLen - 4] = (byte) CA[i >> 12];
            dArr[dLen - 3] = (byte) CA[(i >>> 6) & 0x3f];
            dArr[dLen - 2] = left == 2 ? (byte) CA[i & 0x3f] : (byte) '=';
            dArr[dLen - 1] = '=';
        }
        return dArr;
    }

    /**
     * Decodes a BASE64 encoded byte array. All illegal characters will be
     * ignored, so arrays with and without line separators can be handled.
     *
     * @param sArr
     *            The source array. Length 0 will return an empty array.
     *            <code>null</code> will throw an exception.
     * @return The decoded array of bytes. May be of length 0. Will be
     *         <code>null</code> if the number of legal characters (including
     *         '=') isn't divisible by 4. (I.e. definitely corrupted).
     */
    public final static byte[] decode(byte[] sArr) {
        int sLen = sArr.length;

        // Count illegal characters (including '\r', '\n') to know what size
        // the returned array will be, so we don't have to reallocate & copy
        // it later.
        int sepCnt = 0;
        for (int i = 0; i < sLen; i++)
            if (IA[sArr[i] & 0xff] < 0)
                sepCnt++;

        // Check so that legal chars (including '=') are evenly divisible by 4
        // as specified in RFC 2045.
        if ((sLen - sepCnt) % 4 != 0)
            return null;

        // Count '=' at the end. The scan walks back over every trailing byte
        // whose table value is <= 0 (illegal bytes, '=' and 'A') and counts
        // only the '=' bytes.
        int pad = 0;
        for (int i = sLen; i > 1 && IA[sArr[--i] & 0xff] <= 0;)
            if (sArr[i] == '=')
                pad++;

        int len = ((sLen - sepCnt) * 6 >> 3) - pad;

        byte[] dArr = new byte[len]; // Preallocate byte[] of exact length

        for (int s = 0, d = 0; d < len;) {
            // Assemble three bytes into an int from four "valid" characters.
            int i = 0;
            for (int j = 0; j < 4; j++) { // j only advances on a valid char
                int c = IA[sArr[s++] & 0xff];
                if (c >= 0)
                    i |= c << (18 - j * 6);
                else
                    j--;
            }

            // Add the bytes
            dArr[d++] = (byte) (i >> 16);
            if (d < len) {
                dArr[d++] = (byte) (i >> 8);
                if (d < len)
                    dArr[d++] = (byte) i;
            }
        }

        return dArr;
    }

    /**
     * Decodes a BASE64 encoded byte array that is known to be reasonably well
     * formatted. The preconditions are:<br>
     * + The array must have a line length of 76 chars OR no line separators
     * at all (one line).<br>
     * + Line separator must be "\r\n", as specified in RFC 2045.<br>
     * + The array must not contain illegal characters within the encoded
     * string.<br>
     * + The array CAN have illegal characters at the beginning and end, those
     * will be trimmed.<br>
     * Input that consists only of padding decodes to an empty array.
     *
     * @param sArr
     *            The source array. Length 0 will return an empty array.
     *            <code>null</code> will throw an exception.
     * @return The decoded array of bytes. May be of length 0.
     */
    public final static byte[] decodeFast(byte[] sArr) {
        // Check special case
        int sLen = sArr.length;
        if (sLen == 0)
            return new byte[0];

        int sIx = 0, eIx = sLen - 1; // Start and end index after trimming.

        // Trim illegal chars from start
        while (sIx < eIx && IA[sArr[sIx] & 0xff] < 0)
            sIx++;

        // Trim illegal chars from end
        while (eIx > 0 && IA[sArr[eIx] & 0xff] < 0)
            eIx--;

        // Get the padding count (=) (0, 1 or 2). The eIx > 0 guard keeps a
        // lone '=' from reading index -1.
        int pad = sArr[eIx] == '=' ? (eIx > 0 && sArr[eIx - 1] == '=' ? 2 : 1) : 0;
        int cCnt = eIx - sIx + 1; // Content count including separators
        // Line-wrapped input is detected by a '\r' at absolute index 76.
        int sepCnt = sLen > 76 ? (sArr[76] == '\r' ? cCnt / 78 : 0) << 1 : 0;

        int len = ((cCnt - sepCnt) * 6 >> 3) - pad; // Number of decoded bytes
        if (len <= 0)
            return new byte[0]; // Nothing to decode (e.g. padding only).
        byte[] dArr = new byte[len]; // Preallocate byte[] of exact length

        // Decode all but the last 0 - 2 bytes.
        int d = 0;
        for (int cc = 0, eLen = (len / 3) * 3; d < eLen;) {
            // Assemble three bytes into an int from four "valid" characters.
            // The 0xff mask keeps negative bytes inside the lookup table.
            int i = IA[sArr[sIx++] & 0xff] << 18 | IA[sArr[sIx++] & 0xff] << 12
                    | IA[sArr[sIx++] & 0xff] << 6 | IA[sArr[sIx++] & 0xff];

            // Add the bytes
            dArr[d++] = (byte) (i >> 16);
            dArr[d++] = (byte) (i >> 8);
            dArr[d++] = (byte) i;

            // If line separator, jump over it.
            if (sepCnt > 0 && ++cc == 19) {
                sIx += 2;
                cc = 0;
            }
        }

        if (d < len) {
            // Decode the remaining characters (excluding '=') into 1-2 bytes
            int i = 0;
            for (int j = 0; sIx <= eIx - pad; j++)
                i |= IA[sArr[sIx++] & 0xff] << (18 - j * 6);

            for (int r = 16; d < len; r -= 8)
                dArr[d++] = (byte) (i >> r);
        }

        return dArr;
    }

    // ****************************************************************************************
    // * String version
    // ****************************************************************************************

    /**
     * Encodes a raw byte array into a BASE64 <code>String</code>
     * representation in accordance with RFC 2045.
     *
     * @param sArr
     *            The bytes to convert. If <code>null</code> or length 0 an
     *            empty string will be returned.
     * @param lineSep
     *            Optional "\r\n" after 76 characters, unless end of file.<br>
     *            No line separator will be in breach of RFC 2045 which
     *            specifies max 76 per line but will be a little faster.
     * @return A BASE64 encoded string. Never <code>null</code>.
     */
    public final static String encodeToString(byte[] sArr, boolean lineSep) {
        // Reuse char[] since we can't create a String incrementally anyway and
        // StringBuffer/Builder would be slower.
        return new String(encodeToChar(sArr, lineSep));
    }

    /**
     * Encodes the UTF-8 bytes of a string as BASE64 without line separators.
     * Going through UTF-8 is what makes non-ASCII text (for example Chinese)
     * survive an {@link #encode(String)} / {@link #decode(String)} round trip.
     *
     * @param s
     *            The text to encode. <code>null</code> is treated like the
     *            empty string.
     * @return The BASE64 text. Never <code>null</code>.
     * @throws IllegalStateException
     *             if the JVM does not provide the UTF-8 charset.
     */
    public final static String encode(String s) {
        if (s == null)
            return "";
        try {
            return new String(encodeToChar(s.getBytes("UTF-8"), false));
        } catch (UnsupportedEncodingException e) {
            // Fail loudly with the cause attached rather than logging and
            // handing the caller a null.
            throw new IllegalStateException("UTF-8 is not supported by this JVM", e);
        }
    }

    /**
     * Decodes a BASE64 encoded <code>String</code>. All illegal characters,
     * including characters above code 255, are ignored, so strings with and
     * without line separators can be handled. This version uses
     * <code>str.charAt(i)</code> to iterate the string and creates no
     * temporary array.
     *
     * @param str
     *            The source string. <code>null</code> or length 0 will return
     *            an empty array.
     * @param used
     *            Not read by this method; it only distinguishes this overload
     *            from {@link #decode(String)}.
     * @return The decoded array of bytes. May be of length 0. Will be
     *         <code>null</code> if the number of legal characters (including
     *         '=') isn't divisible by 4. (I.e. definitely corrupted).
     */
    public final static byte[] decode(String str, boolean used) {
        // Check special case
        int sLen = str != null ? str.length() : 0;
        if (sLen == 0)
            return new byte[0];

        // Count illegal characters (including '\r', '\n') to know what size
        // the returned array will be, so we don't have to reallocate & copy
        // it later.
        int sepCnt = 0;
        for (int i = 0; i < sLen; i++)
            if (sextet(str.charAt(i)) < 0)
                sepCnt++;

        // Check so that legal chars (including '=') are evenly divisible by 4
        // as specified in RFC 2045.
        if ((sLen - sepCnt) % 4 != 0)
            return null;

        // Count '=' at the end. The scan walks back over every trailing char
        // whose table value is <= 0 (illegal chars, '=' and 'A') and counts
        // only the '=' characters.
        int pad = 0;
        for (int i = sLen; i > 1 && sextet(str.charAt(--i)) <= 0;)
            if (str.charAt(i) == '=')
                pad++;

        int len = ((sLen - sepCnt) * 6 >> 3) - pad;

        byte[] dArr = new byte[len]; // Preallocate byte[] of exact length

        for (int s = 0, d = 0; d < len;) {
            // Assemble three bytes into an int from four "valid" characters.
            int i = 0;
            for (int j = 0; j < 4; j++) { // j only advances on a valid char
                int c = sextet(str.charAt(s++));
                if (c >= 0)
                    i |= c << (18 - j * 6);
                else
                    j--;
            }
            // Add the bytes
            dArr[d++] = (byte) (i >> 16);
            if (d < len) {
                dArr[d++] = (byte) (i >> 8);
                if (d < len)
                    dArr[d++] = (byte) i;
            }
        }
        return dArr;
    }

    /**
     * Decodes a BASE64 encoded string that is known to be reasonably well
     * formatted. It skips the validation done by
     * {@link #decode(String, boolean)}. The preconditions are:<br>
     * + The string must have a line length of 76 chars OR no line separators
     * at all (one line).<br>
     * + Line separator must be "\r\n", as specified in RFC 2045.<br>
     * + The string must not contain illegal characters within the encoded
     * data.<br>
     * + The string CAN have illegal characters at the beginning and end,
     * those will be trimmed.<br>
     * Input that consists only of padding decodes to an empty array.
     *
     * @param s
     *            The source string. Length 0 will return an empty array.
     *            <code>null</code> will throw an exception.
     * @return The decoded array of bytes. May be of length 0.
     */
    public final static byte[] decodeFast(String s) {
        // Check special case
        int sLen = s.length();
        if (sLen == 0)
            return new byte[0];

        int sIx = 0, eIx = sLen - 1; // Start and end index after trimming.

        // Trim illegal chars from start. The full char value is looked up, so
        // a char above 255 is never mistaken for an alphabet character.
        while (sIx < eIx && sextet(s.charAt(sIx)) < 0)
            sIx++;

        // Trim illegal chars from end
        while (eIx > 0 && sextet(s.charAt(eIx)) < 0)
            eIx--;

        // Get the padding count (=) (0, 1 or 2). The eIx > 0 guard keeps a
        // lone '=' from reading index -1.
        int pad = s.charAt(eIx) == '=' ? (eIx > 0 && s.charAt(eIx - 1) == '=' ? 2 : 1) : 0;
        int cCnt = eIx - sIx + 1; // Content count including separators
        // Line-wrapped input is detected by a '\r' at absolute index 76.
        int sepCnt = sLen > 76 ? (s.charAt(76) == '\r' ? cCnt / 78 : 0) << 1
                : 0;

        int len = ((cCnt - sepCnt) * 6 >> 3) - pad; // Number of decoded bytes
        if (len <= 0)
            return new byte[0]; // Nothing to decode (e.g. padding only).
        byte[] dArr = new byte[len]; // Preallocate byte[] of exact length

        // Decode all but the last 0 - 2 bytes.
        int d = 0;
        for (int cc = 0, eLen = (len / 3) * 3; d < eLen;) {
            // Assemble three bytes into an int from four "valid" characters.
            int i = sextet(s.charAt(sIx++)) << 18 | sextet(s.charAt(sIx++)) << 12
                    | sextet(s.charAt(sIx++)) << 6 | sextet(s.charAt(sIx++));

            // Add the bytes
            dArr[d++] = (byte) (i >> 16);
            dArr[d++] = (byte) (i >> 8);
            dArr[d++] = (byte) i;

            // If line separator, jump over it.
            if (sepCnt > 0 && ++cc == 19) {
                sIx += 2;
                cc = 0;
            }
        }

        if (d < len) {
            // Decode the remaining characters (excluding '=') into 1-2 bytes
            int i = 0;
            for (int j = 0; sIx <= eIx - pad; j++)
                i |= sextet(s.charAt(sIx++)) << (18 - j * 6);

            for (int r = 16; d < len; r -= 8)
                dArr[d++] = (byte) (i >> r);
        }

        return dArr;
    }

    /**
     * Decodes BASE64 text with {@link #decodeFast(String)} and interprets the
     * resulting bytes as UTF-8; the counterpart of {@link #encode(String)}.
     *
     * @param s
     *            The BASE64 text; it must satisfy the preconditions of
     *            {@link #decodeFast(String)}. <code>null</code> will throw an
     *            exception.
     * @return The decoded text.
     * @throws UnsupportedEncodingException
     *             if the JVM does not provide the UTF-8 charset.
     */
    public static String decode(String s) throws UnsupportedEncodingException {
        return new String(Base64.decodeFast(s), "UTF-8");
    }

    /**
     * Small demo: prints the BASE64 form of a Chinese sample string and then
     * the text obtained by decoding it again.
     *
     * @param args
     *            Not used.
     * @throws UnsupportedEncodingException
     *             if the JVM does not provide the UTF-8 charset.
     */
    public static void main(String[] args) throws UnsupportedEncodingException {
        // Four Chinese characters (U+6D4B U+8BD5 U+8D26 U+6237), written as
        // escapes so the source compiles the same under any file encoding.
        String s = "\u6d4b\u8bd5\u8d26\u6237";
        String encodeS = Base64.encode(s);
        System.out.println(encodeS);
        System.out.println(Base64.decode(encodeS));
    }
}
704
2
3 import java.io.UnsupportedEncodingException;
4 import java.util.Arrays;
5
6 /** */ /**
7 * A very fast and memory efficient class to encode and decode to and from
8 * BASE64 in full accordance with RFC 2045.<br>
9 * <br>
10 * On Windows XP sp1 with 1.4.2_04 and later ;), this encoder and decoder is
11 * about 10 times faster on small arrays (10 - 1000 bytes) and 2-3 times as fast
12 * on larger arrays (10000 - 1000000 bytes) compared to
13 * <code>sun.misc.Encoder()/Decoder()</code>.<br>
14 * <br>
15 *
16 * On byte arrays the encoder is about 20% faster than Jakarta Commons Base64
17 * Codec for encode and about 50% faster for decoding large arrays. This
18 * implementation is about twice as fast on very small arrays (< 30 bytes). If
19 * source/destination is a <code>String</code> this version is about three
20 * times as fast due to the fact that the Commons Codec result has to be recoded
21 * to a <code>String</code> from <code>byte[]</code>, which is very
22 * expensive.<br>
23 * <br>
24 *
25 * This encode/decode algorithm doesn't create any temporary arrays as many
26 * other codecs do, it only allocates the resulting array. This produces less
27 * garbage and it is possible to handle arrays twice as large as algorithms that
28 * create a temporary array. (E.g. Jakarta Commons Codec). It is unknown whether
29 * Sun's <code>sun.misc.Encoder()/Decoder()</code> produce temporary arrays
30 * but since performance is quite low it probably does.<br>
31 * <br>
32 *
33 * The encoder produces the same output as the Sun one except that the Sun's
34 * encoder appends a trailing line separator if the last character isn't a pad.
35 * Unclear why but it only adds to the length and is probably a side effect.
36 * Both are in conformance with RFC 2045 though.<br>
37 * Commons codec seems to always add a trailing line separator.<br>
38 * <br>
39 *
40 * <b>Note!</b> The encode/decode method pairs (types) come in three versions
41 * with the <b>exact</b> same algorithm and thus a lot of code redundancy. This
42 * is to not create any temporary arrays for transcoding to/from different
43 * format types. The methods not used can simply be commented out.<br>
44 * <br>
45 *
46 * There is also a "fast" version of all decode methods that works the same way
47 * as the normal ones, but has a few demands on the decoded input. Normally
48 * though, these fast versions should be used if the source of the input is known
49 * and it hasn't been tampered with.<br>
50 * <br>
51 *
52 * If you find the code useful or you find a bug, please send me a note at
53 * base64 @ miginfocom . com.
54 *
55 * Licence (BSD): ==============
56 *
57 * Copyright (c) 2004, Mikael Grev, MiG InfoCom AB. (base64 @ miginfocom . com)
58 * All rights reserved.
59 *
60 * Redistribution and use in source and binary forms, with or without
61 * modification, are permitted provided that the following conditions are met:
62 * Redistributions of source code must retain the above copyright notice, this
63 * list of conditions and the following disclaimer. Redistributions in binary
64 * form must reproduce the above copyright notice, this list of conditions and
65 * the following disclaimer in the documentation and/or other materials provided
66 * with the distribution. Neither the name of the MiG InfoCom AB nor the names
67 * of its contributors may be used to endorse or promote products derived from
68 * this software without specific prior written permission.
69 *
70 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
71 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
72 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
73 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
74 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
75 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
76 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
77 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
78 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
79 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
80 * POSSIBILITY OF SUCH DAMAGE.
81 *
82 * @version 2.2
83 * @author Mikael Grev Date: 2004-aug-02 Time: 11:31:11
84 */
85
86 public class Base64 {
/** The Base64 alphabet; the array index is the 6-bit value a character stands for. */
private static final char[] CA =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".toCharArray();

/**
 * Reverse table for character codes 0-255: the 6-bit value of an alphabet
 * character, 0 for the pad character '=', and -1 for every other code.
 */
private static final int[] IA = new int[256];

static {
    // Start with "illegal" everywhere, then fill in the alphabet.
    Arrays.fill(IA, -1);
    int value = 0;
    while (value < CA.length) {
        IA[CA[value]] = value;
        value++;
    }
    // Padding contributes zero bits when decoding.
    IA['='] = 0;
}
96
97 // ****************************************************************************************
98 // * char[] version
99 // ****************************************************************************************
100
101 /** *//**
102 * Encodes a raw byte array into a BASE64 <code>char[]</code>
103 * representation i accordance with RFC 2045.
104 *
105 * @param sArr
106 * The bytes to convert. If <code>null</code> or length 0 an
107 * empty array will be returned.
108 * @param lineSep
109 * Optional "\r\n" after 76 characters, unless end of file.<br>
110 * No line separator will be in breach of RFC 2045 which
111 * specifies max 76 per line but will be a little faster.
112 * @return A BASE64 encoded array. Never <code>null</code>.
113 */
114 public final static char[] encodeToChar(byte[] sArr, boolean lineSep) {
115 // Check special case
116 int sLen = sArr != null ? sArr.length : 0;
117 if (sLen == 0)
118 return new char[0];
119
120 int eLen = (sLen / 3) * 3; // Length of even 24-bits.
121 int cCnt = ((sLen - 1) / 3 + 1) << 2; // Returned character count
122 int dLen = cCnt + (lineSep ? (cCnt - 1) / 76 << 1 : 0); // Length of
123 // returned
124 // array
125 char[] dArr = new char[dLen];
126
127 // Encode even 24-bits
128 for (int s = 0, d = 0, cc = 0; s < eLen;) {
129 // Copy next three bytes into lower 24 bits of int, paying attension
130 // to sign.
131 int i = (sArr[s++] & 0xff) << 16 | (sArr[s++] & 0xff) << 8
132 | (sArr[s++] & 0xff);
133
134 // Encode the int into four chars
135 dArr[d++] = CA[(i >>> 18) & 0x3f];
136 dArr[d++] = CA[(i >>> 12) & 0x3f];
137 dArr[d++] = CA[(i >>> 6) & 0x3f];
138 dArr[d++] = CA[i & 0x3f];
139
140 // Add optional line separator
141 if (lineSep && ++cc == 19 && d < dLen - 2) {
142 dArr[d++] = '\r';
143 dArr[d++] = '\n';
144 cc = 0;
145 }
146 }
147
148 // Pad and encode last bits if source isn't even 24 bits.
149 int left = sLen - eLen; // 0 - 2.
150 if (left > 0) {
151 // Prepare the int
152 int i = ((sArr[eLen] & 0xff) << 10)
153 | (left == 2 ? ((sArr[sLen - 1] & 0xff) << 2) : 0);
154
155 // Set last four chars
156 dArr[dLen - 4] = CA[i >> 12];
157 dArr[dLen - 3] = CA[(i >>> 6) & 0x3f];
158 dArr[dLen - 2] = left == 2 ? CA[i & 0x3f] : '=';
159 dArr[dLen - 1] = '=';
160 }
161 return dArr;
162 }
163
164 /** *//**
165 * Decodes a BASE64 encoded char array. All illegal characters will be
166 * ignored and can handle both arrays with and without line separators.
167 *
168 * @param sArr
169 * The source array. <code>null</code> or length 0 will return
170 * an empty array.
171 * @return The decoded array of bytes. May be of length 0. Will be
172 * <code>null</code> if the legal characters (including '=') isn't
173 * divideable by 4. (I.e. definitely corrupted).
174 */
175 public final static byte[] decode(char[] sArr) {
176 // Check special case
177 int sLen = sArr != null ? sArr.length : 0;
178 if (sLen == 0)
179 return new byte[0];
180
181 // Count illegal characters (including '\r', '\n') to know what size the
182 // returned array will be,
183 // so we don't have to reallocate & copy it later.
184 int sepCnt = 0; // Number of separator characters. (Actually illegal
185 // characters, but that's a bonus)
186 for (int i = 0; i < sLen; i++)
187 // If input is "pure" (I.e. no line separators or illegal chars)
188 // base64 this loop can be commented out.
189 if (IA[sArr[i]] < 0)
190 sepCnt++;
191
192 // Check so that legal chars (including '=') are evenly divideable by 4
193 // as specified in RFC 2045.
194 if ((sLen - sepCnt) % 4 != 0)
195 return null;
196
197 int pad = 0;
198 for (int i = sLen; i > 1 && IA[sArr[--i]] <= 0;)
199 if (sArr[i] == '=')
200 pad++;
201
202 int len = ((sLen - sepCnt) * 6 >> 3) - pad;
203
204 byte[] dArr = new byte[len]; // Preallocate byte[] of exact length
205
206 for (int s = 0, d = 0; d < len;) {
207 // Assemble three bytes into an int from four "valid" characters.
208 int i = 0;
209 for (int j = 0; j < 4; j++) { // j only increased if a valid char
210 // was found.
211 int c = IA[sArr[s++]];
212 if (c >= 0)
213 i |= c << (18 - j * 6);
214 else
215 j--;
216 }
217 // Add the bytes
218 dArr[d++] = (byte) (i >> 16);
219 if (d < len) {
220 dArr[d++] = (byte) (i >> 8);
221 if (d < len)
222 dArr[d++] = (byte) i;
223 }
224 }
225 return dArr;
226 }
227
228 /** *//**
229 * Decodes a BASE64 encoded char array that is known to be resonably well
230 * formatted. The method is about twice as fast as {@link #decode(char[])}.
231 * The preconditions are:<br> + The array must have a line length of 76
232 * chars OR no line separators at all (one line).<br> + Line separator must
233 * be "\r\n", as specified in RFC 2045 + The array must not contain illegal
234 * characters within the encoded string<br> + The array CAN have illegal
235 * characters at the beginning and end, those will be dealt with
236 * appropriately.<br>
237 *
238 * @param sArr
239 * The source array. Length 0 will return an empty array.
240 * <code>null</code> will throw an exception.
241 * @return The decoded array of bytes. May be of length 0.
242 */
243 public final static byte[] decodeFast(char[] sArr) {
244 // Check special case
245 int sLen = sArr.length;
246 if (sLen == 0)
247 return new byte[0];
248
249 int sIx = 0, eIx = sLen - 1; // Start and end index after trimming.
250
251 // Trim illegal chars from start
252 while (sIx < eIx && IA[sArr[sIx]] < 0)
253 sIx++;
254
255 // Trim illegal chars from end
256 while (eIx > 0 && IA[sArr[eIx]] < 0)
257 eIx--;
258
259 // get the padding count (=) (0, 1 or 2)
260 int pad = sArr[eIx] == '=' ? (sArr[eIx - 1] == '=' ? 2 : 1) : 0; // Count
261 // '='
262 // at
263 // end.
264 int cCnt = eIx - sIx + 1; // Content count including possible
265 // separators
266 int sepCnt = sLen > 76 ? (sArr[76] == '\r' ? cCnt / 78 : 0) << 1 : 0;
267
268 int len = ((cCnt - sepCnt) * 6 >> 3) - pad; // The number of decoded
269 // bytes
270 byte[] dArr = new byte[len]; // Preallocate byte[] of exact length
271
272 // Decode all but the last 0 - 2 bytes.
273 int d = 0;
274 for (int cc = 0, eLen = (len / 3) * 3; d < eLen;) {
275 // Assemble three bytes into an int from four "valid" characters.
276 int i = IA[sArr[sIx++]] << 18 | IA[sArr[sIx++]] << 12
277 | IA[sArr[sIx++]] << 6 | IA[sArr[sIx++]];
278
279 // Add the bytes
280 dArr[d++] = (byte) (i >> 16);
281 dArr[d++] = (byte) (i >> 8);
282 dArr[d++] = (byte) i;
283
284 // If line separator, jump over it.
285 if (sepCnt > 0 && ++cc == 19) {
286 sIx += 2;
287 cc = 0;
288 }
289 }
290
291 if (d < len) {
292 // Decode last 1-3 bytes (incl '=') into 1-3 bytes
293 int i = 0;
294 for (int j = 0; sIx <= eIx - pad; j++)
295 i |= IA[sArr[sIx++]] << (18 - j * 6);
296
297 for (int r = 16; d < len; r -= 8)
298 dArr[d++] = (byte) (i >> r);
299 }
300
301 return dArr;
302 }
303
304 // ****************************************************************************************
305 // * byte[] version
306 // ****************************************************************************************
307
308 /** *//**
309 * Encodes a raw byte array into a BASE64 <code>byte[]</code>
310 * representation i accordance with RFC 2045.
311 *
312 * @param sArr
313 * The bytes to convert. If <code>null</code> or length 0 an
314 * empty array will be returned.
315 * @param lineSep
316 * Optional "\r\n" after 76 characters, unless end of file.<br>
317 * No line separator will be in breach of RFC 2045 which
318 * specifies max 76 per line but will be a little faster.
319 * @return A BASE64 encoded array. Never <code>null</code>.
320 */
321 public final static byte[] encodeToByte(byte[] sArr, boolean lineSep) {
322 // Check special case
323 int sLen = sArr != null ? sArr.length : 0;
324 if (sLen == 0)
325 return new byte[0];
326
327 int eLen = (sLen / 3) * 3; // Length of even 24-bits.
328 int cCnt = ((sLen - 1) / 3 + 1) << 2; // Returned character count
329 int dLen = cCnt + (lineSep ? (cCnt - 1) / 76 << 1 : 0); // Length of
330 // returned
331 // array
332 byte[] dArr = new byte[dLen];
333
334 // Encode even 24-bits
335 for (int s = 0, d = 0, cc = 0; s < eLen;) {
336 // Copy next three bytes into lower 24 bits of int, paying attension
337 // to sign.
338 int i = (sArr[s++] & 0xff) << 16 | (sArr[s++] & 0xff) << 8
339 | (sArr[s++] & 0xff);
340
341 // Encode the int into four chars
342 dArr[d++] = (byte) CA[(i >>> 18) & 0x3f];
343 dArr[d++] = (byte) CA[(i >>> 12) & 0x3f];
344 dArr[d++] = (byte) CA[(i >>> 6) & 0x3f];
345 dArr[d++] = (byte) CA[i & 0x3f];
346
347 // Add optional line separator
348 if (lineSep && ++cc == 19 && d < dLen - 2) {
349 dArr[d++] = '\r';
350 dArr[d++] = '\n';
351 cc = 0;
352 }
353 }
354
355 // Pad and encode last bits if source isn't an even 24 bits.
356 int left = sLen - eLen; // 0 - 2.
357 if (left > 0) {
358 // Prepare the int
359 int i = ((sArr[eLen] & 0xff) << 10)
360 | (left == 2 ? ((sArr[sLen - 1] & 0xff) << 2) : 0);
361
362 // Set last four chars
363 dArr[dLen - 4] = (byte) CA[i >> 12];
364 dArr[dLen - 3] = (byte) CA[(i >>> 6) & 0x3f];
365 dArr[dLen - 2] = left == 2 ? (byte) CA[i & 0x3f] : (byte) '=';
366 dArr[dLen - 1] = '=';
367 }
368 return dArr;
369 }
370
371 /** *//**
372 * Decodes a BASE64 encoded byte array. All illegal characters will be
373 * ignored and can handle both arrays with and without line separators.
374 *
375 * @param sArr
376 * The source array. Length 0 will return an empty array.
377 * <code>null</code> will throw an exception.
378 * @return The decoded array of bytes. May be of length 0. Will be
379 * <code>null</code> if the legal characters (including '=') isn't
380 * divideable by 4. (I.e. definitely corrupted).
381 */
382 public final static byte[] decode(byte[] sArr) {
383 // Check special case
384 int sLen = sArr.length;
385
386 // Count illegal characters (including '\r', '\n') to know what size the
387 // returned array will be,
388 // so we don't have to reallocate & copy it later.
389 int sepCnt = 0; // Number of separator characters. (Actually illegal
390 // characters, but that's a bonus)
391 for (int i = 0; i < sLen; i++)
392 // If input is "pure" (I.e. no line separators or illegal chars)
393 // base64 this loop can be commented out.
394 if (IA[sArr[i] & 0xff] < 0)
395 sepCnt++;
396
397 // Check so that legal chars (including '=') are evenly divideable by 4
398 // as specified in RFC 2045.
399 if ((sLen - sepCnt) % 4 != 0)
400 return null;
401
402 int pad = 0;
403 for (int i = sLen; i > 1 && IA[sArr[--i] & 0xff] <= 0;)
404 if (sArr[i] == '=')
405 pad++;
406
407 int len = ((sLen - sepCnt) * 6 >> 3) - pad;
408
409 byte[] dArr = new byte[len]; // Preallocate byte[] of exact length
410
411 for (int s = 0, d = 0; d < len;) {
412 // Assemble three bytes into an int from four "valid" characters.
413 int i = 0;
414 for (int j = 0; j < 4; j++) { // j only increased if a valid char
415 // was found.
416 int c = IA[sArr[s++] & 0xff];
417 if (c >= 0)
418 i |= c << (18 - j * 6);
419 else
420 j--;
421 }
422
423 // Add the bytes
424 dArr[d++] = (byte) (i >> 16);
425 if (d < len) {
426 dArr[d++] = (byte) (i >> 8);
427 if (d < len)
428 dArr[d++] = (byte) i;
429 }
430 }
431
432 return dArr;
433 }
434
435 /** *//**
436 * Decodes a BASE64 encoded byte array that is known to be resonably well
437 * formatted. The method is about twice as fast as {@link #decode(byte[])}.
438 * The preconditions are:<br> + The array must have a line length of 76
439 * chars OR no line separators at all (one line).<br> + Line separator must
440 * be "\r\n", as specified in RFC 2045 + The array must not contain illegal
441 * characters within the encoded string<br> + The array CAN have illegal
442 * characters at the beginning and end, those will be dealt with
443 * appropriately.<br>
444 *
445 * @param sArr
446 * The source array. Length 0 will return an empty array.
447 * <code>null</code> will throw an exception.
448 * @return The decoded array of bytes. May be of length 0.
449 */
450 public final static byte[] decodeFast(byte[] sArr) {
451 // Check special case
452 int sLen = sArr.length;
453 if (sLen == 0)
454 return new byte[0];
455
456 int sIx = 0, eIx = sLen - 1; // Start and end index after trimming.
457
458 // Trim illegal chars from start
459 while (sIx < eIx && IA[sArr[sIx] & 0xff] < 0)
460 sIx++;
461
462 // Trim illegal chars from end
463 while (eIx > 0 && IA[sArr[eIx] & 0xff] < 0)
464 eIx--;
465
466 // get the padding count (=) (0, 1 or 2)
467 int pad = sArr[eIx] == '=' ? (sArr[eIx - 1] == '=' ? 2 : 1) : 0; // Count
468 // '='
469 // at
470 // end.
471 int cCnt = eIx - sIx + 1; // Content count including possible
472 // separators
473 int sepCnt = sLen > 76 ? (sArr[76] == '\r' ? cCnt / 78 : 0) << 1 : 0;
474
475 int len = ((cCnt - sepCnt) * 6 >> 3) - pad; // The number of decoded
476 // bytes
477 byte[] dArr = new byte[len]; // Preallocate byte[] of exact length
478
479 // Decode all but the last 0 - 2 bytes.
480 int d = 0;
481 for (int cc = 0, eLen = (len / 3) * 3; d < eLen;) {
482 // Assemble three bytes into an int from four "valid" characters.
483 int i = IA[sArr[sIx++]] << 18 | IA[sArr[sIx++]] << 12
484 | IA[sArr[sIx++]] << 6 | IA[sArr[sIx++]];
485
486 // Add the bytes
487 dArr[d++] = (byte) (i >> 16);
488 dArr[d++] = (byte) (i >> 8);
489 dArr[d++] = (byte) i;
490
491 // If line separator, jump over it.
492 if (sepCnt > 0 && ++cc == 19) {
493 sIx += 2;
494 cc = 0;
495 }
496 }
497
498 if (d < len) {
499 // Decode last 1-3 bytes (incl '=') into 1-3 bytes
500 int i = 0;
501 for (int j = 0; sIx <= eIx - pad; j++)
502 i |= IA[sArr[sIx++]] << (18 - j * 6);
503
504 for (int r = 16; d < len; r -= 8)
505 dArr[d++] = (byte) (i >> r);
506 }
507
508 return dArr;
509 }
510
511 // ****************************************************************************************
512 // * String version
513 // ****************************************************************************************
514
515 /** *//**
516 * Encodes a raw byte array into a BASE64 <code>String</code>
517 * representation i accordance with RFC 2045.
518 *
519 * @param sArr
520 * The bytes to convert. If <code>null</code> or length 0 an
521 * empty array will be returned.
522 * @param lineSep
523 * Optional "\r\n" after 76 characters, unless end of file.<br>
524 * No line separator will be in breach of RFC 2045 which
525 * specifies max 76 per line but will be a little faster.
526 * @return A BASE64 encoded array. Never <code>null</code>.
527 */
528 public final static String encodeToString(byte[] sArr, boolean lineSep) {
529 // Reuse char[] since we can't create a String incrementally anyway and
530 // StringBuffer/Builder would be slower.
531 return new String(encodeToChar(sArr, lineSep));
532 }
533
534 public final static String encode(String s) {
535 // Reuse char[] since we can't create a String incrementally anyway and
536 // StringBuffer/Builder would be slower.
537 try {
538 return new String(encodeToChar(s.getBytes("UTF-8"), false));
539 } catch (UnsupportedEncodingException e) {
540 System.err.println("Base64 encoding error: " + e.getMessage());
541 e.printStackTrace();
542 }
543 return null;
544 }
545
546 /** *//**
547 * Decodes a BASE64 encoded <code>String</code>. All illegal characters
548 * will be ignored and can handle both strings with and without line
549 * separators.<br>
550 * <b>Note!</b> It can be up to about 2x the speed to call
551 * <code>decode(str.toCharArray())</code> instead. That will create a
552 * temporary array though. This version will use <code>str.charAt(i)</code>
553 * to iterate the string.
554 *
555 * @param str
556 * The source string. <code>null</code> or length 0 will return
557 * an empty array.
558 * @return The decoded array of bytes. May be of length 0. Will be
559 * <code>null</code> if the legal characters (including '=') isn't
560 * divideable by 4. (I.e. definitely corrupted).
561 */
562 public final static byte[] decode(String str, boolean used) {
563 // Check special case
564 int sLen = str != null ? str.length() : 0;
565 if (sLen == 0)
566 return new byte[0];
567
568 // Count illegal characters (including '\r', '\n') to know what size the
569 // returned array will be,
570 // so we don't have to reallocate & copy it later.
571 int sepCnt = 0; // Number of separator characters. (Actually illegal
572 // characters, but that's a bonus)
573 for (int i = 0; i < sLen; i++)
574 // If input is "pure" (I.e. no line separators or illegal chars)
575 // base64 this loop can be commented out.
576 if (IA[str.charAt(i)] < 0)
577 sepCnt++;
578
579 // Check so that legal chars (including '=') are evenly divideable by 4
580 // as specified in RFC 2045.
581 if ((sLen - sepCnt) % 4 != 0)
582 return null;
583
584 // Count '=' at end
585 int pad = 0;
586 for (int i = sLen; i > 1 && IA[str.charAt(--i)] <= 0;)
587 if (str.charAt(i) == '=')
588 pad++;
589
590 int len = ((sLen - sepCnt) * 6 >> 3) - pad;
591
592 byte[] dArr = new byte[len]; // Preallocate byte[] of exact length
593
594 for (int s = 0, d = 0; d < len;) {
595 // Assemble three bytes into an int from four "valid" characters.
596 int i = 0;
597 for (int j = 0; j < 4; j++) { // j only increased if a valid char
598 // was found.
599 int c = IA[str.charAt(s++)];
600 if (c >= 0)
601 i |= c << (18 - j * 6);
602 else
603 j--;
604 }
605 // Add the bytes
606 dArr[d++] = (byte) (i >> 16);
607 if (d < len) {
608 dArr[d++] = (byte) (i >> 8);
609 if (d < len)
610 dArr[d++] = (byte) i;
611 }
612 }
613 return dArr;
614 }
615
616 /** *//**
617 * Decodes a BASE64 encoded string that is known to be resonably well
618 * formatted. The method is about twice as fast as {@link #decode(String)}.
619 * The preconditions are:<br> + The array must have a line length of 76
620 * chars OR no line separators at all (one line).<br> + Line separator must
621 * be "\r\n", as specified in RFC 2045 + The array must not contain illegal
622 * characters within the encoded string<br> + The array CAN have illegal
623 * characters at the beginning and end, those will be dealt with
624 * appropriately.<br>
625 *
626 * @param s
627 * The source string. Length 0 will return an empty array.
628 * <code>null</code> will throw an exception.
629 * @return The decoded array of bytes. May be of length 0.
630 */
631 public final static byte[] decodeFast(String s) {
632 // Check special case
633 int sLen = s.length();
634 if (sLen == 0)
635 return new byte[0];
636
637 int sIx = 0, eIx = sLen - 1; // Start and end index after trimming.
638
639 // Trim illegal chars from start
640 while (sIx < eIx && IA[s.charAt(sIx) & 0xff] < 0)
641 sIx++;
642
643 // Trim illegal chars from end
644 while (eIx > 0 && IA[s.charAt(eIx) & 0xff] < 0)
645 eIx--;
646
647 // get the padding count (=) (0, 1 or 2)
648 int pad = s.charAt(eIx) == '=' ? (s.charAt(eIx - 1) == '=' ? 2 : 1) : 0; // Count
649 // '='
650 // at
651 // end.
652 int cCnt = eIx - sIx + 1; // Content count including possible
653 // separators
654 int sepCnt = sLen > 76 ? (s.charAt(76) == '\r' ? cCnt / 78 : 0) << 1
655 : 0;
656
657 int len = ((cCnt - sepCnt) * 6 >> 3) - pad; // The number of decoded
658 // bytes
659 byte[] dArr = new byte[len]; // Preallocate byte[] of exact length
660
661 // Decode all but the last 0 - 2 bytes.
662 int d = 0;
663 for (int cc = 0, eLen = (len / 3) * 3; d < eLen;) {
664 // Assemble three bytes into an int from four "valid" characters.
665 int i = IA[s.charAt(sIx++)] << 18 | IA[s.charAt(sIx++)] << 12
666 | IA[s.charAt(sIx++)] << 6 | IA[s.charAt(sIx++)];
667
668 // Add the bytes
669 dArr[d++] = (byte) (i >> 16);
670 dArr[d++] = (byte) (i >> 8);
671 dArr[d++] = (byte) i;
672
673 // If line separator, jump over it.
674 if (sepCnt > 0 && ++cc == 19) {
675 sIx += 2;
676 cc = 0;
677 }
678 }
679
680 if (d < len) {
681 // Decode last 1-3 bytes (incl '=') into 1-3 bytes
682 int i = 0;
683 for (int j = 0; sIx <= eIx - pad; j++)
684 i |= IA[s.charAt(sIx++)] << (18 - j * 6);
685
686 for (int r = 16; d < len; r -= 8)
687 dArr[d++] = (byte) (i >> r);
688 }
689
690 return dArr;
691 }
692
693 public static String decode(String s) throws UnsupportedEncodingException {
694 return new String(Base64.decodeFast(s), "UTF-8");
695 }
696
697 public static void main(String[] args) throws UnsupportedEncodingException {
698 String s = "测试账户";
699 String encodeS = Base64.encode(s);
700 System.out.println(encodeS);
701 System.out.println(Base64.decode(encodeS));
702 }
703}
704
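改动主要在代码清单的第538行和第694行这两处 UTF-8 与 UTF-16 之间的转换:encode(String) 中先用 s.getBytes("UTF-8") 把 Java 内部以 UTF-16 表示的字符串转成 UTF-8 字节再进行 Base64 编码;decode(String) 中用 new String(bytes, "UTF-8") 把解码得到的 UTF-8 字节还原为字符串。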