
     最近学了一下MIME,里面有两种主要的编码方式:Base64编码和QP(Quoted-Printable)编码。现简单介绍一下这两种编码的规则,然后提供一段JAVA代码,用来编码和解析BASE64以及QP编码。

    1. BASE64编码

   按照RFC2045的定义,Base64被定义为:Base64内容传送编码被设计用来把任意序列的8位字节描述为一种不易被人直接识别的形式。(The Base64 Content-Transfer-Encoding is designed to represent arbitrary sequences of octets in a form that need not be humanly readable.)

   编码原理:将3个字节转换成4个字节(3 × 8 = 24 = 4 × 6)。先读入3个字节,每读入一个字节,就把已读入的值左移8位再并入该字节;然后右移四次,每次取6位,这样就得到4个字节了。

   解码原理:将4个字节转换成3个字节。先读入4个6位的值(用或运算合并),每读入一个就把已读入的值左移6位;然后右移3次,每次取8位,这样就还原了。

   编码规则:用“A…Z a…z 0…9 + /”这64个字符来表示6位长度的二进制数值,该数值从0到63依次对应从“A”至“/”的字符。编码时每3个字节为一组进行编码,每3个字节(24位)以最高两位补0的形式分成四个字节,这样每个字节的值刚好能与“A…Z a…z 0…9 + /”64个字符一一对应。当最后不足三个字节时,编码后用“=”号补足四个字节。BASE64编码每行不得超过76个字符(不包含最后的“\r\n”两个字符),否则必须换行,换行方法是:在行尾加上“\r\n”。

    2. QP编码

   规则 1:除了换行符,其它任何 8 位长度的字节必须被表示成一个“=”号加上高四位的十六进制数和低四位的十六进制数,十六进制数用“0123456789ABCDEF”的 ASCII 字符码表示,其中“ABCDEF”都必须是大写;

   规则 2:当 8 位长度的字节值在 33 到 60(包括 33 和 60)或 62 到 126(包括 62 和 126)之间时,直接用该数据对应的 ASCII 字符来表示;

   规则 3:当 8 位长度的字节值是 9 或 32 时,用该数据对应的 ASCII 字符(9 为 TAB(HT),32 为 SPACE)来表示,但若是出现在行尾则必须按规则 1 进行转换;

   规则 4:对于 CRLF 换行符必须用“=0D=0A”来代替;

   规则 5:QP 编码每行不得超过 76 个字符(不包含最后的“\r\n”两个字符),否则必须换行,换行方法是:在行尾加上“=\r\n”;

   其它:符号“-”不用进行 QP 编码,所以不要用“-”符号来表示实体之间的分界,最好用“=_”来代替;对于 !"#$[\]^`{|}~ 这些字符,必须按规则 1 进行转换。

package test;



import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.BitSet;


/**
 * @author Bill
 *
 * @version Date:Jul 11, 2008 9:28:07 AM


/**
 * Encodes and decodes strings with the two MIME content-transfer-encodings
 * described in RFC 2045: Base64 and quoted-printable (QP).
 *
 * <p>Plain text is converted to and from bytes as UTF-8; the encoded form
 * consists of US-ASCII characters only. Every public method returns
 * {@code null} when given {@code null}.
 */
public class CodeConversion {

    private static final String CHARSET_UTF8 = "UTF-8";

    private static final String CHARSET_ASCII = "US-ASCII";

    /** The '=' character: QP escape introducer and Base64 padding. */
    private static final byte ESCAPE_CHAR = '=';

    /** Byte values that the QP encoder may emit without escaping. */
    private static final BitSet PRINTABLE_CHARS = new BitSet(256);

    private static final byte TAB = 9;

    private static final byte SPACE = 32;

    /** Upper-case hexadecimal digits used for QP escapes. */
    private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

    /** Maximum number of characters on one encoded line, excluding the line break. */
    static final int CHUNK_SIZE = 76;

    /**
     * Line break (CR LF) inserted between encoded lines.
     */
    static final byte[] CHUNK_SEPARATOR = { '\r', '\n' };

    /**
     * The base length (size of the decoding table).
     */
    static final int BASELENGTH = 255;

    /**
     * Lookup length (size of the Base64 alphabet).
     */
    static final int LOOKUPLENGTH = 64;

    /**
     * Used to calculate the number of bits in a byte.
     */
    static final int EIGHTBIT = 8;

    /**
     * Used when encoding something which has fewer than 24 bits.
     */
    static final int SIXTEENBIT = 16;

    /**
     * Used to determine how many bits data contains.
     */
    static final int TWENTYFOURBITGROUP = 24;

    /**
     * Used to get the number of Quadruples.
     */
    static final int FOURBYTE = 4;

    /**
     * Used to test the sign of a byte.
     */
    static final int SIGN = -128;

    /** Maps an ASCII code to its 6-bit Base64 value, or -1 if it is not in the alphabet. */
    private static byte[] base64Alphabet = new byte[BASELENGTH];

    /** Maps a 6-bit value to the ASCII code of its Base64 character. */
    private static byte[] lookUpBase64Alphabet = new byte[LOOKUPLENGTH];

    // Populate the Base64 decoding and encoding tables.
    static {
        for (int i = 0; i < BASELENGTH; i++) {
            base64Alphabet[i] = (byte) -1;
        }
        for (int i = 'Z'; i >= 'A'; i--) {
            base64Alphabet[i] = (byte) (i - 'A');
        }
        for (int i = 'z'; i >= 'a'; i--) {
            base64Alphabet[i] = (byte) (i - 'a' + 26);
        }
        for (int i = '9'; i >= '0'; i--) {
            base64Alphabet[i] = (byte) (i - '0' + 52);
        }
        base64Alphabet['+'] = 62;
        base64Alphabet['/'] = 63;

        for (int i = 0; i <= 25; i++) {
            lookUpBase64Alphabet[i] = (byte) ('A' + i);
        }
        for (int i = 26, j = 0; i <= 51; i++, j++) {
            lookUpBase64Alphabet[i] = (byte) ('a' + j);
        }
        for (int i = 52, j = 0; i <= 61; i++, j++) {
            lookUpBase64Alphabet[i] = (byte) ('0' + j);
        }
        lookUpBase64Alphabet[62] = (byte) '+';
        lookUpBase64Alphabet[63] = (byte) '/';
    }

    // Populate the set of bytes QP may pass through: 33..60, 62..126, TAB and SPACE.
    static {
        for (int i = 33; i <= 60; i++) {
            PRINTABLE_CHARS.set(i);
        }
        for (int i = 62; i <= 126; i++) {
            PRINTABLE_CHARS.set(i);
        }
        PRINTABLE_CHARS.set(TAB);
        PRINTABLE_CHARS.set(SPACE);
    }

    public CodeConversion() {
    }

    /**
     * Converts a string to its quoted-printable representation.
     *
     * <p>The string is encoded as UTF-8. Bytes 33..60 and 62..126, TAB and
     * SPACE are copied as-is; every other byte (including CR and LF) becomes
     * an {@code =XX} escape with upper-case hex digits. A soft line break
     * ("=" followed by CR LF) is inserted so that no line exceeds
     * {@link #CHUNK_SIZE} characters, and never in the middle of an escape.
     * A TAB or SPACE that is the very last byte is escaped so the output never
     * ends in whitespace.
     *
     * @param str the text to encode, may be {@code null}
     * @return the encoded text, or {@code null} if {@code str} is {@code null}
     * @throws Exception if a required charset is unavailable or writing to the
     *         in-memory buffer fails
     */
    public static String encodeQP(String str) throws Exception {
        if (str == null) {
            return null;
        }
        byte[] bytes = str.getBytes(CHARSET_UTF8);
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        // One column is reserved for the '=' of a soft line break.
        int maxContent = CHUNK_SIZE - 1;
        int lineLength = 0;
        for (int i = 0; i < bytes.length; i++) {
            int b = bytes[i] & 0xFF;
            boolean trailingWhitespace =
                    (i == bytes.length - 1) && (b == TAB || b == SPACE);
            boolean literal = PRINTABLE_CHARS.get(b) && !trailingWhitespace;
            int tokenLength = literal ? 1 : 3;
            // Break before the token so an escape is never split across lines.
            if (lineLength + tokenLength > maxContent) {
                writeSoftBreak(buffer);
                lineLength = 0;
            }
            if (literal) {
                buffer.write(b);
            } else {
                encodeQuotedPrintable(b, buffer);
            }
            lineLength += tokenLength;
        }
        return new String(buffer.toByteArray(), CHARSET_ASCII);
    }

    /**
     * Writes a QP soft line break: '=' followed by the chunk separator.
     *
     * @param buffer destination buffer
     * @throws IOException declared by {@code OutputStream.write(byte[])}
     */
    private static void writeSoftBreak(ByteArrayOutputStream buffer) throws IOException {
        buffer.write(ESCAPE_CHAR);
        buffer.write(CHUNK_SEPARATOR);
    }

    /**
     * Writes one byte as a quoted-printable escape: '=' plus two upper-case
     * hex digits.
     *
     * @param b the byte value, 0..255
     * @param buffer destination buffer
     */
    private static void encodeQuotedPrintable(int b, ByteArrayOutputStream buffer) {
        buffer.write(ESCAPE_CHAR);
        buffer.write(HEX_DIGITS[(b >> 4) & 0xF]);
        buffer.write(HEX_DIGITS[b & 0xF]);
    }

    /**
     * Converts a quoted-printable string back to normal text.
     *
     * <p>Soft line breaks ("=" followed by CR LF, or by a bare LF) are removed
     * first, then every {@code =XX} escape is replaced by the byte it denotes
     * and the resulting bytes are decoded as UTF-8. Characters outside
     * US-ASCII in the input are turned into '?' by the ASCII conversion.
     *
     * @param qpStr the quoted-printable text, may be {@code null}
     * @return the decoded text, or {@code null} if {@code qpStr} is {@code null}
     * @throws Exception if an '=' is not followed by two hexadecimal digits
     */
    public static String decodeQP(String qpStr) throws Exception {
        if (qpStr == null) {
            return null;
        }
        // Stripping soft breaks up front also repairs escapes that an older
        // encoder split across two lines.
        String joined = qpStr.replace("=\r\n", "").replace("=\n", "");
        byte[] bytes = joined.getBytes(CHARSET_ASCII);
        ByteArrayOutputStream buffer = new ByteArrayOutputStream(bytes.length);
        int i = 0;
        while (i < bytes.length) {
            byte b = bytes[i];
            if (b != ESCAPE_CHAR) {
                buffer.write(b);
                i++;
                continue;
            }
            if (i + 2 >= bytes.length) {
                throw new Exception("Invalid quoted-printable encoding");
            }
            int high = Character.digit((char) bytes[i + 1], 16);
            int low = Character.digit((char) bytes[i + 2], 16);
            if (high == -1 || low == -1) {
                throw new Exception("Invalid quoted-printable encoding");
            }
            buffer.write((high << 4) | low);
            i += 3;
        }
        return new String(buffer.toByteArray(), CHARSET_UTF8);
    }

    /**
     * Converts a normal string to a Base64 string.
     *
     * <p>The string is encoded as UTF-8 and every 3 bytes become 4 Base64
     * characters; a final group of 1 or 2 bytes is padded with '='. The output
     * is split into lines of at most {@link #CHUNK_SIZE} characters and every
     * line, including the last one, is followed by {@link #CHUNK_SEPARATOR}.
     * An empty input yields an empty output.
     *
     * @param str the text to encode, may be {@code null}
     * @return the Base64 text, or {@code null} if {@code str} is {@code null}
     * @throws Exception if the UTF-8 charset is unavailable
     */
    public static String encodeBase64(String str) throws Exception {
        if (str == null) {
            return null;
        }
        byte[] data = str.getBytes(CHARSET_UTF8);
        int encodedLength = ((data.length + 2) / 3) * FOURBYTE;
        int lineCount = (encodedLength + CHUNK_SIZE - 1) / CHUNK_SIZE;
        StringBuilder out =
                new StringBuilder(encodedLength + lineCount * CHUNK_SEPARATOR.length);
        char pad = (char) ESCAPE_CHAR;
        int lineLength = 0;
        for (int i = 0; i < data.length; i += 3) {
            int available = data.length - i;
            // Mask to 0..255 so negative bytes need no special sign handling.
            int b1 = data[i] & 0xFF;
            int b2 = (available > 1) ? (data[i + 1] & 0xFF) : 0;
            int b3 = (available > 2) ? (data[i + 2] & 0xFF) : 0;

            out.append((char) lookUpBase64Alphabet[b1 >> 2]);
            out.append((char) lookUpBase64Alphabet[((b1 & 0x03) << 4) | (b2 >> 4)]);
            if (available > 1) {
                out.append((char) lookUpBase64Alphabet[((b2 & 0x0F) << 2) | (b3 >> 6)]);
            } else {
                out.append(pad);
            }
            if (available > 2) {
                out.append((char) lookUpBase64Alphabet[b3 & 0x3F]);
            } else {
                out.append(pad);
            }

            lineLength += FOURBYTE;
            if (lineLength == CHUNK_SIZE) {
                appendChunkSeparator(out);
                lineLength = 0;
            }
        }
        // The final, shorter line is terminated as well.
        if (lineLength > 0) {
            appendChunkSeparator(out);
        }
        return out.toString();
    }

    /**
     * Appends the bytes of {@link #CHUNK_SEPARATOR} as characters.
     *
     * @param out destination builder
     */
    private static void appendChunkSeparator(StringBuilder out) {
        for (int i = 0; i < CHUNK_SEPARATOR.length; i++) {
            out.append((char) CHUNK_SEPARATOR[i]);
        }
    }

    /**
     * Converts a Base64 string back to normal text.
     *
     * <p>Characters outside the Base64 alphabet (line breaks, whitespace,
     * non-ASCII characters) are ignored. The first '=' is treated as the end
     * of the data. Missing padding is tolerated; a dangling single character,
     * which cannot carry a whole byte, is dropped. The decoded bytes are
     * interpreted as UTF-8.
     *
     * @param bStr the Base64 text, may be {@code null}
     * @return the decoded text; {@code null} if {@code bStr} is {@code null}
     *         or contains no Base64 data before the first '='
     * @throws Exception if a required charset is unavailable
     */
    public static String decodeBase64(String bStr) throws Exception {
        if (bStr == null) {
            return null;
        }
        byte[] base64Data = discardNonBase64(bStr.getBytes(CHARSET_ASCII));

        // Data ends at the first padding character.
        int dataLength = 0;
        while (dataLength < base64Data.length && base64Data[dataLength] != ESCAPE_CHAR) {
            dataLength++;
        }
        if (dataLength == 0) {
            return null;
        }

        ByteArrayOutputStream decoded = new ByteArrayOutputStream(dataLength);
        int index = 0;
        for (; index + FOURBYTE <= dataLength; index += FOURBYTE) {
            int bits = (base64Alphabet[base64Data[index]] << 18)
                    | (base64Alphabet[base64Data[index + 1]] << 12)
                    | (base64Alphabet[base64Data[index + 2]] << 6)
                    | base64Alphabet[base64Data[index + 3]];
            // write(int) keeps only the low 8 bits of its argument.
            decoded.write(bits >> 16);
            decoded.write(bits >> 8);
            decoded.write(bits);
        }

        // A final group of 2 or 3 characters carries 1 or 2 bytes.
        int rest = dataLength - index;
        if (rest >= 2) {
            int bits = (base64Alphabet[base64Data[index]] << 18)
                    | (base64Alphabet[base64Data[index + 1]] << 12);
            decoded.write(bits >> 16);
            if (rest == 3) {
                bits |= base64Alphabet[base64Data[index + 2]] << 6;
                decoded.write(bits >> 8);
            }
        }
        return new String(decoded.toByteArray(), CHARSET_UTF8);
    }

    /**
     * Returns a copy of {@code data} containing only Base64 alphabet
     * characters and '=' padding, in their original order.
     *
     * @param data raw input bytes
     * @return the filtered bytes
     */
    private static byte[] discardNonBase64(byte[] data) {
        byte[] groomedData = new byte[data.length];
        int bytesCopied = 0;
        for (int i = 0; i < data.length; i++) {
            if (isBase64(data[i])) {
                groomedData[bytesCopied++] = data[i];
            }
        }
        byte[] packedData = new byte[bytesCopied];
        System.arraycopy(groomedData, 0, packedData, 0, bytesCopied);
        return packedData;
    }

    /**
     * Tests whether a byte is '=' or a member of the Base64 alphabet.
     *
     * @param octect the byte to test
     * @return {@code true} if the byte may appear in Base64 text
     */
    private static boolean isBase64(byte octect) {
        if (octect == ESCAPE_CHAR) {
            return true;
        }
        // Negative bytes (non-ASCII) must not be used as a table index.
        return octect >= 0 && base64Alphabet[octect] != -1;
    }
}
