读取文件,判断文件编码格式
FileInputStream 本身不支持 mark/reset;需要用 BufferedInputStream 包装之后,才能使用 mark、reset 功能。
package sample.test.name;

import df.util.Util;
import df.util.type.StringUtil;
import df.util.type.SysLog;

import java.io.*;
import java.util.Arrays;

/**
 * Created by andrew on 2015/7/12.
 *
 * <p>Command-line utility that walks a directory, and for every {@code .txt}
 * file guesses the text encoding from its byte-order mark (BOM), decodes the
 * content with that encoding and logs it.
 *
 * <p>Files without a recognised BOM are decoded as {@code gbk}.
 */
public class NameApp {

    private static final String TAG = Util.toTAG(NameApp.class);

    /** Directory scanned when no command-line argument is supplied. */
    private static final String DEFAULT_DIR = "C:\\Users\\andrew\\name";

    /** Charset used when the file does not start with a recognised BOM. */
    private static final String DEFAULT_CHARSET = "gbk";

    /** Length in bytes of the longest BOM recognised here (UTF-8: EF BB BF). */
    private static final int MAX_BOM_LENGTH = 3;

    /** Number of bytes read (and hex-logged) per iteration. */
    private static final int CHUNK_SIZE = 10;

    /**
     * Entry point.
     *
     * @param args optional; {@code args[0]} is the directory to scan. When
     *             absent, the built-in default directory is used.
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_DIR;
        File dir = new File(path);
        if (!dir.isDirectory()) {
            return;
        }

        File[] files = dir.listFiles(new FileFilter() {
            @Override
            public boolean accept(File pathname) {
                return pathname.isFile()
                        && pathname.getName().toLowerCase().endsWith(".txt");
            }
        });
        // listFiles returns null on an I/O error or if the directory vanished.
        if (files == null) {
            return;
        }

        for (File f : files) {
            try {
                dumpFile(f);
            } catch (Exception e) {
                // Per-file boundary: one unreadable file must not stop the scan.
                SysLog.v(TAG, ", e=", e.getMessage());
                e.printStackTrace();
            }
        }
    }

    /**
     * Detects the encoding of one file, decodes it and logs the result.
     *
     * @param f file to read
     * @throws IOException if the file cannot be opened or read, or the
     *                     detected charset is not supported by the JVM
     */
    private static void dumpFile(File f) throws IOException {
        // FileInputStream has no mark/reset support; the buffered wrapper adds it.
        InputStream reader = new BufferedInputStream(new FileInputStream(f));
        try {
            boolean isSupport = reader.markSupported();
            SysLog.v(TAG, " IS SUPPORT = ", isSupport);

            // The read limit must cover every byte consumed before reset().
            reader.mark(MAX_BOM_LENGTH);
            byte[] head = new byte[MAX_BOM_LENGTH];
            int read = readUpTo(reader, head);

            String bomCharset = detectBomCharset(head, read);
            String charset = (bomCharset != null) ? bomCharset : DEFAULT_CHARSET;

            // Rewind, then skip exactly the BOM: 2 bytes for UTF-16, 3 for
            // UTF-8, none otherwise. Consuming 3 bytes unconditionally would
            // drop the first content byte of a UTF-16 file.
            reader.reset();
            discard(reader, bomLength(bomCharset));

            // Collect raw bytes and decode once at the end; decoding each
            // chunk separately would split multi-byte characters that
            // straddle a chunk boundary.
            ByteArrayOutputStream content = new ByteArrayOutputStream();
            byte[] line = new byte[CHUNK_SIZE];
            int len;
            while ((len = reader.read(line, 0, line.length)) != -1) {
                // Only the first len bytes of the buffer are valid.
                SysLog.v(TAG, " LINE=", StringUtil.toHexString(Arrays.copyOf(line, len)));
                content.write(line, 0, len);
            }

            String text = content.toString(charset);
            SysLog.v(TAG, "filename=", f.getName(), charset, "end=", text.length(), text);
        } finally {
            closeQuietly(reader);
        }
    }

    /**
     * Maps the leading bytes of a file to a charset name based on its BOM.
     *
     * @param head  the first bytes of the file
     * @param count number of valid bytes in {@code head}
     * @return {@code "UTF-16LE"}, {@code "UTF-16BE"} or {@code "UTF-8"} when
     *         the matching BOM is present, otherwise {@code null}
     */
    private static String detectBomCharset(byte[] head, int count) {
        if (count >= 2) {
            if (head[0] == (byte) 0xFF && head[1] == (byte) 0xFE) {
                return "UTF-16LE";
            }
            if (head[0] == (byte) 0xFE && head[1] == (byte) 0xFF) {
                return "UTF-16BE";
            }
        }
        if (count >= 3
                && head[0] == (byte) 0xEF
                && head[1] == (byte) 0xBB
                && head[2] == (byte) 0xBF) {
            return "UTF-8";
        }
        return null;
    }

    /**
     * Returns the BOM length in bytes for a charset name produced by
     * {@link #detectBomCharset(byte[], int)}; {@code 0} for {@code null}.
     */
    private static int bomLength(String bomCharset) {
        if (bomCharset == null) {
            return 0;
        }
        return "UTF-8".equals(bomCharset) ? 3 : 2;
    }

    /**
     * Reads until {@code buf} is full or the stream ends.
     *
     * @return the number of bytes actually stored in {@code buf}
     */
    private static int readUpTo(InputStream in, byte[] buf) throws IOException {
        int total = 0;
        while (total < buf.length) {
            int n = in.read(buf, total, buf.length - total);
            if (n < 0) {
                break;
            }
            total += n;
        }
        return total;
    }

    /** Reads and throws away up to {@code count} bytes from {@code in}. */
    private static void discard(InputStream in, int count) throws IOException {
        for (int i = 0; i < count; i++) {
            if (in.read() < 0) {
                return;
            }
        }
    }

    /** Closes the stream, reporting but not propagating a failure to close. */
    private static void closeQuietly(Closeable c) {
        try {
            c.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}