将GBK文件批量转为UTF-8格式

闂鏉ユ簮锛氫粠GitHub涓婁笅杞戒簡涓�涓皬椤圭洰锛屼絾鎵撳紑涔嬪悗锛屽彂鐜伴儴鍒嗘枃浠朵腑鏂囦贡鐮併�傚浣曡В鍐筹紵

最笨的方法是用记事本打开，然后另存为UTF-8格式。但文件较多就不合适了，需要批量转换。用代码实现单文件转换也比较简单，不用解释：


    /**
     * Re-encodes a GBK text file as UTF-8, overwriting the file in place.
     *
     * <p>The whole file is decoded into memory first and only then written back,
     * so the source is fully read and closed before it is truncated. Characters are
     * copied verbatim: existing line terminators are preserved and no trailing
     * newline is added.
     *
     * <p>I/O failures are printed to standard error and not propagated. If decoding
     * fails, the file is left untouched.
     *
     * @param fileName path of the file to convert
     */
    public static void gbk2Utf8(String fileName) {
        StringBuilder content = new StringBuilder();

        // Phase 1: decode the complete file as GBK.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(fileName), "GBK"))) {
            char[] chunk = new char[8192];
            int count;
            // Read raw characters rather than lines so "\n" / "\r\n" survive unchanged.
            while ((count = reader.read(chunk, 0, chunk.length)) != -1) {
                content.append(chunk, 0, count);
            }
        } catch (IOException e) {
            e.printStackTrace();
            return; // never truncate the file when it could not be read completely
        }

        // Phase 2: overwrite the same path with the UTF-8 encoding of the text.
        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(fileName), "UTF-8"))) {
            writer.write(content.toString());
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

濡傛灉鏄緢澶氭枃浠剁殑璇濓紝鍙互閫氳繃閬嶅巻鐩綍鍏堣幏鍙栨瘡涓枃浠�

 /**
     * Recursively collects every file under a directory whose name ends with the
     * given suffix.
     *
     * <p>Sub-directories are descended into and their matches are merged into the
     * returned list.
     *
     * @param strPath
     *            path of the directory to scan
     * @param suffix
     *            file-name suffix to match, e.g. {@code ".java"}
     * @return the matching files; empty (never {@code null}) when nothing matches
     *         or when {@code strPath} cannot be listed as a directory
     */
    public static List<File> getFileList(String strPath, String suffix) {
        List<File> filelist = new ArrayList<File>();
        // listFiles() yields null when the path is not a directory or cannot be read.
        File[] files = new File(strPath).listFiles();
        if (files == null) {
            return filelist;
        }
        for (File entry : files) {
            if (entry.isDirectory()) {
                // Merge the nested matches; each recursive call builds its own list.
                filelist.addAll(getFileList(entry.getAbsolutePath(), suffix));
            } else if (entry.getName().endsWith(suffix)) {
                filelist.add(entry);
            }
        }
        return filelist;
    }

剩下的就是写个主函数去调用，就在这时发现一个大坑，下载的项目中有些是GBK编码、有些是UTF8编码。所以要先判断下文件编码格式，再进行转换，否则可能将正常文件转码为乱码。

那么，问题来了，如何判断文件编码格式呢？
网上有很多不是很严谨的方式，最后我选择了一种相对靠谱的方式来判断，但需要引入第三方依赖jchardet：

        
        <dependency>
            <groupId>net.sourceforge.jchardet</groupId>
            <artifactId>jchardet</artifactId>
            <version>1.0</version>
        </dependency>
        

然后，写个判断文件编码的方法：

    // 鏄惁鎵惧埌鍖归厤瀛楃闆�
    private static boolean isFind = false;
    // 濡傛灉瀹屽叏鍖归厤鏌愪釜瀛楃闆嗘娴嬬畻娉�, 鍒欒灞炴�т繚瀛樿瀛楃闆嗙殑鍚嶇О. 鍚﹀垯(濡備簩杩涘埗鏂囦欢)鍏跺�煎氨涓洪粯璁ゅ�� null
    private static String encoding = null;

    /**
     * 鑾峰彇鏂囦欢鐨勭紪鐮�
     * 
     * @param file
     * @return 鏂囦欢缂栫爜锛岃嫢鏃狅紝鍒欒繑鍥瀗ull
     * @throws IOException
     */
    private static String guessFileCharset(File file) throws IOException {
        nsDetector det = new nsDetector();
        det.Init(new nsICharsetDetectionObserver() {
            public void Notify(String charset) {
                isFind = true;
                encoding = charset;
            }
        });

        BufferedInputStream bis = new BufferedInputStream(new FileInputStream(file));

        byte[] buf = new byte[1024];
        int len;
        boolean done = false;
        boolean isAscii = true;

        while ((len = bis.read(buf, 0, buf.length)) != -1) {
            if (isAscii) {
                isAscii = det.isAscii(buf, len);
            } else if (!done) {
                done = det.DoIt(buf, len, false);
            }
        }
        det.DataEnd();

        if (isAscii) {
            encoding = "ASCII";
            isFind = true;
        } else if (!isFind) {
            String prob[] = det.getProbableCharsets();
            if (prob.length > 0) {
                encoding = prob[0]; // 鍦ㄦ病鏈夊彂鐜版儏鍐典笅锛屽垯鍙栫涓�涓彲鑳界殑缂栫爜
            }
        }
        return encoding;
    }

最后是主函数

/**
 * Scans a directory tree for {@code .java} files, prints the guessed charset of
 * each one, and converts those detected as GB2312 to UTF-8 in place.
 *
 * <p>The directory literal below is a placeholder and must be replaced with a
 * real path before running.
 *
 * @param args unused
 */
public static void main(String[] args) {
    List<File> files = getFileList("具体的文件路径", ".java");
    for (File file : files) {
        String charset = null;
        try {
            charset = guessFileCharset(file.getAbsoluteFile());
        } catch (IOException e) {
            System.err.println("获取文件编码发生异常！");
            e.printStackTrace(); // keep the cause visible instead of dropping it
        }
        System.out.println(file.getName() + "[" + charset + "]");
        // NOTE(review): only files reported exactly as "GB2312" are converted; other
        // GBK-family names the detector might report are skipped - confirm intended.
        if ("GB2312".equals(charset)) {
            gbk2Utf8(file.getAbsolutePath());
        }
    }
}

鏈潵鏄幓GitHub涓婂鎵剧瓟妗堢殑锛岀粨鏋滃張閬囧埌涓�鍫嗛棶棰樸�備笉杩囪繖鏄ソ浜嬶紝閫氳繃涓嶆柇鍙戠幇闂骞惰В鍐抽棶棰橈紝鍙互鎻愰珮鑷繁娲炲療闂鍜岃В鍐抽棶棰樼殑鑳藉姏銆�

你可能感兴趣的:(将GBK文件批量转为UTF-8格式)