将网页中形如 &#21834; 的 Unicode 数字字符引用(例如 &#21834; 对应“啊”)转换成中文

/**
 * Replaces five-digit decimal character references such as {@code &#21834;}
 * with the character they are mapped to.
 *
 * <p>The mapping is read once from the classpath resource
 * {@code /chinese.mapping}. Only the first line of that UTF-8 file is used; it
 * holds whitespace-separated {@code character:code} entries, for example
 * {@code X:21834 Y:38463}. References that have no entry in the mapping are
 * left untouched.
 *
 * <p>Use {@link #getInstance()} to obtain the shared converter.
 */
public class UnicodeConverter {

    // NOTE: the static fields below are used by the constructor, so they must be
    // declared (and therefore initialized) before the singleton instance.

    // JDK logging is referenced by its fully-qualified name so the class does not
    // depend on which logging library the surrounding file happens to import.
    private static final java.util.logging.Logger LOGGER =
            java.util.logging.Logger.getLogger(UnicodeConverter.class.getName());

    /** Classpath location of the mapping file. */
    private static final String MAPPING_RESOURCE = "/chinese.mapping";

    /** Matches a decimal character reference with exactly five digits. */
    private static final Pattern UNICODE_PATTERN = Pattern.compile("&#\\d{5};");

    private static final UnicodeConverter unique = new UnicodeConverter();

    /** Maps a full reference (e.g. {@code &#21834;}) to its replacement text. */
    private final Map<String, String> chineseMap;

    /**
     * Returns the shared converter backed by {@code /chinese.mapping}.
     *
     * @return the singleton instance
     */
    public static UnicodeConverter getInstance() {
        return unique;
    }

    /**
     * Creates a converter from mapping text instead of the classpath resource.
     *
     * @param mappingLine whitespace-separated {@code character:code} entries;
     *                    {@code null} or blank yields a converter that replaces nothing
     * @return a new, independent converter
     */
    static UnicodeConverter fromMappingLine(String mappingLine) {
        return new UnicodeConverter(mappingLine);
    }

    private UnicodeConverter() {
        this(readMappingResource());
    }

    private UnicodeConverter(String mappingLine) {
        this.chineseMap = parseMapping(mappingLine);
    }

    /**
     * Reads the first line of the mapping resource.
     *
     * @return the line, or {@code null} if the resource is missing, empty or unreadable
     */
    private static String readMappingResource() {
        // A null resource is permitted by try-with-resources; close() is simply skipped.
        try (InputStream in = UnicodeConverter.class.getResourceAsStream(MAPPING_RESOURCE)) {
            if (in == null) {
                LOGGER.severe("Init Char Mapping Fail: resource " + MAPPING_RESOURCE
                        + " not found on classpath");
                return null;
            }
            // The mapping file is UTF-8 encoded.
            BufferedReader reader = new BufferedReader(
                    new InputStreamReader(in, java.nio.charset.StandardCharsets.UTF_8));
            return reader.readLine();
        } catch (java.io.IOException e) {
            LOGGER.log(java.util.logging.Level.SEVERE, "Init Char Mapping Fail", e);
            return null;
        }
    }

    /**
     * Parses {@code character:code} entries into a reference-to-text map.
     * Malformed entries are skipped so that one bad entry does not discard the rest.
     */
    private static Map<String, String> parseMapping(String mappingLine) {
        Map<String, String> mapping = new HashMap<String, String>();
        if (mappingLine == null) {
            return mapping;
        }
        String line = mappingLine;
        // Drop a leading byte-order mark so it does not become part of the first value.
        if (line.length() > 0 && line.charAt(0) == '\uFEFF') {
            line = line.substring(1);
        }
        line = line.trim();
        if (line.length() == 0) {
            return mapping;
        }

        boolean debug = LOGGER.isLoggable(java.util.logging.Level.FINE);
        if (debug) {
            LOGGER.fine("chinese====>" + line);
        }
        for (String entry : line.split("\\s+")) {
            // Split on the last colon: the code is always the trailing part.
            int colon = entry.lastIndexOf(':');
            if (colon <= 0 || colon == entry.length() - 1) {
                LOGGER.warning("Skipping malformed mapping entry: " + entry);
                continue;
            }
            String value = entry.substring(0, colon);
            String code = "&#" + entry.substring(colon + 1) + ";";
            if (debug) {
                LOGGER.fine(code + " To " + value);
            }
            mapping.put(code, value);
        }
        return mapping;
    }

    /** Looks up the replacement for a full reference, or {@code null} if unmapped. */
    private String getByUnicode(String unicode) {
        return chineseMap.get(unicode);
    }

    /**
     * Replaces every mapped five-digit decimal character reference in the input.
     *
     * @param str text that may contain references such as {@code &#21834;}
     * @return the text with mapped references replaced; unmapped references are
     *         kept as-is, and a {@code null} or empty input is returned unchanged
     */
    public String unicodeToChinese(String str) {
        if (str == null || str.length() == 0) {
            return str;
        }
        Matcher matcher = UNICODE_PATTERN.matcher(str);
        // StringBuffer (not StringBuilder) keeps appendReplacement usable before Java 9.
        StringBuffer result = new StringBuffer(str.length());
        while (matcher.find()) {
            String found = matcher.group();
            String replacement = getByUnicode(found);
            if (replacement == null || replacement.length() == 0) {
                replacement = found;
            }
            // quoteReplacement: '$' and '\' in the replacement must be taken literally.
            matcher.appendReplacement(result, Matcher.quoteReplacement(replacement));
        }
        matcher.appendTail(result);
        return result.toString();
    }
}

 

chinese.mapping中只有一行,格式为:    汉字:unicode码 汉字:unicode码 ......

例如:   啊:21834 阿:38463

 

chinese.mapping文件下载(下载后请将.jpg扩展名去掉)

 

 

你可能感兴趣的:(Java相关)