// 简体中文转繁体中文的工具,包括:1、编码转换(GBK -> Big5);2、语义转换(根据词库,需要词库的请通过 EMail 联系我)
package i18n.converter;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Vector;
/**
 * Converts Simplified Chinese text to Traditional Chinese.
 *
 * <p>Conversion is dictionary driven: every dictionary resource listed in
 * {@link #dictFiles} is loaded from the classpath (relative to this class) and
 * merged into a single source-to-target table. Text is then rewritten with a
 * greedy longest-match replacement. Files are read as GBK and written as Big5.
 *
 * <p>Instances keep no per-conversion state, but the class performs no
 * synchronization of its own beyond what {@link Hashtable} provides.
 *
 * Author: [email protected]
 */
public class Gbk2Big5Converter {

    /** Charset used to read the dictionaries and the source text. */
    private static final String SOURCE_CHARSET = "gbk";

    /** Charset used to write the converted text. */
    private static final String TARGET_CHARSET = "big5";

    /** Highest char value treated as a single-byte (ASCII) character. */
    private static final char MAX_SINGLE_BYTE_CHAR = 0x7F;

    /** Longest dictionary key that {@link #printDict()} prints. */
    private static final int MAX_PRINTED_KEY_LENGTH = 20;

    /** Simplified-to-Traditional table holding both single characters and phrases. */
    protected Hashtable<String, String> s2thash = new Hashtable<String, String>();

    /** Classpath resources (relative to this class) that make up the dictionary. */
    static String[] dictFiles = new String[] { "mappings_gbk2big5_1-1.txt",
            "mappings_gbk2big5_phrase.txt" };

    /** No longer used by this class; kept only so existing references still compile. */
    String dataline;

    /** Creates a converter and loads the dictionary via {@link #getHashDict()}. */
    public Gbk2Big5Converter() {
        s2thash = getHashDict();
    }

    /**
     * Builds the Simplified-to-Traditional table (single characters and
     * phrases) from the GBK-encoded dictionary resources.
     *
     * <p>Each usable line has the form {@code source,target}. Lines shorter
     * than three characters, lines starting with {@code #} and lines without a
     * comma after the first character are ignored. When a source appears more
     * than once, the last definition wins. A missing or unreadable resource is
     * reported on {@code System.err} and skipped, so the result may be empty.
     *
     * @return a newly created table; never {@code null}
     */
    public Hashtable<String, String> getHashDict() {
        Hashtable<String, String> hashDict = new Hashtable<String, String>();
        for (String filename : dictFiles) {
            InputStream dictStream = filename == null
                    ? null : getClass().getResourceAsStream(filename);
            if (dictStream == null) {
                System.err.println("WARNING: dictionary resource " + filename
                        + " not found; skipping.");
                continue;
            }
            BufferedReader br = null;
            try {
                br = new BufferedReader(
                        new InputStreamReader(dictStream, SOURCE_CHARSET));
                String line;
                while ((line = br.readLine()) != null) {
                    if (line.length() < 3 || line.charAt(0) == '#') {
                        continue;
                    }
                    int idx = line.indexOf(',');
                    if (idx > 0) {
                        // put() replaces any earlier mapping for the same source.
                        hashDict.put(line.substring(0, idx).trim(),
                                line.substring(idx + 1).trim());
                    }
                }
            } catch (IOException ex) {
                System.err.println("ERROR: failed to read dictionary "
                        + filename + ": " + ex);
            } finally {
                closeQuietly(br);
                // Covers the case where the reader could not be created.
                closeQuietly(dictStream);
            }
        }
        return hashDict;
    }

    /**
     * Converts a string using the dictionary.
     *
     * @param inline the text to convert; must not be {@code null}
     * @return the converted text
     * @throws NullPointerException if {@code inline} is {@code null}
     */
    public String convertString(String inline) {
        StringBuffer outline = new StringBuffer(inline);
        convertStringBuffer(outline);
        return outline.toString();
    }

    /**
     * Converts the content of a {@link StringBuffer} in place using the
     * dictionary.
     *
     * <p>At every position the longest substring that is a dictionary key is
     * replaced by its mapping, and scanning resumes right after the inserted
     * text (replacements are never rescanned). ASCII characters are skipped
     * without a dictionary lookup.
     *
     * @param dataline the buffer to rewrite; must not be {@code null}
     */
    public void convertStringBuffer(StringBuffer dataline) {
        int pos = 0;
        while (pos < dataline.length()) {
            // ASCII characters never start a dictionary entry worth matching.
            if (dataline.charAt(pos) <= MAX_SINGLE_BYTE_CHAR) {
                pos++;
                continue;
            }
            int advance = 1;
            // Try the longest candidate first so phrases win over single characters.
            for (int len = dataline.length() - pos; len >= 1; len--) {
                String replacement = s2thash.get(dataline.substring(pos, pos + len));
                if (replacement != null) {
                    dataline.replace(pos, pos + len, replacement);
                    // An empty replacement keeps pos unchanged; the loop still
                    // terminates because the buffer has become shorter.
                    advance = replacement.length();
                    break;
                }
            }
            pos += advance;
        }
    }

    /**
     * Converts a GBK-encoded file, or every file below a directory, and writes
     * the result Big5-encoded to the target path.
     *
     * <p>Directories are processed breadth first and mirrored under
     * {@code targetdir}; missing target directories are created. Failures are
     * reported per file and do not stop the remaining files.
     *
     * @param sourcedir source file or directory (GBK encoded content)
     * @param targetdir target file or directory (Big5 encoded content)
     */
    public void convertFile(String sourcedir, String targetdir) {
        // Legacy encoding id, only echoed in the progress message below so the
        // console output stays unchanged.
        final int sourceEncodingId = 0;
        // Parallel work lists; directory entries are appended while iterating.
        Vector<String> inputfiles = new Vector<String>();
        Vector<String> outputfiles = new Vector<String>();
        inputfiles.add(sourcedir);
        outputfiles.add(targetdir);
        for (int i = 0; i < inputfiles.size(); i++) {
            String inputPath = inputfiles.get(i);
            String outputPath = outputfiles.get(i);
            File source = new File(inputPath);
            if (!source.exists()) {
                System.out.println("ERROR: Source file " + inputPath
                        + " does not exist.");
                continue;
            }
            if (source.isDirectory()) {
                File targetDirectory = new File(outputPath);
                if (!targetDirectory.exists() && !targetDirectory.mkdirs()) {
                    System.err.println("ERROR: could not create directory "
                            + outputPath);
                }
                String[] children = source.list();
                if (children != null) {
                    for (String child : children) {
                        inputfiles.add(inputPath + File.separator + child);
                        outputfiles.add(outputPath + File.separator + child);
                    }
                }
                continue;
            }
            System.out.println("Converting " + inputPath + " to "
                    + outputPath + " with encoding " + sourceEncodingId);
            FileInputStream in = null;
            try {
                in = new FileInputStream(inputPath);
                convertStream(in, new FileOutputStream(outputPath));
            } catch (Exception ex) {
                // Batch boundary: report and carry on with the next file.
                System.err.println(ex);
            } finally {
                closeQuietly(in);
            }
        }
    }

    /**
     * Converts a string and writes the result, Big5 encoded, to the file
     * {@code temp.txt} in the current working directory.
     *
     * <p>The text is passed through GBK first, so characters that GBK cannot
     * represent are replaced during encoding. I/O failures are reported on
     * {@code System.err}; the file handle is returned either way.
     *
     * @param inputString the text to convert; must not be {@code null}
     * @return the file the converted text was written to
     * @throws NullPointerException if {@code inputString} is {@code null}
     */
    public File convertSimpleString(String inputString) {
        if (inputString == null) {
            throw new NullPointerException("inputString");
        }
        File result = new File("temp.txt");
        try {
            // Encode explicitly as GBK: the platform default charset would not
            // match the GBK decoder used by convertStream.
            InputStream in = new ByteArrayInputStream(
                    inputString.getBytes(SOURCE_CHARSET));
            convertStream(in, new FileOutputStream(result));
        } catch (Exception e) {
            System.err.println("ERROR: could not write converted text to "
                    + result + ": " + e);
        }
        return result;
    }

    /**
     * Tells whether a string consists of single-byte characters only. Chinese
     * characters are never single-byte.
     *
     * <p>The check is based on the character values (ASCII range) and does not
     * depend on the platform default charset.
     *
     * @param inStr the string to test; must not be {@code null}
     * @return {@code true} if every character is ASCII (also for an empty string)
     */
    public static boolean isSingleByte(String inStr) {
        for (int i = 0; i < inStr.length(); i++) {
            if (inStr.charAt(i) > MAX_SINGLE_BYTE_CHAR) {
                return false;
            }
        }
        return true;
    }

    /**
     * Prints this converter's dictionary to {@code System.out} as
     * {@code source,target} lines, longest sources first. Sources longer than
     * {@value #MAX_PRINTED_KEY_LENGTH} characters are not printed.
     */
    public void printDict() {
        for (int length = MAX_PRINTED_KEY_LENGTH; length > 0; length--) {
            Enumeration<String> keys = s2thash.keys();
            while (keys.hasMoreElements()) {
                String key = keys.nextElement();
                if (key.length() == length) {
                    System.out.println(key + "," + s2thash.get(key));
                }
            }
        }
    }

    /**
     * Command line entry point.
     *
     * @param args optional source path and target path; the bundled resource
     *             file names are used for any argument that is missing
     */
    public static void main(String[] args) {
        Gbk2Big5Converter aGbk2Big5Converter = new Gbk2Big5Converter();
        String src = args.length > 0 ? args[0]
                : "src//resource_zh_CN.properties.org";
        String tgt = args.length > 1 ? args[1]
                : "src//resource_zh_TW.properties.org";
        System.out.println(new File(src).getAbsolutePath());
        aGbk2Big5Converter.convertFile(src, tgt);
    }

    /**
     * Reads GBK text line by line from {@code in}, converts each line and
     * writes it Big5 encoded to {@code out}. Both streams are always closed.
     */
    private void convertStream(InputStream in, FileOutputStream out)
            throws IOException {
        try {
            BufferedReader reader = new BufferedReader(
                    new InputStreamReader(in, SOURCE_CHARSET));
            BufferedWriter writer = new BufferedWriter(
                    new OutputStreamWriter(out, TARGET_CHARSET));
            String line;
            while ((line = reader.readLine()) != null) {
                writer.write(convertString(line));
                writer.newLine();
            }
            // Close on the success path so that flush errors are reported.
            writer.close();
        } finally {
            closeQuietly(in);
            closeQuietly(out);
        }
    }

    /** Closes a resource, ignoring {@code null} and any failure while closing. */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ignored) {
            // Nothing useful can be done if closing fails during cleanup.
        }
    }
}