最近要做一个中文转拼音的功能,于是想到了 pinyin4j。网络上相关资料很多,但都不完整,所以在这里整理一份中文转拼音(支持多音字输出)的工具类。
详细代码:
package com.baison.util;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
import net.sourceforge.pinyin4j.format.HanyuPinyinVCharType;
import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;
/*******************************************************************************
* pinyin4j is a library that converts Chinese characters into Hanyu Pinyin, with support for polyphonic (multi-reading) characters and tone output.
* Detailed view http://pinyin4j.sourceforge.net/
*
* @author Administrator
* @ClassName: Pinyin4jUtil
* @Description: TODO
* @author [email protected]
* @date Jan 13, 2012 9:28:28 AM
*/
public class Pinyin4jUtil {

    /** Row used for a character that contributes no text to the result. Never mutated. */
    private static final String[] NO_READING = { "" };

    /**
     * Converts Chinese text to pinyin using the default output style: every
     * syllable starts with an upper-case letter and the syllables of one reading
     * are concatenated without separators.
     *
     * <p>When the text contains polyphonic characters, every combination of
     * readings is produced and the combinations are joined with commas, in no
     * particular order.
     *
     * @param chinese the text to convert; may be {@code null} or blank
     * @return the comma-separated pinyin combinations, or an empty string when
     *         {@code chinese} is {@code null} or blank
     */
    public static String getPinyin(String chinese) {
        return getPinyinZh_CN(makeStringByStringSet(chinese));
    }

    /**
     * Converts Chinese text to pinyin written entirely in upper case.
     *
     * @param chinese the text to convert; may be {@code null} or blank
     * @return the comma-separated pinyin combinations in upper case
     */
    public static String getPinyinToUpperCase(String chinese) {
        // Pinyin is plain ASCII, so use a fixed locale: the default locale may
        // apply special casing rules (e.g. Turkish dotted/dotless i).
        return getPinyin(chinese).toUpperCase(Locale.ENGLISH);
    }

    /**
     * Converts Chinese text to pinyin written entirely in lower case.
     *
     * @param chinese the text to convert; may be {@code null} or blank
     * @return the comma-separated pinyin combinations in lower case
     */
    public static String getPinyinToLowerCase(String chinese) {
        // Fixed locale for the same reason as in getPinyinToUpperCase.
        return getPinyin(chinese).toLowerCase(Locale.ENGLISH);
    }

    /**
     * Converts Chinese text to pinyin with the first letter of every syllable
     * in upper case. This is the default style, so it is equivalent to
     * {@link #getPinyin(String)}.
     *
     * @param chinese the text to convert; may be {@code null} or blank
     * @return the comma-separated pinyin combinations
     */
    public static String getPinyinFirstToUpperCase(String chinese) {
        return getPinyin(chinese);
    }

    /**
     * Converts Chinese text to its abbreviated pinyin: only the initial letter
     * of every syllable is kept.
     *
     * @param chinese the text to convert; may be {@code null} or blank
     * @return the comma-separated abbreviations, one per pinyin combination
     */
    public static String getPinyinJianPin(String chinese) {
        return getPinyinConvertJianPin(getPinyin(chinese));
    }

    /**
     * Builds the set of all pinyin spellings of the given text.
     *
     * <p>Characters in the range U+4E00..U+9FA5 are replaced by their pinyin
     * readings, ASCII letters are kept, and every other character is dropped.
     * Each reading and each kept letter is capitalized before concatenation.
     *
     * @param chinese the text to convert
     * @return the distinct pinyin combinations, or {@code null} when
     *         {@code chinese} is {@code null} or blank (kept for compatibility
     *         with existing callers)
     * @throws IllegalStateException if pinyin4j rejects the default output format
     */
    public static Set<String> makeStringByStringSet(String chinese) {
        if (chinese == null || chinese.trim().length() == 0) {
            return null;
        }
        HanyuPinyinOutputFormat format = getDefaultOutputFormat();
        char[] source = chinese.toCharArray();
        String[][] readings = new String[source.length][];
        for (int i = 0; i < source.length; i++) {
            readings[i] = readingsOf(source[i], format);
        }
        Set<String> combinations = new HashSet<String>();
        for (String combination : Exchange(readings)) {
            combinations.add(combination);
        }
        return combinations;
    }

    /**
     * Returns the candidate strings for a single character; the result is never
     * {@code null} and never empty.
     */
    private static String[] readingsOf(char c, HanyuPinyinOutputFormat format) {
        if (c >= '\u4E00' && c <= '\u9FA5') {
            String[] pinyin;
            try {
                pinyin = PinyinHelper.toHanyuPinyinStringArray(c, format);
            } catch (BadHanyuPinyinOutputFormatCombination e) {
                // The format is built by getDefaultOutputFormat(), so this is a
                // programming error rather than bad input.
                throw new IllegalStateException("Invalid default pinyin output format", e);
            }
            if (pinyin == null) {
                return NO_READING;
            }
            // The returned readings may contain duplicates; removing them here
            // keeps the number of combinations from growing needlessly.
            Set<String> unique = new HashSet<String>();
            for (String reading : pinyin) {
                if (reading != null) {
                    unique.add(reading);
                }
            }
            if (unique.isEmpty()) {
                return NO_READING;
            }
            return unique.toArray(new String[unique.size()]);
        }
        if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
            return new String[] { String.valueOf(c) };
        }
        return NO_READING;
    }

    /**
     * Creates the default pinyin4j output format: lower case, no tone marks or
     * tone numbers, and "ü" written as "u:".
     *
     * @return a new format instance
     */
    public static HanyuPinyinOutputFormat getDefaultOutputFormat() {
        HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
        format.setCaseType(HanyuPinyinCaseType.LOWERCASE);
        format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
        format.setVCharType(HanyuPinyinVCharType.WITH_U_AND_COLON);
        return format;
    }

    /**
     * Computes every combination obtained by picking one string from each row,
     * capitalizing it and concatenating the picks in row order.
     *
     * <p>The result length is the product of the row lengths, so it grows
     * quickly when many rows have several entries. A {@code null} row or
     * {@code null} entry contributes no text.
     *
     * @param strJaggedArray one row of candidate strings per position
     * @return all combinations; empty when {@code strJaggedArray} is
     *         {@code null} or has no rows
     */
    public static String[] Exchange(String[][] strJaggedArray) {
        if (strJaggedArray == null || strJaggedArray.length == 0) {
            return new String[0];
        }
        String[] combined = { "" };
        for (String[] row : strJaggedArray) {
            String[] options = (row == null) ? NO_READING : row;
            String[] next = new String[combined.length * options.length];
            int index = 0;
            for (String prefix : combined) {
                for (String option : options) {
                    next[index++] = prefix + (option == null ? "" : capitalize(option));
                }
            }
            combined = next;
        }
        return combined;
    }

    /**
     * Upper-cases the first character of a string when it is an ASCII
     * lower-case letter.
     *
     * @param s the string to capitalize
     * @return the capitalized string; {@code s} itself when it is {@code null}
     *         or empty
     */
    public static String capitalize(String s) {
        if (s == null || s.length() == 0) {
            return s;
        }
        char first = s.charAt(0);
        if (first < 'a' || first > 'z') {
            return s;
        }
        return (char) (first - ('a' - 'A')) + s.substring(1);
    }

    /**
     * Joins the strings of a set with commas.
     *
     * @param stringSet the strings to join; may be {@code null}
     * @return the joined string, or an empty string when {@code stringSet} is
     *         {@code null} or empty
     */
    public static String getPinyinZh_CN(Set<String> stringSet) {
        if (stringSet == null) {
            return "";
        }
        StringBuilder joined = new StringBuilder();
        boolean first = true;
        for (String s : stringSet) {
            if (!first) {
                joined.append(',');
            }
            joined.append(s);
            first = false;
        }
        return joined.toString();
    }

    /**
     * Reduces each comma-separated pinyin entry to its upper-case ASCII
     * letters, which are the syllable initials in the default output style.
     *
     * <p>Entries keep their positions, so the i-th abbreviation corresponds to
     * the i-th input entry.
     *
     * @param chinese comma-separated pinyin, as produced by
     *                {@link #getPinyin(String)}
     * @return the comma-separated abbreviations without a trailing comma, or an
     *         empty string when {@code chinese} is {@code null} or empty
     */
    public static String getPinyinConvertJianPin(String chinese) {
        if (chinese == null || chinese.length() == 0) {
            return "";
        }
        String[] entries = chinese.split(",");
        StringBuilder abbreviations = new StringBuilder();
        for (int i = 0; i < entries.length; i++) {
            if (i > 0) {
                abbreviations.append(',');
            }
            for (char c : entries[i].toCharArray()) {
                if (c >= 'A' && c <= 'Z') {
                    abbreviations.append(c);
                }
            }
        }
        return abbreviations.toString();
    }

    /**
     * Small demo that prints each output style for a sample name.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String str = "张三";
        System.out.println("小写输出:" + getPinyinToLowerCase(str));
        System.out.println("大写输出:" + getPinyinToUpperCase(str));
        System.out.println("首字母大写输出:" + getPinyinFirstToUpperCase(str));
        System.out.println("简拼输出:" + getPinyinJianPin(str));
    }
}