同音字查询

package org.autumn.kettle;

import static org.junit.Assert.*;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.junit.Test;

/**
 * Homophone lookup for Chinese characters.<br/>
 * 
 * A character is first mapped to its pinyin; because a polyphonic character
 * has several readings, one character may map to several pinyin. Each pinyin
 * is then mapped back to the characters that share it.
 * 
 * @author sunny
 * 
 */
public class ChineseSoundex {

	/**
	 * Character to pinyin table. The value holds every pinyin read for the
	 * character, joined with {@link #SEPARATOR}.
	 */
	public static Map<String, String> chineseMap = new HashMap<String, String>();

	/**
	 * Pinyin to character table. The value holds every character read for the
	 * pinyin, joined with {@link #SEPARATOR}.
	 */
	public static Map<String, String> pinyinMap = new HashMap<String, String>();

	/** Delimiter between the entries packed into one map value. */
	public static final String SEPARATOR = "_";

	static {
		File file = new File("src/GB2312汉字拼音对照表7809字.txt");

		BufferedReader reader = null;
		try {
			// NOTE(review): FileReader decodes with the platform default
			// charset; confirm that it matches the encoding of the data file.
			reader = new BufferedReader(new FileReader(file));
			String line;
			while ((line = reader.readLine()) != null) {
				// A usable line has the character at index 0, one delimiter
				// character at index 1 and the pinyin from index 2 onwards.
				// Shorter lines (e.g. blank ones) carry no pinyin; skipping
				// them keeps substring() from aborting class initialization.
				if (line.length() < 3) {
					continue;
				}
				String chinese = line.substring(0, 1);
				String pinyin = line.substring(2);

				append(chineseMap, chinese, pinyin);
				append(pinyinMap, pinyin, chinese);
			}
		} catch (FileNotFoundException e) {
			// Best effort: the tables stay empty and lookups find nothing.
			e.printStackTrace();
		} catch (IOException ioe) {
			ioe.printStackTrace();
		} finally {
			// Single place where the reader is closed, on success and failure.
			if (reader != null) {
				try {
					reader.close();
				} catch (IOException ioe) {
					ioe.printStackTrace();
				}
			}
		}
	}

	/**
	 * Adds {@code value} to the entries stored under {@code key}, joining it
	 * to any existing entries with {@link #SEPARATOR}.
	 * 
	 * @param map   table to update
	 * @param key   key whose entry list is extended
	 * @param value entry to add
	 */
	private static void append(Map<String, String> map, String key,
			String value) {
		String existing = map.get(key);
		if (existing == null) {
			map.put(key, value);
		} else {
			map.put(key, existing + SEPARATOR + value);
		}
	}

	/**
	 * Splits a map value back into its individual entries.
	 * 
	 * @param joined entries joined with {@link #SEPARATOR}, or {@code null}
	 *               when the key was not present in the table
	 * @return the entries in stored order; empty when {@code joined} is
	 *         {@code null}
	 */
	private static List<String> splitJoined(String joined) {
		List<String> parts = new ArrayList<String>();
		if (joined == null) {
			return parts;
		}
		// split() on a value without the separator yields that value alone,
		// so single-entry and multi-entry values need no separate handling.
		String[] pieces = joined.split(SEPARATOR);
		for (int i = 0; i < pieces.length; i++) {
			parts.add(pieces[i]);
		}
		return parts;
	}

	/**
	 * Tells whether two characters share at least one pinyin.
	 * 
	 * @param str1 first character
	 * @param str2 second character
	 * @return {@code true} if some pinyin of {@code str2} is also a pinyin of
	 *         {@code str1}; {@code false} otherwise, including when either
	 *         character is not in the table
	 */
	private boolean soundex(String str1, String str2) {
		List<String> thisList = getPinyin(str1);
		List<String> thatList = getPinyin(str2);
		for (String that : thatList) {
			if (thisList.contains(that)) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Looks up every pinyin recorded for a character.
	 * 
	 * @param chinese character to look up
	 * @return the pinyin of the character; empty when the character is not in
	 *         the table
	 */
	private static List<String> getPinyin(String chinese) {
		return splitJoined(chineseMap.get(chinese));
	}

	@Test
	public void testSoundex1() {
		assertTrue(soundex("真", "甄"));
		assertFalse(soundex("真", "振"));
	}

	@Test
	public void testSoundex2() {
		assertTrue(soundex("行", "杭"));
		assertTrue(soundex("行", "形"));
	}

	public static void main(String[] args) {
		System.out.println(soundex("行"));
	}

	/**
	 * Collects the homophones of a character across all of its pinyin.
	 * 
	 * @param chinese character to look up
	 * @return every other character sharing a pinyin with {@code chinese},
	 *         each listed once; empty when the character is not in the table
	 */
	private static List<String> soundex(String chinese) {
		List<String> result = new ArrayList<String>();

		for (String pinyin : getPinyin(chinese)) {
			for (String homophone : getChinese(pinyin, chinese)) {
				// Two characters can share more than one pinyin; report the
				// homophone only the first time it is seen.
				if (!result.contains(homophone)) {
					result.add(homophone);
				}
			}
		}
		return result;
	}

	/**
	 * Looks up the characters recorded for a pinyin, leaving out the character
	 * that is being searched for.
	 * 
	 * @param pinyin        pinyin to look up
	 * @param searchChinese character to exclude from the result
	 * @return the other characters with this pinyin; empty when the pinyin is
	 *         not in the table
	 */
	private static List<String> getChinese(String pinyin, String searchChinese) {
		List<String> result = new ArrayList<String>();

		for (String candidate : splitJoined(pinyinMap.get(pinyin))) {
			if (!candidate.equals(searchChinese)) {
				result.add(candidate);
			}
		}
		return result;
	}

}


遗留问题:多音字怎么转拼音?有好的解决方案请留言,谢谢。

你可能感兴趣的:(查询)