第四步我们已经得到了组合后的数组,现在需要对接第二步的方法。先写一个把数组拼接成字符串的方法:
public static void main(String[] args) { String item = "娱乐八卦"; // System.out.println(Arrays.toString(split(item))); // System.out.println(Arrays.toString(split("ylbg"))); // System.out.println(Arrays.toString(split("yu,le,ba,gua"))); ArrayList<String[]> list = pinyin(item); // for (String[] arr : list) { // System.out.println(Arrays.toString(arr)); // } // CompositeUtil<String> t = new CompositeUtil<String>(list); // for (String[] strings : t.getResult()) { // System.out.println(Arrays.toString(strings)); // } System.out.println(Arrays.toString(composite(list))); } /** * 拼音组合 */ public static String[] composite(ArrayList<String[]> list) { StringBuffer buff = new StringBuffer(); CompositeUtil<String> t = new CompositeUtil<String>(list); List<String[]> result = t.getResult(); int length = result.size(); String[] back = new String[length]; for (int i = 0; i < length; i++) { buff = new StringBuffer(); for (String s : result.get(i)) { if (buff.length() > 0) buff.append(","); buff.append(s); } back[i] = buff.toString(); } return back; }
输出:
[yu,le,ba,gua, yu,yue,ba,gua]
现在对每个组合进行分割:
public static void main(String[] args) { String item = "娱乐八卦"; // System.out.println(Arrays.toString(split(item))); // System.out.println(Arrays.toString(split("ylbg"))); // System.out.println(Arrays.toString(split("yu,le,ba,gua"))); ArrayList<String[]> list = pinyin(item); // for (String[] arr : list) { // System.out.println(Arrays.toString(arr)); // } // CompositeUtil<String> t = new CompositeUtil<String>(list); // for (String[] strings : t.getResult()) { // System.out.println(Arrays.toString(strings)); // } String[] pinyin = composite(list); System.out.println(Arrays.toString(pinyin)); for (String p : pinyin) System.out.println(Arrays.toString(split(p))); }
输出:
[yu,le,ba,gua, yu,yue,ba,gua] [y, yu, yu, yul, yule, yule, yuleb, yuleba, yuleba, yulebag, yulebagu, yulebagua, l, le, le, leb, leba, leba, lebag, lebagu, lebagua, b, ba, ba, bag, bagu, bagua, g, gu, gua] [y, yu, yu, yuy, yuyu, yuyue, yuyue, yuyueb, yuyueba, yuyueba, yuyuebag, yuyuebagu, yuyuebagua, y, yu, yue, yue, yueb, yueba, yueba, yuebag, yuebagu, yuebagua, b, ba, ba, bag, bagu, bagua, g, gu, gua]
我们还有一个小问题没有解决,即词语拼音首字母的组合。有了以上的基础,只要稍微修改一下就能满足需求:
public static void main(String[] args) { boolean first = true; String item = "娱乐八卦"; // System.out.println(Arrays.toString(split(item, first))); // System.out.println(Arrays.toString(split("ylbg", first))); // System.out.println(Arrays.toString(split("yu,le,ba,gua"))); ArrayList<String[]> list = pinyin(item, first); for (String[] arr : list) { System.out.println(Arrays.toString(arr)); } // CompositeUtil<String> t = new CompositeUtil<String>(list); // for (String[] strings : t.getResult()) { // System.out.println(Arrays.toString(strings)); // } String[] pinyin = composite(list, first); System.out.println(Arrays.toString(pinyin)); for (String p : pinyin) System.out.println(Arrays.toString(split(p, first))); } /** * 拼音组合 */ public static String[] composite(ArrayList<String[]> list, boolean first) { StringBuffer buff = new StringBuffer(); CompositeUtil<String> t = new CompositeUtil<String>(list); List<String[]> result = t.getResult(); int length = result.size(); String[] back = new String[length]; for (int i = 0; i < length; i++) { buff = new StringBuffer(); for (String s : result.get(i)) { if (!first && buff.length() > 0) buff.append(","); buff.append(s); } back[i] = buff.toString(); } return back; } /** * 词语拼音 */ public static ArrayList<String[]> pinyin(String s, boolean first) { HanyuPinyinOutputFormat defaultFormat = new HanyuPinyinOutputFormat(); defaultFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE); // 小写 defaultFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE); // 不用声标 String[] pinyinArray = null; ArrayList<String> temp; ArrayList<String[]> list = new ArrayList<String[]>(); for (char c : s.toCharArray()) { try { pinyinArray = PinyinHelper.toHanyuPinyinStringArray(c, defaultFormat); } catch (BadHanyuPinyinOutputFormatCombination e) { e.printStackTrace(); } if (pinyinArray == null) { list.add(new String[] { String.valueOf(c) }); } else { temp = new ArrayList<String>(); for (String p : pinyinArray) { if (first) { if (!temp.contains(p.substring(0, 1))) { 
temp.add(p.substring(0, 1)); } } else { if (!temp.contains(p)) { temp.add(p); } } } list.add(temp.toArray(new String[temp.size()])); } } return list; } /** * 切割词语 */ public static String[] split(String s, boolean first) { int next = 0; String temp = ""; int len = s.length(); ArrayList<String> list = new ArrayList<String>(); for (int i = 0; i < len; i++) { if (s.charAt(i) == ',') { next = 0; } else { if (i != 0 && next == 0) next = 1; } if (first || next == 0) for (int j = i + 1; j <= len; j++) { temp = s.substring(i, j).replace(",", ""); if (temp.length() > 0) list.add(temp); } } return list.toArray(new String[list.size()]); }
对composite、pinyin和split方法加入first参数,表示如果想获得拼音首个字母,传入true,否则传入false。输出:
[ylbg, yybg] [y, yl, ylb, ylbg, l, lb, lbg, b, bg, g] [y, yy, yyb, yybg, y, yb, ybg, b, bg, g]
搜索所需的数据结构的各个组成部分已经准备完毕,接下来只需用这些准备好的方法构造出需要的数据结构:
public static void main(String[] args) { // boolean first = true; // String item = "娱乐八卦"; // System.out.println(Arrays.toString(split(item, first))); // System.out.println(Arrays.toString(split("ylbg", first))); // System.out.println(Arrays.toString(split("yu,le,ba,gua"))); // ArrayList<String[]> list = pinyin(item, first); // for (String[] arr : list) { // System.out.println(Arrays.toString(arr)); // } // CompositeUtil<String> t = new CompositeUtil<String>(list); // for (String[] strings : t.getResult()) { // System.out.println(Arrays.toString(strings)); // } // String[] pinyin = composite(list, first); // System.out.println(Arrays.toString(pinyin)); // for (String p : pinyin) // System.out.println(Arrays.toString(split(p, first))); // String[] items = { "娱乐八卦", "经济论坛" }; HashMap<String, ArrayList<String>> map = map(items); System.out.println(map); } /** * 构造数据结构 */ public static HashMap<String, ArrayList<String>> map(String[] items) { ArrayList<String> values = new ArrayList<String>(); HashMap<String, ArrayList<String>> map = new HashMap<String, ArrayList<String>>(); String[] temp; for (String item : items) { // 中文词语切割 temp = split(item, true); for (String t : temp) { if (map.containsKey(t)) { if (!map.get(t).contains(item)) map.get(t).add(item); } else { values = new ArrayList<String>(); values.add(item); map.put(t, values); } } // 中文拼音首个字母 temp = composite(pinyin(item, true), true); for (String str : temp) { for (String t : split(str, true)) { if (map.containsKey(t)) { if (!map.get(t).contains(item)) map.get(t).add(item); } else { values = new ArrayList<String>(); values.add(item); map.put(t, values); } } } // 中文拼音 temp = composite(pinyin(item, false), false); for (String str : temp) for (String t : split(str, false)) { if (map.containsKey(t)) { if (!map.get(t).contains(item)) map.get(t).add(item); } else { values = new ArrayList<String>(); values.add(item); map.put(t, values); } } } return map; }
输出结构:
{yulebag=[娱乐八卦], 经济论=[经济论坛], jingjilu=[经济论坛], yuyuebagu=[娱乐八卦], yuyuebag=[娱乐八卦], 经济论坛=[经济论坛], lun=[经济论坛], gua=[娱乐八卦], bagua=[娱乐八卦], lebag=[娱乐八卦], jingjilunta=[经济论坛], 论=[经济论坛], luntan=[经济论坛], 娱=[娱乐八卦], jilun=[经济论坛], yuy=[娱乐八卦], bag=[娱乐八卦], yul=[娱乐八卦], 乐=[娱乐八卦], ylb=[娱乐八卦], yue=[娱乐八卦], 娱乐八卦=[娱乐八卦], yuyue=[娱乐八卦], gu=[娱乐八卦], tan=[经济论坛], ylbg=[娱乐八卦], jingjiluntan=[经济论坛], yybg=[娱乐八卦], 济=[经济论坛], lu=[经济论坛], lt=[经济论坛], 论坛=[经济论坛], 娱乐=[娱乐八卦], yuebagu=[娱乐八卦], 八卦=[娱乐八卦], jiluntan=[经济论坛], jingjil=[经济论坛], g=[娱乐八卦], 乐八卦=[娱乐八卦], b=[娱乐八卦], yuyueb=[娱乐八卦], l=[娱乐八卦, 经济论坛], j=[经济论坛], yulebagu=[娱乐八卦], t=[经济论坛], yuebag=[娱乐八卦], yuyu=[娱乐八卦], leb=[娱乐八卦], y=[娱乐八卦], lunt=[经济论坛], 坛=[经济论坛], jl=[经济论坛], jj=[经济论坛], jin=[经济论坛], jil=[经济论坛], jingji=[经济论坛], ta=[经济论坛], jingjilunt=[经济论坛], bg=[娱乐八卦], yyb=[娱乐八卦], yuyuebagua=[娱乐八卦], jingj=[经济论坛], yulebagua=[娱乐八卦], leba=[娱乐八卦], jjl=[经济论坛], le=[娱乐八卦], 经济=[经济论坛], lb=[娱乐八卦], jilunt=[经济论坛], 乐八=[娱乐八卦], yuyueba=[娱乐八卦], yueb=[娱乐八卦], jilunta=[经济论坛], yy=[娱乐八卦], yueba=[娱乐八卦], lebagu=[娱乐八卦], 经=[经济论坛], jjlt=[经济论坛], 八=[娱乐八卦], jilu=[经济论坛], yuleba=[娱乐八卦], ybg=[娱乐八卦], lebagua=[娱乐八卦], ba=[娱乐八卦], 娱乐八=[娱乐八卦], yb=[娱乐八卦], jlt=[经济论坛], jing=[经济论坛], jingjilun=[经济论坛], lunta=[经济论坛], 济论=[经济论坛], yuleb=[娱乐八卦], 济论坛=[经济论坛], lbg=[娱乐八卦], yu=[娱乐八卦], 卦=[娱乐八卦], yuebagua=[娱乐八卦], ji=[经济论坛], yule=[娱乐八卦], bagu=[娱乐八卦], yl=[娱乐八卦]}
到这里,可以解决这个问题了,能否更好解决这个问题?