/**
 * Author: yiminghe
 * Date: 2008-10-24
 * Time: 15:08:32
 * Any problem ,contact [email protected].
 */

import java.io.*;
import java.util.*;

/**
 * Longest-common-substring utilities built on a generalized suffix tree.
 *
 * <p>Solves the LCS problem and an extension of it: finding all maximal
 * substrings shared by every string in a set.
 */
class LCS {

    /**
     * Demo entry point: prints the longest common substring and all maximal
     * common substrings of a fixed sample.
     *
     * @param args unused
     * @throws IOException declared for compatibility; not thrown by this code
     */
    public static void main(String[] args) throws IOException {
        String[] source = {"axybcde", "cdefxy", "xyccde"};
        SuffixTreeNode st = buildSuffixTree(source);
        String result;
        result = Lcs(st.firstChild, source.length);
        if (result.equals("")) {
            System.out.println("No common string!");
        } else {
            System.out.println("The longest common substring is : " + result + " .");
        }
        String[] commons = commonString(source);
        System.out.println(Arrays.asList(commons));
    }

    /**
     * Builds one suffix tree containing every suffix of every input string.
     *
     * <p>Each node records, in {@code belongTo}, the 1-based ids (as strings)
     * of the inputs whose suffixes pass through it. The root carries the
     * sentinel id {@code "0"} only. Progress is reported on {@code System.err}.
     *
     * @param ss the input strings
     * @return the root node of the suffix tree
     */
    public static SuffixTreeNode buildSuffixTree(String ss[]) {
        HashMap<String, String> belong = new HashMap<String, String>();
        belong.put("0", "");
        SuffixTreeNode SuffixTree = new SuffixTreeNode(-1, "", 0, belong);
        // Add the suffixes of each string, tagging them with the string's id.
        for (int i = 0; i < ss.length; i++) {
            System.err.print("后缀树[" + (i + 1) + "]");
            belong = new HashMap<String, String>();
            belong.put("" + (i + 1), "");
            for (int index = 0; index < ss[i].length(); index++) {
                String str = ss[i].substring(index);
                SuffixTree.insert(index, str, 0, belong);
            }
            System.err.println(" - OK");
        }
        return SuffixTree;
    }

    /**
     * Depth-first search for the longest path whose every node is shared by
     * all {@code count} strings.
     *
     * @param suffixtree first node of a sibling list (typically {@code root.firstChild}); may be null
     * @param count      total number of input strings
     * @return the longest common substring reachable from this sibling list,
     *         or the empty string if there is none; on ties the first one in
     *         sibling order wins
     */
    public static String Lcs(SuffixTreeNode suffixtree, int count) {
        String result = "";
        for (SuffixTreeNode node = suffixtree; node != null; node = node.next) {
            if (node.belongTo.size() != count) {
                continue;
            }
            // For a leaf the recursive call sees a null child list and yields "",
            // so leaves and inner nodes are handled uniformly.
            String candidate = node.label + Lcs(node.firstChild, count);
            if (result.length() < candidate.length()) {
                result = candidate;
            }
        }
        return result;
    }

    /**
     * Finds all common substrings of the inputs such that no returned string
     * is a suffix of another returned string.
     *
     * @param source the input strings
     * @return the maximal common substrings, in unspecified order
     */
    public static String[] commonString(String[] source) {
        HashSet<String> r = new HashSet<String>();
        SuffixTreeNode st = buildSuffixTree(source);
        recurCommon(r, st.firstChild, source.length);
        String[] original = r.toArray(new String[r.size()]);
        ArrayList<String> result = new ArrayList<String>();
        for (int i = 0; i < original.length; i++) {
            boolean suffixOfAnother = false;
            for (int j = 0; j < original.length; j++) {
                // Drop a candidate that is the tail of some other candidate.
                if (i != j && original[j].endsWith(original[i])) {
                    suffixOfAnother = true;
                    break;
                }
            }
            if (!suffixOfAnother) {
                result.add(original[i]);
            }
        }
        return result.toArray(new String[result.size()]);
    }

    /**
     * Collects the deepest common paths: for every node shared by all strings
     * that has no shared child, the full string spelled from the root to that
     * node is added to {@code r}. This already discards candidates that are a
     * prefix of a longer common substring.
     *
     * @return true if at least one node in this sibling list is shared by all strings
     */
    private static boolean recurCommon(HashSet<String> r, SuffixTreeNode suffixtree, int count) {
        boolean result = false;
        for (SuffixTreeNode node = suffixtree; node != null; node = node.next) {
            if (node.belongTo.size() != count) {
                continue;
            }
            result = true;
            // A leaf has a null child list, for which the recursion returns false.
            boolean deeperCommon = recurCommon(r, node.firstChild, count);
            if (!deeperCommon) {
                r.add(pathFromRoot(node));
            }
        }
        return result;
    }

    /** Concatenates the labels on the parent chain from the root down to {@code node}. */
    private static String pathFromRoot(SuffixTreeNode node) {
        StringBuilder path = new StringBuilder();
        for (SuffixTreeNode cur = node; cur != null; cur = cur.parent) {
            path.insert(0, cur.label);
        }
        return path.toString();
    }
}

/**
 * Node of the suffix tree. Children are kept as a singly linked sibling list
 * ({@code firstChild} / {@code next}) ordered by the first character of their label.
 */
class SuffixTreeNode {
    // Start position of the suffix in its source string; set to -1 once the
    // node becomes a shared prefix node, where a single position is meaningless.
    int index;
    // Edge label leading into this node.
    String label;
    // Next sibling.
    SuffixTreeNode next;
    // First child.
    SuffixTreeNode firstChild = null;
    // Parent node.
    SuffixTreeNode parent = null;
    // Depth of the node in the tree (root is 0).
    int level;
    // Ids of the strings this node belongs to (keys only; values are unused).
    HashMap<String, String> belongTo = null;

    /**
     * @param i     start position of the suffix in its source string
     * @param s     edge label
     * @param level depth of the node
     * @param flag  owner ids; copied, so later changes to {@code flag} do not affect this node
     */
    SuffixTreeNode(int i, String s, int level, HashMap<String, String> flag) {
        this.index = i;
        this.label = s;
        this.level = level;
        this.belongTo = new HashMap<String, String>(flag);
    }

    /**
     * Makes {@code n} (and the sibling chain hanging off it) the children of
     * this node.
     *
     * <p>Every node in the chain is re-parented, not just the first one:
     * when an edge is split, the whole existing child list moves under a new
     * node, and leaving stale parent links on the later siblings would make
     * root-path reconstruction skip a label.
     */
    void setChilden(SuffixTreeNode n) {
        this.firstChild = n;
        for (SuffixTreeNode child = n; child != null; child = child.next) {
            child.parent = this;
        }
    }

    boolean isLeaf() {
        return (this.firstChild == null);
    }

    /**
     * Inserts a suffix below this node, splitting an existing edge if needed.
     *
     * @param ind    start position of the suffix in its source string
     * @param str    the (remaining) suffix text to insert; null or empty is ignored
     * @param level  level of this node; new children are created at {@code level + 1}
     * @param belong owner ids to attach to every node on the inserted path
     */
    public void insert(int ind, String str, int level, HashMap<String, String> belong) {
        if (str == null || str.length() == 0) {
            // Nothing to spell out; an empty label would break the first-character ordering.
            return;
        }

        // No children yet: the suffix becomes the only child.
        if (this.isLeaf()) {
            this.setChilden(new SuffixTreeNode(ind, str, level + 1, belong));
            return;
        }

        // Locate the child whose label starts with the same character, or the
        // position where a new child keeps the sibling list sorted.
        char first = str.charAt(0);
        SuffixTreeNode prev = null;
        SuffixTreeNode child = this.firstChild;
        while (child != null && child.label.charAt(0) < first) {
            prev = child;
            child = child.next;
        }

        if (child == null || child.label.charAt(0) > first) {
            SuffixTreeNode newnode = new SuffixTreeNode(ind, str, level + 1, belong);
            newnode.parent = this;
            newnode.next = child;
            if (prev == null) {
                this.firstChild = newnode;
            } else {
                prev.next = newnode;
            }
            return;
        }

        // Identical to the existing label: just share ownership.
        if (str.equals(child.label)) {
            child.belongTo.putAll(belong);
            return;
        }

        // First characters match; measure the common prefix.
        int minLength = Math.min(child.label.length(), str.length());
        int common = 1;
        while (common < minLength && child.label.charAt(common) == str.charAt(common)) {
            common++;
        }

        // The whole label is a proper prefix of str: descend with the remainder.
        if (common == child.label.length()) {
            child.insert(ind, str.substring(common), level + 1, belong);
            child.belongTo.putAll(belong);
            return;
        }

        // str is shorter than the label, or they diverge mid-label: split the
        // edge. The existing node keeps the shared prefix; a new node takes the
        // old tail together with the old children and the old owners.
        SuffixTreeNode tail = new SuffixTreeNode(
                child.index, child.label.substring(common), level + 1, child.belongTo);
        tail.setChilden(child.firstChild);
        child.setChilden(tail);
        child.index = -1;
        child.label = child.label.substring(0, common);
        child.belongTo.putAll(belong);
        // The tail and everything below it sit one level deeper now.
        tail.lowDown();

        // Attach whatever is left of str after the shared prefix.
        if (common < str.length()) {
            child.insert(ind, str.substring(common), level + 1, belong);
        }
    }

    void print() {
    }

    /**
     * Pushes this subtree one level down: increments {@code level} on this
     * node and on all of its descendants. Used after an intermediate node has
     * been inserted above it.
     */
    void lowDown() {
        this.level++;
        for (SuffixTreeNode child = this.firstChild; child != null; child = child.next) {
            child.lowDown();
        }
    }
}