找出两个字符串中,连续相同的字符个数最少为min的字符串
(具体涉及一些不能公布的,还是不说了,有更好的算法,请拍砖讨论)
主要是这个函数--Compare2Str(String str1, String str2, int min),其他不用管
-----------------------------似乎还有点问题!等待明天继续调试
package com.ssj.test; import java.util.ArrayList; import java.util.Iterator; import java.util.List; public class CompareString { /** * @param args */ public static void main(String[] args) { // TODO Auto-generated method stub String contig1 = "AAGAAACGTATCCGTGTTTACATGGATGGATGTTTCGATCTCATGCATTATGGACACGCA"+ "AATGCTTTAAGACAAGCTAAAGCTTTAGGAGATGAACTAGTAGTTGGAATTGTAAGTGAT" +"GAAGAAATCATCAAGAACAAAGGTCCTCCTGTTTTATCAATGGAGGAAAGATTGGCACTT" +"GTTAGTGGATTGAAGTGGGTTGATGAAGTTATTGCTAATGCACCATATGCTATTACCGAA" +"GACTTCATGAACAGTCTATTTAAAGAACATAAGATTGATTATATCATTCATGGAGATGAT" +"CCTTGTTTGCTTCCTGATGGAAGTGATGCATATGCTTTAGCAAAAAAAGTCGGTCGTTAC" +"AAGCAAATTAAACGTACAGAAGGCGTATCAAGCACCGACATTGTAGGAAGGATACTTGCA" +"TCCATGGAAGATAAAGAAGTATGTGAAGTTAATGGAGAAAGTAATGAAATGAATAAAAAT" +"TTGGACAGCCATTTCAAGGCCAAACATGCCTCTAATTTTTTGCCTACATCAAGAAGAATT" +"GTTCAGTTT"; String EL372564 = "AAGAAACGTATCCGTGTGTACATGGATGGATGTTTTGATCTCATGCATTATGGTCACGCAAATGCTTTAAGACAAGCTAAAGCTTTAGGAGACGAATTAGTGGTTGGAATTGTAAGTGATGAAGAAATCATCAAGAACAAAGGTCCTCCTGTTTTATCAATGGAGGAAAGATTGGCACTTGTTAGTGGATTGAAGTGGGTTGATGAAGTTATTGCTAATGCACCTTATGCTATTACTGAAGACTTCATGAACAGTCTATTTAAAGAACATAAGATTGATTATATCATTCATGGAGATGATCCTTGTTTGCTTCCTGATGGAAGTGATGCATATGCTTTAGCAAAAAAA"; String EL371249 = "AAGAAACGTATCCGTGTGTACATGGATGGATGTTTTGATCTCATGCATTATGGTCACGCAAATGCTTTAAGACAAGCTAAAGCTTTAGGAGACGAATTAGTGGTTGGAATTGTAAGTGATGAAGAAATCGTCAAGAACAAAGGTCCTCCTGTTTTATCAATGGAGGAAAGATTGGCACTTGTTAGTGGATTGAAGTGGGTTGATGAAGTTATTGCTAATGCACCTTATGCTATTACTGAAGACTTCATGAACAGTCTATTTAAAGAACATAAGATTGATTATATCATTCATGGAGATGATCCTTGTTTGCTTCCTGATGGAAGTGATGCATATGCTTTAGCAAAAAAA"; String EL347301 = "AAGAAACGTATCCGTGTGTACATGGATGGATGTTTTGATCTCATGCATTATGGTCACGCAAATGCTTTAAGACAAGCTAAAGCTTTAGGAGACGAATTAGTGGTTGGAATTGTAAGTGATGAAGAAATCATCAAGAACAAAGGTCCTCCTGTTTTATCAATGGAGGAAAGATTGGCACTTGTTAGTGGATTGAAGTGGGTTGATGAAGTTATTGCTAATGCACCTTATGCTATTACTGAAGACTTCATGAACAGTCTATTTAAAGAACATAAGATTGATTATATCATTCATGGAGATGATCCTTGTTTGCTTCCTGATGGAAGTGATGCATATGCTTTAGCAAAAAAAGTTGGTCGTTACAAACAAATTAAACGTACAGAAGGCGTATCAAGCACCGACATTGTAGGAAGGATACTTGCATCCATGGAAGAT"; String EH685069 = "AAGAAACGTATCCGTGTTTACATGGATGGATGTTTCGATCTCATGCATTATGGACACGCAAATGCTTTAAGACAAGCTAAAGCTTTAGGAGATGAACTAGTAGTTGGAATTGTAAGTGATGAAGAAATCATCAAGAACAAAGGTCCTCCTGTTTTATCAATGGAGGAAACATTGGCACTTGTTAGTGGATTGAAGTGGGTTGATGAAGTTATTGCTAATGCACCATATGCTATTACCGAAGACTTCATGAACAGTCTATTTAAAGAACATAAGATTGATTATATCATTCATGGAGATGATCCTTGTCTGCTTCCTGATGGAAGTGATGCATATGCTTTAGCAAAAAAAGTCGGTCGTTACAAGCAAATTAAACGTACAGAAAGCGTATCAAGCACCGACATTGTAGGAAGGGTACTTGCATCCATGGAAGATAAAGAACTATGTGAA"; // String str3 = "AAGAAACGTATCCGTGT--TTACATGGATGGATGTTTCGATCTCATGCATTATGGACACGCAAATGC"; System.out.println("--------contig1 and EL372564---------"); CompareTest(contig1, EL372564); System.out.println("\n--------EL371249 and EL372564---------"); CompareTest(EL371249, EL372564); System.out.println("\n--------EL372564 and EL347301---------"); CompareTest(EL372564, EL347301); System.out.println("\n--------contig1 and EH685069---------"); CompareTest(contig1, EH685069); //System.out.println("AAGAAACGTATCCGTGTTTACATGGATGGATGTTTCGATCTCATGCATTATGGACACGCA".length()); String contig = "TNNNNNNNNNNNNNNNNNNNNNACTTATGCACAGACCAACCTGAATGTTGACGTGCTCCA" +"CATTTCCTACCTATCTTACACTTACCATAATGGGTGGAATATGTTCTTTCTAAAGTATTT" +"TCATTAGGTTTTAAAGCCCACATGCATCTTAAAGAAAATGTAAACCACATGTTTGACATT" +"CATTTACACTTAACTACTTTTTTTCCTATAAAAAAGAAAAATGTAAATGTGATGTTCAAA" +"TTGCCAATTTGAGAGCCATATTGTGTACATTAAAATTAGTGGCATTTCAAGTCATTTTTC" +"TTTCCATGGAATTATTACAAAGGAAAATGCCTTTGAACAATAGAAAGTCATGGATCTATC" +"CCAACTCTAAATAAGTGGTATTTTAGTGGGTTTTAAACATGGTTAAAGTTCTTATCCCAG" +"AGCTCTAAACCACAAAGCCCAGTTATATTGGTTCTCTTCATCTGTCAGAGGTAAAAACAG" +"AGGGTATTGATCTTTTATTTATCTTTGAAGCTTTAAAGTATAATTAGTTAA"+ "AGAAAAAGT" +"TTCTGTACTTAGAACATCTCGGTTCTAGCCTTGACTTATGCTAGAAACAAGGATTATATC" +"TGTATTAGTCTGGTCATACACTGTTATAAAGAACTACCTGAGACTGGATAATTTATAAAG" +"AAAAGAGGTTTAATTAGCTTATGGTTCCACAGGCTGTACAGGAAGCATGCCTGGGGAGGC" +"CTTAAGAAACTTACAATCATGGCAGAATGTGAAGAGGAAGAAAGCATGTTTTCTCATGGC" +"CAGAGCAAGAGGAAGAGAGAGAAGGGGGAGGTGCTACTCCACTTTTAANGCAACCCAGAT" +"CTCATGAGAACTCCACTCCATTGTGCACAAAAATGGCCAAGGGCGGAAATTTCCACCCTC" +"GTGATCNNCAATCACCTCCCACCAGGCCCCTCCTCTATTATTGGGGATTACAATTTGGCA" +"TGAGATCTGGGTGAGGACACAAATCCAAACCATATCAACATCCCTACTCTACTTACTTTA" +"TAAACTTGGTGGGAGAATCAAGTGGCATGGGGATGAAAGTCAATGGATACCGGAAAGAGG" +"ACTAATCCATGGCTGAAAGGGGGGTATCCAATTACACCAAGGCTTTACAGGGGAAATATA" +"CAGAGGTTAAAACAAAGGTTTGGTCTTTCCAAAA"; String ENV1180 = "TNNNNNNNNNNNNNNNNNNNNNACTTATGCACAGACCAACCTGAATGTTGACGTGCTCCA" +"CATTTCCTACCTATCTTACACTTACCATAATGGGTGGAATATGTTCTTTCTAAAGTATTT" +"TCATTAGGTTTTAAAGCCCACATGCATCTTAAAGAAAATGTAAACCACATGTTTGACATT" +"CATTTACACTTAACTACTTTTTTTCCTATAAAAAAGAAAAATGTAAATGTGATGTTCAAA" +"TTGCCAATTTGAGAGCCATATTGTGTACATTAAAATTAGTGGCATTTCAAGTCATTTTTC" +"TTTCCATGGAATTATTACAAAGGAAAATGCCTTTGAACAATAGAAAGTCATGGATCTATC" +"CCAACTCTAAATAAGTGGTATTTTAGTGGGTTTTAAACATGGTTAAAGTTCTTATCCCAG" +"AGCTCTAAACCACAAAGCCCAGTTATATTGGTTCTCTTCATCTGTCAGAGGTAAAAACAG" +"AGGGTATTGATCTTTTATTTATCTTTGAAGCTTTAAAGTATAATTAGTTAA" + "GGAAAAGTT" +"CTGTACTTAGAACATCTCGGTTCTAGCCTTGACTTATGCTAGAAACAAGGATTATATCTG" +"TATTAGTCTGGTCTACACTGTTATAAAGAACTACCTG"; String ENV446 = "CCXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTATCCAACTCTAAATAAGTGGTATTT" +"AGTGGGTTTTAAACATGGTTAAAGTTCTTATCCCAGAGCTCTAAACCACAAAGCCCAGTT" +"ATATTTGTTCTCTTCATCTGTCAGAGGTAAAAACAGAGGGTATTGATCTTTTATTTATCT" +"TTGAAGCTTTAAAGTATAATTTGTTCAAGGAAAAGTTTCTGTACTTAGAACATCTCGGTT" +"CTAGCCTTGACTTATGCTAGAAACAAGGATTATATCTGTATTAGTCTGTTCATACACTGT" +"TATAAAGAACTACCTGAGACTGGATAATTTATAAAGAAAAGAGGTTTAATTAGCTTATGG" +"TTCCACAGGCTGTACAGGAAGCATGCCTGGGGAGGCCTTAAGAAACTTACAATCATGGCA" +"GAATGTGAAGAGGAAGAAAGCATGTTTTCTCATGGCCAGAGCAAGAGGAAGAGAGAGAAG" +"GGGGAGGTGCTACTCCACTTTTAANGCAACCCAGATCTCATGAGAACTCCACTCCATTGT" +"GCACAAAAATGGCCAAGGGCGGAAATTTCCACCCTCGTGATCNNCAATCACCTCCCACCA" +"GGCCCCTCCTCTATTATTGGGGATTACAATTTGGCATGAGATCTGGGTGAGGACACAAAT" +"CCAAACCATATCAACATCCCTACTCTACTTACTTTATAAACTTGGTGGGAGAATCAAGTG" +"GCATGGGGATGAAAGTCAATGGATACCGGAAAGAGGACTAATCCATGGCTGAAAGGGGGG" +"TATCCAATTACACCAAGGCTTTACAGGGGAAATATACAGAGGTTAAAACAAAGGTTTGGT" +"CTTTCCAAAA"; System.out.println("\n--------contig and ENV1180---------"); CompareTest(ENV1180, contig); String str = ""; // Compare2Str("abcdbce","cbce",2); Compare2Str(ENV446,ENV1180,20); // System.out.println(ENV446.indexOf()); } public static void CompareTest(String str1, String str2){ int len1 = str1.length(); int len2= str2.length(); StringBuffer strb = new StringBuffer(); int k = (len1>len2 ? len2 : len1); //System.out.println("k = "+k); // int num = 0; for (int i = 0; i<k; i++){ if(str1.charAt(i) == str2.charAt(i)){ strb.append(str1.charAt(i)); } else { // num = i; break; } } System.out.println("the num of the same char : "+strb.length()); System.out.println("the same String is : " +strb.toString()); // System.out.println("the arrays1 is : " +strb.toString()); // if(strb.toString().equals(str1.substring(0,num))){ // System.out.println("Very Good!!!"); // } } public static void Compare2Str(String str1, String str2, int min){ //找出两个字符串中,连续相同的字符个数最少为min的字符串 int len1 = str1.length(); int len2= str2.length(); StringBuffer strb = new StringBuffer(); String maxStr, minStr; int minLen = 0; int maxLen = 0; if(len1 > len2){ maxLen = len1; minLen = len2; maxStr = str1; minStr = str2; } else{ maxLen = len2; minLen = len1; maxStr = str2; minStr = str1; } List strList = new ArrayList(); char c1, c2; for (int i=0; i<maxLen; i++){ StringBuffer tempStr = new StringBuffer(""); c1 = maxStr.charAt(i); for(int j=0; j<minLen; j++){ c2 = minStr.charAt(j); while(c1 == c2){ tempStr.append(c1); if(++i<maxLen && ++j<minLen){ c1 = maxStr.charAt(i); c2 = minStr.charAt(j); } else{ break; } } if(tempStr.length()>=min){ strList.add(tempStr); } } } Iterator iter = strList.iterator(); while(iter.hasNext()){ StringBuffer s = (StringBuffer)iter.next(); System.out.println("-------" + s.toString()); } } public static void isHaveSubString(String str1, String str2){ if(str1.startsWith(str2)){ System.out.println("---OK--"); } } }