GenBank filetype to Fasta filetype

Bioinfo~~~

Converting a GenBank flat file to a FASTA file

.gb -> .fasta

 

/**
 * Converts GenBank flat-file records (.gb) into FASTA records (.fasta).
 *
 * <p>Every record that provides ORGANISM, ACCESSION, VERSION, DEFINITION and
 * ORIGIN entries is written as two lines: a header
 * <pre>
 * &gt;Genu_spec.ACCESSION ( VERSION GI:gi ) { DEFINITION } [ ORGANISM ]
 * </pre>
 * followed by the upper-cased sequence with position numbers and blanks
 * removed. Only the first line of each header entry is used (continuation
 * lines are ignored). Records that lack one of the entries are skipped.
 *
 * <p>Parsing state is kept in instance fields.
 */
public class G2F {

 /** Width of the keyword column at the start of a GenBank header line. */
 private static final int KEY_WIDTH = 12;
 /** Width of the position-number column in front of each sequence line. */
 private static final int SEQUENCE_OFFSET = 10;
 /** Maximum number of characters taken from genus and species for the short name. */
 private static final int SHORT_PART_MAX = 4;

 // Keywords padded to KEY_WIDTH exactly as they appear in the flat file.
 private static final String LOCUS = "LOCUS       "; //"SOURCE(name)"+"." +"LOCUS"
 private static final String ORGANISM = "  ORGANISM  ";
 private static final String ACCESSION = "ACCESSION   ";
 private static final String VERSION = "VERSION     ";
 private static final String DEFINITION = "DEFINITION  ";
 private static final String ORIGIN = "ORIGIN      ";
 private static final String END_DATA = "//";

 // Per-record progress flags: "begin" = not seen yet, "done" = entry parsed,
 // "nextS" (ORIGIN only) = sequence fully read.
 private String LOCUS_S = "begin",
   ORGANISM_S = "begin",
   ACCESSION_S = "begin",
   VERSION_S = "begin",
   DEFINITION_S = "begin",
   ORIGIN_S = "begin";

 String short_name = null, accession_name = null,
   version_name = null, definition_name = null,
   organism_name = null, warning_mess = null,
   origin = null,
   genbank_name = null,
   sequence = "",
   firstline = null, secondline = null;

 /** Marks every entry of the current record as not yet seen. */
 public void resetState()
 {
  LOCUS_S = "begin";
  ORGANISM_S = "begin";
  ACCESSION_S = "begin";
  VERSION_S = "begin";
  DEFINITION_S = "begin";
  ORIGIN_S = "begin";
 }

 /** Clears every value collected for the current record. */
 public void resetName()
 {
  short_name = null;
  accession_name = null;
  version_name = null;
  definition_name = null;
  organism_name = null;
  warning_mess = null;
  genbank_name = null;
  origin = null;
  sequence = "";
  firstline = null;
  secondline = null;
 }

 /**
  * Reads all GenBank records from {@code inputfile} and writes one FASTA
  * record (header line + sequence line) per complete record to
  * {@code outputfile}.
  *
  * @param inputfile  path of the GenBank flat file to read
  * @param outputfile path of the FASTA file to create or overwrite
  * @throws IOException if the input cannot be read or the output cannot be written
  */
 public void scan1squence(String inputfile, String outputfile) throws IOException
 {
  // Start clean so that a previous call on this instance cannot leak values.
  resetName();
  resetState();

  try (BufferedReader in = new BufferedReader(new FileReader(inputfile));
    PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(outputfile)))) {

   String sline;
   while ((sline = in.readLine()) != null) {
    if (isRecordEnd(sline)) {
     // Record ended without an ORIGIN section: nothing to emit.
     resetName();
     resetState();
     continue;
    }
    if (sline.trim().isEmpty()) {
     continue;
    }

    String key = keyOf(sline);
    String value = sline.length() > KEY_WIDTH ? sline.substring(KEY_WIDTH) : "";

    if (key.equals(ORGANISM)) {
     organism_name = value;
     short_name = shortNameOf(value);
     ORGANISM_S = "done";
    } else if (key.equals(ACCESSION)) {
     accession_name = value;
     ACCESSION_S = "done";
    } else if (key.equals(VERSION)) {
     // The GI number is optional; it is absent in newer flat files.
     String[] parts = value.split("GI:");
     version_name = parts[0].trim();
     genbank_name = parts.length > 1 ? parts[1].trim() : null;
     VERSION_S = "done";
    } else if (key.equals(DEFINITION)) {
     definition_name = value;
     DEFINITION_S = "done";
    } else if (key.equals(ORIGIN)) {
     ORIGIN_S = "done";
     // Consumes everything up to and including the "//" terminator (or EOF).
     sequence = readSequence(in);
     ORIGIN_S = "nextS";

     if (isRecordComplete()) {
      firstline = buildHeader();
      secondline = sequence;
      out.println(firstline);
      out.println(secondline);
     }
     // The terminator was consumed above, so the record is over either way;
     // reset unconditionally so an incomplete record cannot pollute the next one.
     resetName();
     resetState();
    }
   }

   // PrintWriter never throws on write failures; surface them explicitly.
   if (out.checkError()) {
    throw new IOException("Failed to write FASTA output to " + outputfile);
   }
  }
 }

 /** Returns true when the line is the "//" record terminator (surrounding blanks ignored). */
 private static boolean isRecordEnd(String line)
 {
  return line.trim().equals(END_DATA);
 }

 /** Returns the keyword column of a line, blank-padded to KEY_WIDTH for short lines. */
 private static String keyOf(String line)
 {
  if (line.length() >= KEY_WIDTH) {
   return line.substring(0, KEY_WIDTH);
  }
  StringBuilder padded = new StringBuilder(line);
  while (padded.length() < KEY_WIDTH) {
   padded.append(' ');
  }
  return padded.toString();
 }

 /** Returns true when all header entries were seen and the sequence has been read. */
 private boolean isRecordComplete()
 {
  return ORGANISM_S.equals("done") && ACCESSION_S.equals("done")
    && VERSION_S.equals("done") && DEFINITION_S.equals("done")
    && ORIGIN_S.equals("nextS");
 }

 /** Reads sequence lines until the record terminator or end of file and returns the bases upper-cased. */
 private static String readSequence(BufferedReader in) throws IOException
 {
  StringBuilder bases = new StringBuilder();
  String line;
  while ((line = in.readLine()) != null && !isRecordEnd(line)) {
   // Lines too short to hold anything after the position column carry no bases.
   if (line.length() > SEQUENCE_OFFSET) {
    bases.append(line.substring(SEQUENCE_OFFSET).replace(" ", "")
      .toUpperCase(java.util.Locale.ROOT));
   }
  }
  return bases.toString();
 }

 /** Builds "Genu_spec" from the organism name; a missing or too short species becomes "XXX". */
 private static String shortNameOf(String organism)
 {
  String[] words = organism.trim().split("\\s+");
  String genus = words[0];
  String species = words.length > 1 ? words[1] : "";
  if (species.length() < 3) {
   species = "XXX";
  }
  return prefixOf(genus) + "_" + prefixOf(species);
 }

 /** Returns at most SHORT_PART_MAX leading characters of the word. */
 private static String prefixOf(String word)
 {
  return word.length() > SHORT_PART_MAX ? word.substring(0, SHORT_PART_MAX) : word;
 }

 /** Assembles the FASTA header line from the collected fields; the GI part is omitted when unknown. */
 private String buildHeader()
 {
  String giPart = (genbank_name == null || genbank_name.isEmpty()) ? "" : " GI:" + genbank_name;
  return ">" + short_name + "." + accession_name
    + " ( " + version_name + giPart + " ) { " + definition_name
    + " } [ " + organism_name + " ]";
 }
}

 

 

/**
 * Converts GenBank flat-file records (.gb) into FASTA records (.fasta).
 *
 * <p>Every record that provides ORGANISM, ACCESSION, VERSION, DEFINITION and
 * ORIGIN entries is written as two lines: a header
 * <pre>
 * &gt;Genu_spec.ACCESSION ( VERSION GI:gi ) { DEFINITION } [ ORGANISM ]
 * </pre>
 * followed by the upper-cased sequence with position numbers and blanks
 * removed. Only the first line of each header entry is used (continuation
 * lines are ignored). Records that lack one of the entries are skipped.
 *
 * <p>Parsing state is kept in instance fields.
 */
public class G2F {

	/** Width of the keyword column at the start of a GenBank header line. */
	private static final int KEY_WIDTH = 12;
	/** Width of the position-number column in front of each sequence line. */
	private static final int SEQUENCE_OFFSET = 10;
	/** Maximum number of characters taken from genus and species for the short name. */
	private static final int SHORT_PART_MAX = 4;

	// Keywords padded to KEY_WIDTH exactly as they appear in the flat file.
	private static final String LOCUS = "LOCUS       "; //"SOURCE(name)"+"." +"LOCUS"
	private static final String ORGANISM = "  ORGANISM  ";
	private static final String ACCESSION = "ACCESSION   ";
	private static final String VERSION = "VERSION     ";
	private static final String DEFINITION = "DEFINITION  ";
	private static final String ORIGIN = "ORIGIN      ";
	private static final String END_DATA = "//";

	// Per-record progress flags: "begin" = not seen yet, "done" = entry parsed,
	// "nextS" (ORIGIN only) = sequence fully read.
	private String LOCUS_S = "begin",
			ORGANISM_S = "begin",
			ACCESSION_S = "begin",
			VERSION_S = "begin",
			DEFINITION_S = "begin",
			ORIGIN_S = "begin";

	String short_name = null, accession_name = null,
			version_name = null, definition_name = null,
			organism_name = null, warning_mess = null,
			origin = null,
			genbank_name = null,
			sequence = "",
			firstline = null, secondline = null;

	/** Marks every entry of the current record as not yet seen. */
	public void resetState()
	{
		LOCUS_S = "begin";
		ORGANISM_S = "begin";
		ACCESSION_S = "begin";
		VERSION_S = "begin";
		DEFINITION_S = "begin";
		ORIGIN_S = "begin";
	}

	/** Clears every value collected for the current record. */
	public void resetName()
	{
		short_name = null;
		accession_name = null;
		version_name = null;
		definition_name = null;
		organism_name = null;
		warning_mess = null;
		genbank_name = null;
		origin = null;
		sequence = "";
		firstline = null;
		secondline = null;
	}

	/**
	 * Reads all GenBank records from {@code inputfile} and writes one FASTA
	 * record (header line + sequence line) per complete record to
	 * {@code outputfile}.
	 *
	 * @param inputfile  path of the GenBank flat file to read
	 * @param outputfile path of the FASTA file to create or overwrite
	 * @throws IOException if the input cannot be read or the output cannot be written
	 */
	public void scan1squence(String inputfile, String outputfile) throws IOException
	{
		// Start clean so that a previous call on this instance cannot leak values.
		resetName();
		resetState();

		try (BufferedReader in = new BufferedReader(new FileReader(inputfile));
				PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(outputfile)))) {

			String sline;
			while ((sline = in.readLine()) != null) {
				if (isRecordEnd(sline)) {
					// Record ended without an ORIGIN section: nothing to emit.
					resetName();
					resetState();
					continue;
				}
				if (sline.trim().isEmpty()) {
					continue;
				}

				String key = keyOf(sline);
				String value = sline.length() > KEY_WIDTH ? sline.substring(KEY_WIDTH) : "";

				if (key.equals(ORGANISM)) {
					organism_name = value;
					short_name = shortNameOf(value);
					ORGANISM_S = "done";
				} else if (key.equals(ACCESSION)) {
					accession_name = value;
					ACCESSION_S = "done";
				} else if (key.equals(VERSION)) {
					// The GI number is optional; it is absent in newer flat files.
					String[] parts = value.split("GI:");
					version_name = parts[0].trim();
					genbank_name = parts.length > 1 ? parts[1].trim() : null;
					VERSION_S = "done";
				} else if (key.equals(DEFINITION)) {
					definition_name = value;
					DEFINITION_S = "done";
				} else if (key.equals(ORIGIN)) {
					ORIGIN_S = "done";
					// Consumes everything up to and including the "//" terminator (or EOF).
					sequence = readSequence(in);
					ORIGIN_S = "nextS";

					if (isRecordComplete()) {
						firstline = buildHeader();
						secondline = sequence;
						out.println(firstline);
						out.println(secondline);
					}
					// The terminator was consumed above, so the record is over either way;
					// reset unconditionally so an incomplete record cannot pollute the next one.
					resetName();
					resetState();
				}
			}

			// PrintWriter never throws on write failures; surface them explicitly.
			if (out.checkError()) {
				throw new IOException("Failed to write FASTA output to " + outputfile);
			}
		}
	}

	/** Returns true when the line is the "//" record terminator (surrounding blanks ignored). */
	private static boolean isRecordEnd(String line)
	{
		return line.trim().equals(END_DATA);
	}

	/** Returns the keyword column of a line, blank-padded to KEY_WIDTH for short lines. */
	private static String keyOf(String line)
	{
		if (line.length() >= KEY_WIDTH) {
			return line.substring(0, KEY_WIDTH);
		}
		StringBuilder padded = new StringBuilder(line);
		while (padded.length() < KEY_WIDTH) {
			padded.append(' ');
		}
		return padded.toString();
	}

	/** Returns true when all header entries were seen and the sequence has been read. */
	private boolean isRecordComplete()
	{
		return ORGANISM_S.equals("done") && ACCESSION_S.equals("done")
				&& VERSION_S.equals("done") && DEFINITION_S.equals("done")
				&& ORIGIN_S.equals("nextS");
	}

	/** Reads sequence lines until the record terminator or end of file and returns the bases upper-cased. */
	private static String readSequence(BufferedReader in) throws IOException
	{
		StringBuilder bases = new StringBuilder();
		String line;
		while ((line = in.readLine()) != null && !isRecordEnd(line)) {
			// Lines too short to hold anything after the position column carry no bases.
			if (line.length() > SEQUENCE_OFFSET) {
				bases.append(line.substring(SEQUENCE_OFFSET).replace(" ", "")
						.toUpperCase(java.util.Locale.ROOT));
			}
		}
		return bases.toString();
	}

	/** Builds "Genu_spec" from the organism name; a missing or too short species becomes "XXX". */
	private static String shortNameOf(String organism)
	{
		String[] words = organism.trim().split("\\s+");
		String genus = words[0];
		String species = words.length > 1 ? words[1] : "";
		if (species.length() < 3) {
			species = "XXX";
		}
		return prefixOf(genus) + "_" + prefixOf(species);
	}

	/** Returns at most SHORT_PART_MAX leading characters of the word. */
	private static String prefixOf(String word)
	{
		return word.length() > SHORT_PART_MAX ? word.substring(0, SHORT_PART_MAX) : word;
	}

	/** Assembles the FASTA header line from the collected fields; the GI part is omitted when unknown. */
	private String buildHeader()
	{
		String giPart = (genbank_name == null || genbank_name.isEmpty()) ? "" : " GI:" + genbank_name;
		return ">" + short_name + "." + accession_name
				+ " ( " + version_name + giPart + " ) { " + definition_name
				+ " } [ " + organism_name + " ]";
	}
}

 

你可能感兴趣的:(File)