UVa760 - DNA Sequencing(后缀数组)

 A DNA molecule consists of two strands that wrap around each other to resemble a twisted ladder whose sides, made of sugar and phosphate molecules, are connected by rungs of nitrogen-containing chemicals called bases. Each strand is a linear arrangement of repeating similar units called nucleotides, which are each composed of one sugar, one phosphate, and a nitrogenous base. Four different bases are present in DNA: adenine (A), thymine (T), cytosine (C), and guanine (G). The particular order of the bases arranged along the sugar-phosphate backbone is called the DNA sequence; the sequence specifies the exact genetic instructions required to create a particular organism with its own unique traits.


Geneticists often compare DNA strands and are interested in finding the longest common base sequence in the two strands. Note that these strands can be represented as strings consisting of the letters a, t, c and g. So, the longest common sequence in the two strands atgc and tga is tg. It is entirely possible that two different common sequences exist that are the same length and are the longest possible common sequences. For example, in the strands atgc and gctg, the longest common sequences are gc and tg.

Input and Output 

Write a program that accepts as input two strings representing DNA strands, and prints as output the longest common sequence(s) in lexicographical order.

If there isn't any common sequence between the two strings, just print: "No common sequence."

If there is more than one test case, there must be a blank line between two consecutive test cases, both in the input and output files.

The strings are at most 300 characters long.

Sample Input 

atgc
tga

atgc
gctg

Sample Output 

tg

gc
tg


import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.Scanner;

/**
 * Solves UVa 760 (DNA Sequencing): for each pair of strings read from the
 * input, prints every longest common substring in lexicographical order, or
 * "No common sequence." when the strings share no character.
 *
 * <p>Approach: the two strings are joined as {@code first + '#' + second + '\0'},
 * a suffix array and an LCP ("height") array are built over the joined text,
 * and the answer length is found by binary search over the LCP values of
 * adjacent suffixes that start in different strings.
 *
 * <p>Assumptions: input characters have codes not greater than {@code 'z'}
 * (the counting sort is started with an alphabet of {@code 'z' + 1}) and the
 * strings contain neither {@code '#'} nor the NUL character, which serve as
 * the separator and the terminating sentinel.
 */
public class Main implements Runnable
{
	private static final boolean DEBUG = false;
	/** Initial capacity of the work arrays; they grow on demand for longer input. */
	private static final int MAXN = 660;
	/** Character written between the two strings inside the joined text. */
	private static final char SEPARATOR = '#';

	private PrintWriter cout;
	private Scanner cin;
	/** Joined text: first string, separator, second string, NUL sentinel at index n. */
	private char[] s = new char[MAXN];
	/** Suffix array: sa[i] is the start of the i-th smallest suffix. */
	private int[] sa = new int[MAXN];
	/** Rank keys of the current doubling round. */
	private int[] x = new int[MAXN];
	/** Scratch order by second key, then rank keys of the previous round. */
	private int[] y = new int[MAXN];
	/** Bucket counters of the counting sort. */
	private int[] c = new int[MAXN];
	/** Inverse of sa: rank[p] is the position of suffix p inside sa. */
	private int[] rank = new int[MAXN];
	/** height[i] is the common prefix length of suffixes sa[i - 1] and sa[i]. */
	private int[] height = new int[MAXN];
	/** n: index of the sentinel in s; len: length of the first string (index of the separator). */
	private int n, len;
	private boolean first = true;

	/**
	 * Opens the input scanner (a debug file when DEBUG is set, standard input
	 * otherwise) and the output writer on standard output.
	 *
	 * @throws IllegalStateException if DEBUG is set and the debug file cannot be opened
	 */
	private void init()
	{
		if (DEBUG) {
			try {
				cin = new Scanner(new BufferedInputStream(new FileInputStream(
						"d:\\OJ\\uva_in.txt")));
			} catch (java.io.FileNotFoundException e) {
				// Fail with the cause instead of continuing with a null scanner.
				throw new IllegalStateException("cannot open debug input file", e);
			}
		} else {
			cin = new Scanner(new BufferedInputStream(System.in));
		}

		cout = new PrintWriter(new OutputStreamWriter(System.out));
	}

	/**
	 * Reads the next pair of whitespace-separated tokens and stores them in
	 * {@code s} as {@code first + '#' + second + '\0'}, setting {@code len}
	 * and {@code n} accordingly.
	 *
	 * @return true when a complete pair was read; false at end of input or
	 *         when only one token of a pair is left
	 */
	private boolean input()
	{
		if (!cin.hasNext()) {
			return false;
		}
		String a = cin.next();

		// A dangling first string without a partner ends the input quietly
		// instead of raising NoSuchElementException.
		if (!cin.hasNext()) {
			return false;
		}
		String b = cin.next();

		len = a.length();
		n = len + 1 + b.length();
		ensureCapacity(n + 1);

		a.getChars(0, len, s, 0);
		s[len] = SEPARATOR;
		b.getChars(0, b.length(), s, len + 1);
		s[n] = 0;

		return true;
	}

	/**
	 * Grows every work array so that indices {@code 0 .. required - 1} are valid.
	 * No contents are preserved: each test case rewrites everything it reads.
	 * Arrays never shrink below MAXN, so {@code c} always covers the initial
	 * alphabet of {@code 'z' + 1} buckets.
	 */
	private void ensureCapacity(int required)
	{
		if (required <= s.length) {
			return;
		}

		int size = Math.max(required, 2 * s.length);
		s = new char[size];
		sa = new int[size];
		x = new int[size];
		y = new int[size];
		c = new int[size];
		rank = new int[size];
		height = new int[size];
	}

	/**
	 * Builds the suffix array of {@code s[0 .. n - 1]} into {@code sa} by
	 * prefix doubling with counting sorts.
	 *
	 * @param n number of characters to sort, including the terminating sentinel
	 * @param m exclusive upper bound of the character codes in the text
	 */
	void build_sa(int n, int m)
	{
		// Round 0: counting sort by single character.
		for (int i = 0; i < m; i++) {
			c[i] = 0;
		}
		for (int i = 0; i < n; i++) {
			c[x[i] = s[i]]++;
		}
		for (int i = 1; i < m; i++) {
			c[i] += c[i - 1];
		}
		for (int i = n - 1; i >= 0; i--) {
			sa[--c[x[i]]] = i;
		}

		for (int k = 1; k <= n; k <<= 1) {
			// Order by second key: suffixes with no second half come first,
			// the rest follow in the order of the suffix k positions later.
			int p = 0;
			for (int i = n - k; i < n; i++) {
				y[p++] = i;
			}
			for (int i = 0; i < n; i++) {
				if (sa[i] >= k) {
					y[p++] = sa[i] - k;
				}
			}

			// Stable counting sort by first key.
			for (int i = 0; i < m; i++) {
				c[i] = 0;
			}
			for (int i = 0; i < n; i++) {
				c[x[y[i]]]++;
			}
			for (int i = 1; i < m; i++) {
				c[i] += c[i - 1];
			}
			for (int i = n - 1; i >= 0; i--) {
				sa[--c[x[y[i]]]] = y[i];
			}

			// Swap roles: y now holds the previous ranks, x receives the new ones.
			int[] previous = x;
			x = y;
			y = previous;

			p = 1;
			x[sa[0]] = 0;
			for (int i = 1; i < n; i++) {
				// The second lookup stays in range because equal first keys
				// imply neither suffix reaches the unique sentinel within k characters.
				if (y[sa[i - 1]] == y[sa[i]] && y[sa[i - 1] + k] == y[sa[i] + k]) {
					x[sa[i]] = p - 1;
				} else {
					x[sa[i]] = p++;
				}
			}

			if (p >= n) {
				break;
			}
			m = p;
		}
	}

	/**
	 * Fills {@code rank} and {@code height} from {@code sa}.
	 *
	 * @param n index of the sentinel in {@code s}; suffixes 0 .. n - 1 get a height
	 */
	private void getHeight(int n)
	{
		for (int i = 0; i <= n; i++) {
			rank[sa[i]] = i;
		}

		int k = 0;
		for (int i = 0; i < n; i++) {
			// Carry over the previous match length minus one, never below zero.
			if (k > 0) {
				k--;
			}

			int j = sa[rank[i] - 1];
			// The unique sentinel guarantees a mismatch before either index leaves the text.
			while (s[i + k] == s[j + k]) {
				k++;
			}
			height[rank[i]] = k;
		}
	}

	/** Tells whether suffix starts a and b lie on opposite sides of the separator at index len. */
	private static boolean fromDifferentStrands(int a, int b, int len)
	{
		return (a < len && b > len) || (a > len && b < len);
	}

	/**
	 * Tests whether the two strings share a substring of length {@code mid}.
	 *
	 * @param n   index of the sentinel; adjacent pairs 1 .. n of {@code sa} are examined
	 * @param len index of the separator in the joined text
	 * @param mid candidate common length
	 * @return true when some adjacent pair of suffixes with height at least
	 *         {@code mid} starts in different strings
	 */
	boolean check(int n, int len, int mid)
	{
		for (int i = 1; i <= n; i++) {
			if (height[i] >= mid && fromDifferentStrands(sa[i - 1], sa[i], len)) {
				return true;
			}
		}

		return false;
	}

	/**
	 * Solves the test case currently stored in {@code s} and prints its answer,
	 * preceded by a blank line for every case except the first.
	 */
	private void solve()
	{
		if (!first) {
			cout.println();
		}
		first = false;

		build_sa(n + 1, 'z' + 1);
		getHeight(n);

		// Binary search for the smallest length that is NOT common; check() is
		// monotone, so the answer is one less.
		int low = 0, high = n;
		while (low < high) {
			int mid = (low + high) >>> 1;
			if (check(n, len, mid)) {
				low = mid + 1;
			} else {
				high = mid;
			}
		}
		int best = low - 1;

		if (best > 0) {
			int i = 1;
			while (i <= n) {
				if (height[i] < best) {
					i++;
					continue;
				}

				// [i, end) is a maximal run of adjacent suffixes sharing a prefix of length best.
				int end = i;
				while (end <= n && height[end] >= best) {
					end++;
				}

				boolean common = false;
				for (int k = i; k < end && !common; k++) {
					common = fromDifferentStrands(sa[k - 1], sa[k], len);
				}

				// Runs are visited in suffix-array order, so the output is already sorted.
				if (common) {
					cout.println(new String(s, sa[i], best));
				}
				i = end;
			}
		} else {
			cout.println("No common sequence.");
		}

		cout.flush();
	}

	/** Reads test cases until the input is exhausted and solves each one. */
	@Override
	public void run()
	{
		init();

		while (input()) {
			solve();
		}
	}

	public static void main(String[] args)
	{
		new Main().run();
	}
}

你可能感兴趣的:(UVa760 - DNA Sequencing(后缀数组))