【原】 POJ 1007 DNA Sorting 逆序数 解题报告

 

http://poj.org/problem?id=1007

求逆序数的方法:
1、此题每个字符串只含 A、C、G、T 四种字母,所以可以用类似 counting sort 的方法从后往前扫描字符串,用 a[1..3] 分别记录已扫描部分中属于 {A}、{A,C}、{A,C,G} 的字符个数,从而求出每个字符与其后面字符构成的逆序对数,复杂度为 O(n)。但如果输入的字符集没有限制,就不能只靠 a[1..3] 这样做了:每扫描一个字符需要递增的计数器很多,需要判断的分支也很多,复杂度太高。
2、利用 merge sort 求逆序数,复杂度为 O(n·lg n);再按逆序数对所有字符串排序(这里用 multimap 简单代替)。

Description

One measure of ``unsortedness'' in a sequence is the number of pairs of entries that are out of order with respect to each other. For instance, in the letter sequence ``DAABEC'', this measure is 5, since D is greater than four letters to its right and E is greater than one letter to its right. This measure is called the number of inversions in the sequence. The sequence ``AACEDGG'' has only one inversion (E and D)---it is nearly sorted---while the sequence ``ZWQM'' has 6 inversions (it is as unsorted as can be---exactly the reverse of sorted).
You are responsible for cataloguing a sequence of DNA strings (sequences containing only the four letters A, C, G, and T). However, you want to catalog them, not in alphabetical order, but rather in order of ``sortedness'', from ``most sorted'' to ``least sorted''. All the strings are of the same length.

Input

The first line contains two integers: a positive integer n (0 < n <= 50) giving the length of the strings; and a positive integer m (0 < m <= 100) giving the number of strings. These are followed by m lines, each containing a string of length n.

Output

Output the list of input strings, arranged from ``most sorted'' to ``least sorted''. Since two strings can be equally sorted, then output them according to the original order.

Sample Input

10 6

AACATGAAGG

TTTTGGCCAA

TTTGGCCAAA

GATCAGATTT

CCCGGGGGGA

ATCGATGCAT

Sample Output

CCCGGGGGGA

AACATGAAGG

GATCAGATTT

ATCGATGCAT

TTTTGGCCAA

TTTGGCCAAA

 

   1:  
   2: #include <stdio.h>
   3: #include <iostream>
   4: #include <string>
   5: #include <map>
   6: #include <fstream>
   7:  
   8: using namespace std ;
   9:  
  10: //逆序扫描字符串求逆序数
  11: //复杂度n
  12: __int64 GetInversionCount( char *str, int n )
  13: {
  14:     __int64 cnt ;
  15:     __int64 a[4] = {0} ; //a[1]:A , a[2]:A,C , a[3]:A,C,G 的个数
  16:     int i ;
  17:  
  18:     //从后往前扫描,计算每位数与其后面几位的逆序数
  19:     cnt = 0 ;
  20:     for( i=n-1 ; i>=0 ; --i )  
  21:     {
  22:         switch(str[i])
  23:         {
  24:         case 'A' :  //与其后面不会形成逆序对
  25:             ++a[1] ;
  26:             ++a[2] ;
  27:             ++a[3] ;
  28:             break ;
  29:         case 'C' :  //与其后面的A形成逆序,所以逆序数要加上其后A的个数
  30:             ++a[2] ;
  31:             ++a[3] ;
  32:             cnt += a[1] ;
  33:             break ;
  34:         case 'G' :  //与其后面的A、C形成逆序,所以逆序数要加上其后A、C的个数
  35:             ++a[3] ;
  36:             cnt += a[2] ;
  37:             break ;
  38:         case 'T' :  //与其后面的A、C、G形成逆序,所以逆序数要加上其后A、C、G的个数
  39:             cnt += a[3] ;
  40:             break ;
  41:         }
  42:     }
  43:     return cnt ;
  44: }
  45:  
  46: //**********************************
  47:  
  48: __int64 inversionNum = 0 ;
  49:  
  50: void Merge( char *a, char* tmpArr, int lb, int rb, int re )
  51: {
  52:     int le = rb-1 ;
  53:     int tmpStart = lb ;
  54:     int tmpEnd = re ;
  55:     int tmpIndex = lb ;
  56:     while( lb<=le && rb<=re )
  57:     {
  58:         if( a[lb] <= a[rb] )
  59:             tmpArr[tmpIndex++] = a[lb++] ;
  60:         else
  61:         {
  62:             tmpArr[tmpIndex++] = a[rb++] ;
  63:             inversionNum += le-lb+1 ;
  64:         }
  65:     }
  66:  
  67:     while(lb<=le)
  68:         tmpArr[tmpIndex++] = a[lb++] ;
  69:     while(rb<=re)
  70:         tmpArr[tmpIndex++] = a[rb++] ;
  71:  
  72:     while(tmpStart<=tmpEnd)
  73:     {
  74:         a[tmpStart] = tmpArr[tmpStart] ;
  75:         ++tmpStart ;
  76:     }    
  77: }
  78:  
  79: void Msort( char *a, char *tmpArr, int b, int e )
  80: {
  81:     if(b>=e)
  82:         return ;
  83:     int mid = b+(e-b)/2 ;
  84:     Msort(a,tmpArr,b,mid) ;
  85:     Msort(a,tmpArr,mid+1,e) ;
  86:     Merge(a,tmpArr,b,mid+1,e) ;
  87: }
  88:  
  89: void MergeSort( char *a , int n )
  90: {
  91:     char *tmpArr = new char[n+1] ;
  92:     Msort(a,tmpArr,0,n-1) ;
  93:     delete []tmpArr ;
  94: }
  95:  
  96: //**********************************
  97:  
  98: void run1007()
  99: {
 100:     ifstream in("in.txt");
 101:  
 102:     int n,m ;
 103:     string tmpStr ;
 104:     multimap< __int64 , string > strMap ;
 105:     multimap< __int64 , string >::iterator iter ;
 106:  
 107:     in>>n>>m ;
 108:  
 109:     char *a = new char[n+1] ;  //末尾'0'
 110:     
 111:     while( m-- && in>>a )  //不能用gets(a),不然会从第一行输入的末尾开始,其末尾为换行符,而gets不会读入换行符
 112:     {                       //所以gets读入的第一行字符为空字符
 113:         tmpStr = a ;
 114:         MergeSort(a,n) ;
 115:         strMap.insert( make_pair(inversionNum,tmpStr) ) ;
 116:         inversionNum = 0 ;
 117:     }
 118:  
 119:     for( iter=strMap.begin() ; iter!=strMap.end() ; ++iter )
 120:         cout<<iter->second<<endl ;
 121:  
 122:     delete []a ;
 123: }
 124:  
 125:  
 126: void run1007_1()
 127: {
 128:     ifstream in("in.txt");
 129:  
 130:     int n,m ;
 131:     string tmp ;
 132:     multimap< __int64 , string > strMap ;
 133:     multimap< __int64 , string >::iterator iter ;
 134:  
 135:     in>>n>>m ;
 136:  
 137:     char *a = new char[n+1] ;  //末尾'0'
 138:     
 139:     while( m-- && in>>a )  //不能用gets(a),不然会从第一行输入的末尾开始,其末尾为换行符,而gets不会读入换行符
 140:     {                       //所以gets读入的第一行字符为空字符
 141:         /*
 142:         //这样会导致strMap中所有的second都是最后一个读入的字符串
 143:         //因为作为second的a每次输入时都会被改变,其并没有复制
 144:         __int64 count = GetInversionCount(a,n) ;
 145:         strMap.insert( make_pair(count,a) ) ;
 146:         */
 147:         tmp = a ;
 148:         __int64 count = GetInversionCount(a,n) ;
 149:         strMap.insert( make_pair(count,tmp) ) ;
 150:  
 151:     }
 152:  
 153:     for( iter=strMap.begin() ; iter!=strMap.end() ; ++iter )
 154:         cout<<iter->second<<endl ;
 155:  
 156:     delete []a ;
 157: }

你可能感兴趣的:(sort)