现实生活中常有找“最大”、“最小”及“中位数”等需求,解决这样的问题不必将整个序列排序。寻找“最大”、“最小”问题可以用“堆”来完成(建堆之后,每次取出堆顶元素的时间复杂度不超过O(log n))。对于寻找“中位数”问题,可以将其抽象为在含n个元素的序列中寻找第k小元素的问题。《编程珠玑》中寻找序列第k小元素所用的“划分序列”思路与“快速排序”中的“划分序列”思想结构相同。寻找第k小元素的问题分两个层次来完成:1. 划分序列得到第m小元素;2. 选择序列得到第k小元素。
当以t值将整个序列划分完毕时,A[L…U]这个序列被划分为如下结构
此时整个序列被分为两部分:小于t的部分及大于等于t的部分。此时m所指向的元素就是整个序列中第(m – L + 1)小的元素,即A[L…m-1] < A[m] <= A[m+1…U]。
当以t值将整个序列划分完毕时,A[L…U]这个序列被划分为如下结构
此时整个序列被分为两部分:小于t的部分及大于等于t的部分。此时m所指向的元素就是整个序列中第(m – L + 1)小的元素,即A[L…m-1] < A[m] <= A[m+1…U]。
以下C代码由AWK代码表述的算法还原而来;之所以先用AWK表述,是因为AWK函数易于被AWK程序测试。
需要用到的两元素交换函数swap()和返回[l, h)间随机整数的函数RandInt()代码如下
/* Element type of the arrays handled by the selection routines. */
typedef int TYPE;

/*
 * Return a pseudo-random integer m with l <= m < h, drawn from rand().
 *
 * An empty or inverted range (h <= l) yields l, so the modulo below is
 * never taken with a zero or negative divisor.
 */
int RandInt(int l, int h)
{
    if (h <= l) {
        return l;
    }
    return l + rand() % (h - l);
}

/* Exchange the elements at indices i and j of the TYPE array a. */
void swap(TYPE *a, int i, int j)
{
    TYPE t = a[i];

    a[i] = a[j];
    a[j] = t;
}
/*
 * Recursive Hoare selection using the Sedgewick-style partition.
 *
 * a    array to search; its elements are reordered in place
 * l, u inclusive index bounds of the range still under consideration
 * k    1-based rank: the element that belongs at index k - 1 is wanted
 *
 * Returns the element that ends up at index k - 1, provided
 * l <= k - 1 <= u on entry.  When the range is empty (l > u) there is
 * nothing to select and 0 is returned.
 */
int SedgewickRecursion(TYPE *a, int l, int u, int k)
{
    int i, m, r;
    TYPE t;

    if (l > u) {
        return 0;
    }

    /* Both m and i start one past the last element of the range. */
    m = i = u + 1;

    /* Move a randomly chosen element to the front; it becomes the pivot. */
    r = RandInt(l, u + 1);
    swap(a, l, r);
    t = a[l];

    do {
        /*
         * Scan leftwards past elements smaller than t.  a[l] == t acts as
         * a sentinel, so the scan stops at index l at the latest.
         */
        while (a[--i] < t) {
        }
        /* a[i] is not smaller than t: move it into the upper part. */
        swap(a, --m, i);
    } while (i != l);

    /* Now a[l..m-1] < a[m] == t <= a[m+1..u]. */
    if (m < k - 1) {
        /* The wanted element lies in a[m+1..u]. */
        return SedgewickRecursion(a, m + 1, u, k);
    }
    if (m > k - 1) {
        /* The wanted element lies in a[l..m-1]. */
        return SedgewickRecursion(a, l, m - 1, k);
    }
    return a[m];
}
/*
 * Iterative Hoare selection using the Sedgewick-style partition.
 *
 * a    array to search; its elements are reordered in place
 * l, u inclusive index bounds of the range to search
 * k    1-based rank: the element that belongs at index k - 1 is wanted
 *
 * Returns the element that ends up at index k - 1, provided
 * l <= k - 1 <= u on entry.  If the range becomes empty without reaching
 * index k - 1 (k outside the range), 0 is returned.
 */
int SedgewickIteration(TYPE *a, int l, int u, int k)
{
    int i, m, r;
    TYPE t;

    while (l <= u) {
        /* Both m and i start one past the last element of the range. */
        m = i = u + 1;

        /* Move a randomly chosen element to the front; it becomes the pivot. */
        r = RandInt(l, u + 1);
        swap(a, l, r);
        t = a[l];

        do {
            /*
             * Scan leftwards past elements smaller than t.  a[l] == t acts
             * as a sentinel, so the scan stops at index l at the latest.
             */
            while (a[--i] < t) {
            }
            /* a[i] is not smaller than t: move it into the upper part. */
            swap(a, --m, i);
        } while (i != l);

        /* Now a[l..m-1] < a[m] == t <= a[m+1..u]. */
        if (m < k - 1) {
            /* The wanted element lies in a[m+1..u]. */
            l = m + 1;
        } else if (m > k - 1) {
            /* The wanted element lies in a[l..m-1]. */
            u = m - 1;
        } else {
            return a[m];
        }
    }

    /* Only reached when index k - 1 is not inside the searched range. */
    return 0;
}
在Linux终端中新建一个文件用来编写AWK程序:vi TestAlgo.awk,建立好后保存此文件。根据AWK程序框架编写AWK程序。程序中定义的函数除了语法与C稍有不同之外,其实现过程跟C程序都是一样的。所以,有关于C语言程序的算法都可以以AWK程序格式定义到AWK程序中,算法经测试后再用C将算法表达出来。
命令行终端运行AWK代码格式为,
awk 'awk-code' file 或者 awk 'awk-code'
lly7@debian:~/AWK$ awk '$1==1 {print $2}' awk.dat
happy
lly7@debian:~/AWK$ awk '$1==1 {print $2}'
1 happy
happy
运行AWK程序文件格式为
awk -f 'awk-file' file 或者 awk -f 'awk-file'
如以下为TestAlgo.awk程序用文件作为输入时测试产生[L, U]整数的随机函数的界面,
lly7@debian:~/AWK$ awk -f TestAlgo.awk awk.dat
AWK Application Start
0.429662 0.323617 0.026259 0.339192 0.793282
2 3 4 1 1 3 0 2 3 1
AWK Application END
lly7@debian:~/AWK$
lly7@debian:~/AWK$ awk -f TestAlgo.awk
AWK Application Start
fill 5
0.585005 0.171147 0.462567 0.370760 0.368752
randint 10
0 4 3 1 2 2 3 1 4 0
^C
lly7@debian:~/AWK$
分析Bob Sedgewick划分下实现的Hoare选择算法的时间复杂度。
所以被Hoare选择算法划分的元素的总数在[N, N(N+1)/2]之间。
.G1
# Plot the ranges examined while selecting the K-th value of [L, U]
# with a randomly chosen split point M at every step.
# COUNT accumulates the size (U - L + 1) of every range examined.
L = 1; U = 100;
K = int( (L + U) / 2 );
Y = (U - L + 1) * (L + U) / 2;
MUL = Y;
COUNT = 1;
define RandInt { ($1) + int( ( ($2) - ($1) ) * rand() )}
# I is the random seed; it is advanced by one on every step.
I = 134;
frame invis
ticks left in from 0 to 0
ticks bot in from 0 to 0
label bot "Hoare find the k small number"
line from K,0 to K,Y
for l from L to U by 1 do{
	if L <= U then {
		Y = Y - int(MUL / 10);
		srand(I);
		I = I + 1;
		M = RandInt(L, U);
		COUNT = COUNT + U - L + 1;
		line from L, Y to U, Y
		bullet at M,Y;
		if K <= M then {U = M - 1};
		if K >= M then {L = M + 1};
	}
}
print COUNT
.G2
修改随机数I及序列U的值后,在Linux命令行终端运行此grap程序:grap k_small.g | pic | groff > k_small.
不同随机数种子下寻找[1,10]中位数的过程
不同随机数种子寻找[1, 100]中位数的过程
/*
 * Recursive Hoare selection using the Lomuto partition.
 *
 * a    array to search; its elements are reordered in place
 * l, u inclusive index bounds of the range still under consideration
 * k    1-based rank: the element that belongs at index k - 1 is wanted
 *
 * Returns the element that ends up at index k - 1, provided
 * l <= k - 1 <= u on entry.  When the range is empty (l > u) there is
 * nothing to select and 0 is returned.
 */
int LomutoRecursion(TYPE *a, int l, int u, int k)
{
    int i, m, r;
    TYPE t;

    if (l > u) {
        return 0;
    }

    /* Move a randomly chosen element (l <= r <= u) to the front as pivot. */
    r = RandInt(l, u + 1);
    swap(a, l, r);
    t = a[l];

    /* Lomuto partition: a[l+1..m] collects the elements smaller than t. */
    m = l;
    for (i = l + 1; i <= u; ++i) {
        if (a[i] < t) {
            swap(a, ++m, i);
        }
    }
    /* Put the pivot between the two parts: a[l..m-1] < a[m] <= a[m+1..u]. */
    swap(a, l, m);

    if (m < k - 1) {
        /* The wanted element lies in a[m+1..u]. */
        return LomutoRecursion(a, m + 1, u, k);
    }
    if (m > k - 1) {
        /* The wanted element lies in a[l..m-1]. */
        return LomutoRecursion(a, l, m - 1, k);
    }
    return a[m];
}
/*
 * Iterative Hoare selection using the Lomuto partition.
 *
 * a    array to search; its elements are reordered in place
 * l, u inclusive index bounds of the range to search
 * k    1-based rank: the element that belongs at index k - 1 is wanted
 *
 * Returns the element that ends up at index k - 1, provided
 * l <= k - 1 <= u on entry.  If the range becomes empty without reaching
 * index k - 1 (k outside the range), 0 is returned.
 */
int LomutoIteration(TYPE *a, int l, int u, int k)
{
    int i, m, r;
    TYPE t;

    while (l <= u) {
        /* Move a randomly chosen element (l <= r <= u) to the front as pivot. */
        r = RandInt(l, u + 1);
        swap(a, l, r);
        t = a[l];

        /* Lomuto partition: a[l+1..m] collects the elements smaller than t. */
        m = l;
        for (i = l + 1; i <= u; ++i) {
            if (a[i] < t) {
                swap(a, ++m, i);
            }
        }
        /* Put the pivot between the parts: a[l..m-1] < a[m] <= a[m+1..u]. */
        swap(a, l, m);

        if (m < k - 1) {
            /* The wanted element lies in a[m+1..u]. */
            l = m + 1;
        } else if (m > k - 1) {
            /* The wanted element lies in a[l..m-1]. */
            u = m - 1;
        } else {
            return a[m];
        }
    }

    /* Only reached when index k - 1 is not inside the searched range. */
    return 0;
}
# Test harness for the selection routines.  All functions operate on the
# global array a; n is the number of elements stored by the "fill" command.
# Each input line whose first field is a command name runs that command.

BEGIN {
    print "AWK Application Start"
    MULTI = 10000
}

# Return an integer m with l <= m < h; returns l when h equals l.
function RandInt(l, h)
{
    if( !(h - l) ) return l;
    return l + int(rand() * MULTI) % (h-l) ;
}

# Exchange a[i] and a[j]; t is a local temporary.
function swap(i, j,    t)
{
    t = a[i];
    a[i] = a[j];
    a[j] = t;
}

# Recursive selection with the Sedgewick-style partition on a[l..u].
# k is the 1-based rank; the element belonging at index k - 1 is returned.
# i, m, t, r are locals.
function SedgewickRecursion(l, u, k,    i, m, t, r)
{
    if(l > u)
        return
    # Both m and i point to the element which is next to the end one
    m = i = u + 1;
    r = RandInt(l, u + 1);
    print "r: " r
    swap(l, r);
    # Flag the first element
    t = a[l];
    do{
        # Go on when meet the element which is smaller than t
        # Until to the first element
        while(a[--i] < t)
            ;
        # Swap the element which is not smaller than t with --m
        swap(--m, i);
    }while(i != l);
    print "m: " m
    if(m < k - 1){
        # The k small element in a[m + 1, u]
        return SedgewickRecursion(m + 1, u, k);
    }
    if(m > k - 1){
        # The k small element in a[l, m - 1]
        return SedgewickRecursion(l, m - 1, k);
    }
    return a[m];
}

# Iterative selection with the Sedgewick-style partition on a[l..u].
# Returns a[m] once the pivot lands at index k - 1.
function SedgewickIteration(l, u, k,    i, m, t, r)
{
    while(l <= u){
        # Both m and i point to the element which is next to the end one
        m = i = u + 1;
        r = RandInt(l, u + 1);
        print "r: " r
        swap(l, r);
        # Flag the first element
        t = a[l];
        do{
            # Go on when meet the element which is smaller than t
            # Until to the first element
            while(a[--i] < t)
                ;
            # Swap the element which is not smaller than t with --m
            swap(--m, i);
        }while(i != l);
        print "m: " m
        if(m < k - 1){
            # The k small element in a[m + 1, u]
            l = m + 1;
        }else if(m > k - 1){
            # The k small element in a[l, m - 1]
            u = m - 1;
        }else {
            return a[m];
        }
    }
}

# Recursive selection with the Lomuto partition on a[l..u].
# k is the 1-based rank; the element belonging at index k - 1 is returned.
function LomutoRecursion(l, u, k,    i, m, t, r)
{
    if(l > u)
        return
    m = l;
    r = RandInt(l, u + 1);
    print "r: " r
    # Use the randomly chosen element as the pivot
    swap(l, r);
    t = a[l];
    for(i = l + 1; i <= u; ++i){
        if(a[i] < t)
            swap(++m, i);
    }
    # Exchange the t and a[m]
    swap(l, m);
    print "m: " m
    if(m < k - 1){
        # The k small element in a[m + 1, u]
        return LomutoRecursion(m + 1, u, k);
    }
    if(m > k - 1){
        # The k small element in a[l, m - 1]
        return LomutoRecursion(l, m - 1, k);
    }
    return a[m];
}

# Iterative selection with the Lomuto partition on a[l..u].
# Returns a[m] once the pivot lands at index k - 1.
function LomutoIteration(l, u, k,    i, m, t, r)
{
    while( l <= u){
        m = l;
        r = RandInt(l, u + 1);
        print "r: " r
        # Use the randomly chosen element as the pivot
        swap(l, r);
        t = a[l];
        for(i = l + 1; i <= u; ++i){
            if(a[i] < t)
                swap(++m, i);
        }
        # Exchange the t and a[m]
        swap(l, m);
        print "m: " m
        if(m < k - 1){
            # The k small element in a[m + 1, u]
            l = m + 1;
        }else if(m > k - 1){
            # The k small element in a[l, m - 1]
            u = m - 1;
        }else {
            return a[m];
        }
    }
}

# fill N: store N random values in a[0..N-1] and print them.
$1=="fill" {
    n = $2;
    for(j = 0; j < n; ++j)
        a[j] = rand();
    printf("fill: ");
    for(j = 0; j < n; ++j)
        printf("%f ", a[j]);
    print "\n"
}

# randint N: print N values of RandInt(0, n).
$1=="randint"{
    for(j = 0; j < $2; ++j)
        printf("%d ",RandInt(0, n));
    print " "
}

# swap: exchange two randomly chosen elements and print the array.
$1=="swap" {
    r1 = RandInt(0, n);r2 = RandInt(0, n);
    printf("r1: %d, r2: %d\n", r1, r2);
    swap(r1, r2);
    for(j = 0; j < n; ++j)
        printf("%f ", a[j]);
    print "\n"
}

# m X Y: set a[0] and a[1] by hand.
$1=="m" {
    a[0]=$2;a[1]=$3;
}

$1=="sdrec" {
    SedgewickRecursion(0, n - 1, 2);
    printf("sdrec: ");
    for(j = 0; j < n; ++j)
        printf("%f ", a[j]);
    print " "
}

$1=="sdite" {
    SedgewickIteration(0, n - 1, 5);
    printf("sdite: ");
    for(j = 0; j < n; ++j)
        printf("%f ", a[j]);
    print " "
}

$1=="ltrec" {
    LomutoRecursion(0, n - 1, 1);
    printf("ltrec: ");
    for(j = 0; j < n; ++j)
        printf("%f ", a[j]);
    print " "
}

$1=="ltite" {
    LomutoIteration(0, n - 1, 3);
    printf("ltite: ");
    for(j = 0; j < n; ++j)
        printf("%f ", a[j]);
    print " "
}

END {
    print "AWK Application END"
}