题目描述:
Let X[1 .. n] and Y[1 .. n] be two arrays, each containing n numbers already in sorted order. Give an O(lg n)-time algorithm to find the median of all 2n elements in arrays X and Y.
题目是《算法导论》上的一道习题,不过已多次出现在面试题当中
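算法导论给出的分析:(针对长度相等的两有序数组)
这里的中位数指合并后第n小的元素(下中位数)。若中位数是X[k],则X中有k-1个元素排在它前面,Y中必须恰有n-k个元素排在它前面,即 Y[n-k] ≤ X[k] ≤ Y[n-k+1](k=n时只需 X[n] ≤ Y[1])。因此可以对k做二分查找:若 X[k] > Y[n-k+1] 则在左半部分继续查找,若 X[k] < Y[n-k] 则在右半部分继续查找;若在X中找不到,则交换X、Y的角色再查找一次。每次查找为O(lg n),总时间为O(lg n)。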
代码实现:
// Searches seq1[low..high] for the median of the 2n elements held in the two
// sorted arrays seq1[0..n-1] and seq2[0..n-1].
//
// The median is the n-th smallest element of the merged sequence (the lower
// median). seq1[k] is that element exactly when k elements of seq1 and
// n-k-1 elements of seq2 come before it, i.e.
//     seq2[n-k-2] <= seq1[k] <= seq2[n-k-1]
// where the left-hand bound does not exist (and is skipped) for k == n-1.
//
// Parameters:
//   seq1, seq2 - sorted (non-decreasing) arrays of n elements each
//   n          - number of elements in each array
//   low, high  - inclusive index range of seq1 to search; callers normally
//                pass 0 and n-1. The range is clamped to [0, n-1].
//
// Returns the median if it is located in seq1[low..high]; otherwise the
// sentinel value static_cast<T>(-999999), which tells the caller to retry
// with the two arrays swapped.
template <typename T>
T find_median(T seq1[],T seq2[],int n,int low,int high)
{
    const T not_found=static_cast<T>(-999999);

    // Keep every index below inside [0, n-1] even for a careless caller.
    if(low<0)
        low=0;
    if(high>n-1)
        high=n-1;

    while(low<=high)
    {
        const int k=low+(high-low)/2;   // overflow-safe midpoint
        const int upper=n-k-1;          // seq2[upper] must be >= seq1[k]

        if(seq1[k]>seq2[upper])
        {
            // seq1[k] ranks above n: the median lies further left.
            high=k-1;
        }
        else if(k==n-1 || seq1[k]>=seq2[upper-1])
        {
            // Exactly n-1 elements precede seq1[k] in the merged order.
            return seq1[k];
        }
        else
        {
            // seq1[k] ranks below n: the median lies further right.
            low=k+1;
        }
    }
    return not_found;
}
// Returns the median of the 2n elements stored in the two sorted arrays
// seq1[0..n-1] and seq2[0..n-1].
//
// The search is delegated to find_median, which looks for the median inside
// its first array argument and reports the sentinel T(-999999) when the
// median is not there. In that case the search is repeated with the roles of
// the two arrays swapped.
template <typename T>
T two_array_median(T seq1[],T seq2[],int n)
{
    const T not_found=T(-999999);

    // First attempt: assume the median is one of seq1's elements.
    T from_first=find_median(seq1,seq2,n,0,n-1);
    if(!(from_first==not_found))
        return from_first;

    // Otherwise it has to be looked up in seq2.
    return find_median(seq2,seq1,n,0,n-1);
}
问题扩展1:
求两有序数组(长度不一定相等)合并后的第i个元素(不一定是中位数)。
思路分析:
假设两个有序数组是A[1...m]和B[1...n],由于是寻找第i个元素,那么该元素只可能在A[1...i]与B[1...i]中,现在比较A[i/2]与B[i/2]
(1) A[i/2]==B[i/2],那么A[i/2](或者B[i/2])即要找的元素
(2) A[i/2]>B[i/2],那么第i个元素在A[1...i/2]和B[i/2...i]中,递归的在上述两个数组中查找第i/2个元素
(3) A[i/2]<B[i/2],那么第i个元素在A[i/2...i]和B[1...i/2]中,递归的在上述两个数组中查找第i/2个元素
很显然,上述算法时间上为O(logi)。
不过有个缺陷,i的取值不能超过两数组中任一数组的长度(即i≤m且i≤n),该算法才能得出正确的结果。
代码实现:
#include <iostream>
#include <string>
using namespace std;
// Returns the size-th smallest element of the merged contents of the sorted
// ranges X[0..size-1] and Y[0..size-1], i.e. the lower median of those
// 2*size elements.
//
// Each round compares the two middle elements. When they are equal that value
// is the answer. Otherwise the upper part of the range with the larger middle
// and the lower part of the range with the smaller middle are dropped; both
// ranges shrink to (size-1)/2 + 1 elements.
//
// Expects size >= 1 and both ranges to hold at least `size` elements.
template <typename T>
T median2(T* X, T* Y, int size)
{
    T* first = X;
    T* second = Y;
    int count = size;

    for (;;)
    {
        const int mid = (count - 1) / 2;

        if (first[mid] == second[mid])
            return first[mid];

        const bool first_is_larger = first[mid] > second[mid];

        // One element left on each side: the smaller one is the answer.
        if (count == 1)
            return first_is_larger ? second[mid] : first[mid];

        // Advance the range whose middle is smaller past its low elements;
        // the other range keeps only its first mid + 1 elements.
        const int skip = count - mid - 1;
        if (first_is_larger)
            second += skip;
        else
            first += skip;
        count = mid + 1;
    }
}
// Demo for median2: prints the 7th smallest element of the two sample arrays
// merged together, then runs the shell command "pause".
int main(int argc,char* argv[])
{
    int first[]={1,2,3,3,4,4,5,6,9};
    int second[]={2,2,3,4,5,7,8};

    // The rank must not exceed the length of the shorter array (7 here).
    const int rank=7;
    const int answer=median2(first,second,rank);

    cout<<"median 3 :"<<answer;
    system("pause");
    return 0;
}
问题扩展2:
求两长度不等的有序数组的中位数。
思路分析:
It is easy to find the median of each array in O(1) time.
Assume the median of array A is m and the median of array B is n.
Then,
1' If m=n, then clearly the median after merging is also m, the algorithm holds.
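2' If m<n, then keep the half of sequence A in which all numbers are greater than
m, and keep the half of sequence B in which all numbers are smaller than n.
Discard the same number of elements from each array (the size of the smaller of the two discarded halves), so that the median of the remaining elements is unchanged, then run the algorithm on the two new arrays.
3' If m>n, then keep the half of sequence A in which all numbers are smaller than
m, and keep the half of sequence B in which all numbers are larger than n.
Again discard the same number of elements from each array, then run the algorithm on the two new arrays.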
Time complexity: O(logn)
代码实现:
#include <iostream>
#include <string>
using namespace std;
// Returns the median of the merged contents of two sorted (non-decreasing)
// arrays whose lengths may differ.
//
// The median is the element at 0-based index (len1+len2)/2 of the merged
// sequence: the middle element for an odd total, the upper of the two middle
// elements for an even total.
//
// Parameters:
//   seq1, len1 - first sorted array and its element count
//   seq2, len2 - second sorted array and its element count
// Negative lengths are treated as 0. If both arrays are empty there is no
// median and a value-initialised T is returned.
//
// Method: binary search for how many elements seq1 contributes to the
// lowest (len1+len2)/2 + 1 elements of the merged sequence; the median is the
// largest element of that group. Only operator< is used on elements, and no
// index outside [0, len1) / [0, len2) is ever read.
template <typename T>
T find_median_in_two_sorted_arr(T seq1[],T seq2[],int len1,int len2)
{
    if(len1<0)
        len1=0;
    if(len2<0)
        len2=0;
    if(len1+len2==0)
        return T();

    // Size of the low group whose maximum is the median.
    const int need=(len1+len2)/2+1;

    // seq1 can contribute between lo and hi elements to that group.
    int lo=need>len2?need-len2:0;
    int hi=need<len1?need:len1;

    // Find the smallest contribution from seq1 for which the next seq1
    // element is not below the last element taken from seq2.
    while(lo<hi)
    {
        const int take1=lo+(hi-lo)/2;
        const int take2=need-take1;     // always in [1, len2] inside the loop
        if(seq1[take1]<seq2[take2-1])
            lo=take1+1;                 // seq1[take1] belongs to the low group
        else
            hi=take1;
    }

    const int take1=lo;
    const int take2=need-take1;

    // The median is the larger of the last elements taken from each array;
    // one side may contribute nothing.
    if(take1==0)
        return seq2[take2-1];
    if(take2==0)
        return seq1[take1-1];
    return seq1[take1-1]<seq2[take2-1]?seq2[take2-1]:seq1[take1-1];
}
// Demo for find_median_in_two_sorted_arr: prints the median of two sample
// sorted arrays of different lengths, then runs the shell command "pause".
int main(int argc,char* argv[])
{
    int first[]={1,3,5,7,8,9,10};
    int second[]={2,4,6,10,11,12,13,14,17,19,20};

    // Element counts derived from the array sizes.
    const int first_len = sizeof(first)/sizeof(int);
    const int second_len = sizeof(second)/sizeof(int);

    const int median = find_median_in_two_sorted_arr(first,second,first_len,second_len);
    cout<<"median : "<<median;
    system("pause");
    return 0;
}