在前一篇文章的基础上,现在先看一下 MPEG-4 编码标准中应用的码率控制算法。总结起来,各大算法都是在解决两个问题:率失真(R-D)优化,以及避免缓冲区的上溢和下溢。
MPEG-4 VM8 码率控制算法
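在这里要先介绍一个非常著名的二次率失真模型,其实说来也简单,在前面我们提到拉普拉斯分布信源的概率密度函数为:$p(x)=\frac{\alpha}{2}e^{-\alpha|x|}$。对其率失真函数做泰勒展开并保留前两项,即可得到二次率失真模型 $R = X_1\cdot\mathrm{MAD}/Q + X_2\cdot\mathrm{MAD}/Q^{2}$,其中 $R$ 为纹理比特数,$Q$ 为量化步长,$X_1$、$X_2$ 为模型参数。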
代码分析
下面结合JM代码来看一下H.264的码率控制算法。
首先需要知道的是,在JM中默认使用的是基于JVT-G012r1的码率控制方法。
该提案中提出了自适应基本单元层码率控制方案,提出基本单元和线性模型的概念。其中,基本单元可为一帧、一片或一个宏块。而线性模型是用前一帧相同位置处的基本单元的MAD值来预测当前帧当前基本单元的MAD值,这样求MAD值就可以解决蛋鸡悖论。
解决过程如下:采用漏桶模型和线性跟踪理论,根据已经确定的帧率、当前的缓冲占有率、目标缓冲级别和可获取的信道带宽来算出当前帧的目标码率。剩余比特数则平均分配给当前帧中没有编码的基本单元,因为这些基本单元的MAD值还不知道。通过线性模型,可用前一帧相同位置处的基本单元的实际MAD值来预测出当前基本单元的MAD值。之后,用二次率失真模型来计算相应的QP值,从而用来对当前基本单元的每一宏块进行率失真优化。
这里首先涉及到三个新的概念:基本单元、MAD线性预测模型以及用于计算缓冲区充盈度的流体传输模型。
Main-init_encoder-init_global_buffers-rc_allocate_memory (rc_quadratic.c)-rc_alloc_generic(ratectl.c)-rc_alloc_quadratic (rc_quadratic.c):在初始化过程的代码中同时完成了基本单元数目的计算: int rcBufSize = p_Vid->FrameSizeInMbs / p_Inp->basicunit; 此外,Main-init_encoder-rc_init_sequence-rc_init_seq 中同样包含有计算基本单元总数的代码: p_quad->TotalNumberofBasicUnit = p_Vid->FrameSizeInMbs/p_Inp->basicunit;
Main-init_encoder-rc_init_sequence-rc_init_seq 在该函数的代码中即可看到MAD预测模型参数的初始化 /* linear prediction model for P picture*/ p_quad->PMADPictureC1 = 1.0; p_quad->PMADPictureC2 = 0.0; main-encode_sequence-encode_one_frame-perform_encode_frame-rc_init_frame-updateQPRC0-predictCurrPicMAD-update current picture MAD p_quad->CurrentFrameMAD=p_quad->MADPictureC1*p_quad->BUPFMAD[p_quad->TotalNumberofBasicUnit-p_quad->NumberofBasicUnit]+p_quad->MADPictureC2; p_quad->TotalBUMAD=0; for(i=p_quad->TotalNumberofBasicUnit-1; i>=(p_quad->TotalNumberofBasicUnit-p_quad->NumberofBasicUnit);i--) { p_quad->CurrentBUMAD=p_quad->MADPictureC1*p_quad->BUPFMAD[i]+p_quad->MADPictureC2; p_quad->TotalBUMAD +=p_quad->CurrentBUMAD*p_quad->CurrentBUMAD; }
由此,该方案按如下步骤进行:
GOP层的码率控制
首先
main-encode_sequence-prepare_frame_params-rc_init_gop_params-rc_init_GOP /*compute the total number of bits for the current GOP*/ AllocatedBits = (int64) floor((1 + np + nb) * p_quad->bit_rate / p_quad->frame_rate + 0.5); p_gen->RemainingBits += AllocatedBits; p_quad->Np = np; p_quad->Nb = nb; main-encode_sequence-encode_one_frame-perform_encode_frame-rc_init_frame-rc_init_pict /* Since the available bandwidth may vary at any time, the total number of bits is updated picture by picture*/ if(prc->PrevBitRate!=prc->bit_rate) generic_RC->RemainingBits +=(int) floor((prc->bit_rate-prc->PrevBitRate)*(prc->Np + prc->Nb)/prc->frame_rate+0.5); 其次,第一个GOP的初始QP为一个预先确定的值QP0。此时,这个GOP的I帧和第一个P帧以QP0编码。QP0是基于可获得的信道带宽和GOP长度而预先确定的。一般情况下,信道带宽高,选小QP0;信道带宽低,选大QP0。带宽一定时,GOP长度每增加15帧,QP0减小1。
Main-init_encoder-rc_init_sequence-rc_init_seq 实际代码中对第一个GOP的初始QP计算如下 if (p_Inp->SeinitialQP==0) { /*compute the initial QP*/ //bpp即每像素比特数 bpp = 1.0*p_quad->bit_rate /(p_quad->frame_rate*p_Vid->size); if (p_Vid->width == 176) { L1 = 0.1; L2 = 0.3; L3 = 0.6; } else if (p_Vid->width == 352) { L1 = 0.2; L2 = 0.6; L3 = 1.2; } else { L1 = 0.6; L2 = 1.4; L3 = 2.4; } if (bpp<= L1) qp = 35; else if(bpp<=L2) qp = 25; else if(bpp<=L3) qp = 20; else qp = 10; p_Inp->SeinitialQP = qp; }
main-encode_sequence-prepare_frame_params-rc_init_gop_params-rc_init_GOP /*compute the average QP of P frames in the previous GOP*/ p_quad->PAverageQp=(int)(1.0 * p_quad->TotalQpforPPicture / p_quad->NumberofPPicture+0.5); GOPDquant=(int)((1.0*(np+nb+1)/15.0) + 0.5); if(GOPDquant>2) GOPDquant=2; p_quad->PAverageQp -= GOPDquant; if (p_quad->PAverageQp > (p_quad->QPLastPFrame - 2)) p_quad->PAverageQp--; // QP is constrained by QP of previous GOP p_quad->PAverageQp = iClip3(p_quad->QPLastGOP - 2, p_quad->QPLastGOP + 2, p_quad->PAverageQp); // Also clipped within range. p_quad->PAverageQp = iClip3(p_Vid->RCMinQP + p_quad->bitdepth_qp_scale, p_Vid->RCMaxQP + p_quad->bitdepth_qp_scale, p_quad->PAverageQp); p_quad->MyInitialQp = p_quad->PAverageQp; p_quad->Pm_Qp = p_quad->PAverageQp; p_quad->PAveFrameQP = p_quad->PAverageQp; p_quad->QPLastGOP = p_quad->MyInitialQp; p_quad->PrevLastQP = p_quad->CurrLastQP; p_quad->CurrLastQP = p_quad->MyInitialQp - 1;
if(p_Inp->NumberBFrames==1) { p_quad->m_Qc = imin(p_quad->PrevLastQP, p_quad->CurrLastQP) + 2; p_quad->m_Qc = imax(p_quad->m_Qc, imax(p_quad->PrevLastQP, p_quad->CurrLastQP)); p_quad->m_Qc = imax(p_quad->m_Qc, p_quad->CurrLastQP + 1); p_quad->m_Qc = iClip3(p_Vid->RCMinQP + p_quad->bitdepth_qp_scale, p_Vid->RCMaxQP + p_quad->bitdepth_qp_scale, p_quad->m_Qc); // Clipping } else { BFrameNumber = (p_quad->NumberofBFrames + 1) % p_Inp->NumberBFrames; if(BFrameNumber==0) BFrameNumber = p_Inp->NumberBFrames; if((p_quad->CurrLastQP-p_quad->PrevLastQP)<=(-2*p_Inp->NumberBFrames-3)) StepSize=-3; else if((p_quad->CurrLastQP-p_quad->PrevLastQP)==(-2*p_Inp->NumberBFrames-2)) StepSize=-2; else if((p_quad->CurrLastQP-p_quad->PrevLastQP)==(-2*p_Inp->NumberBFrames-1)) StepSize=-1; else if((p_quad->CurrLastQP-p_quad->PrevLastQP)==(-2*p_Inp->NumberBFrames)) StepSize=0; else if((p_quad->CurrLastQP-p_quad->PrevLastQP)==(-2*p_Inp->NumberBFrames+1)) StepSize=1; else StepSize=2; p_quad->m_Qc = p_quad->PrevLastQP + StepSize; p_quad->m_Qc += iClip3( -2 * (BFrameNumber - 1), 2*(BFrameNumber-1), (BFrameNumber-1)*(p_quad->CurrLastQP-p_quad->PrevLastQP)/(p_Inp->NumberBFrames-1)); p_quad->m_Qc = iClip3(p_Vid->RCMinQP + p_quad->bitdepth_qp_scale, p_Vid->RCMaxQP + p_quad->bitdepth_qp_scale, p_quad->m_Qc); // Clipping } return p_quad->m_Qc;
然后再看一下P帧的QP值的计算
P帧的QP值通过下面两步求出:
main-encode_sequence-encode_one_frame-perform_encode_frame-rc_init_frame-rc_init_pict /* predefine the target buffer level for each picture. basic unit layer rate control */ if(p_gen->NumberofGOP==1) { if(p_quad->NumberofPPicture==1) { p_quad->TargetBufferLevel = (double) p_gen->CurrentBufferFullness; p_quad->DeltaP = (p_gen->CurrentBufferFullness - p_quad->GOPTargetBufferLevel)/(p_quad->TotalPFrame - 1); p_quad->TargetBufferLevel -= p_quad->DeltaP; } else if(p_quad->NumberofPPicture>1) p_quad->TargetBufferLevel -= p_quad->DeltaP; } else if(p_gen->NumberofGOP>1) { if(p_quad->NumberofPPicture==0) { p_quad->TargetBufferLevel = (double) p_gen->CurrentBufferFullness; p_quad->DeltaP = (p_gen->CurrentBufferFullness - p_quad->GOPTargetBufferLevel) / p_quad->TotalPFrame; p_quad->TargetBufferLevel -= p_quad->DeltaP; } else if(p_quad->NumberofPPicture>0) p_quad->TargetBufferLevel -= p_quad->DeltaP; } …… if(p_quad->NumberofCodedPFrame==1) p_quad->AveWp = p_quad->Wp; if((p_quad->NumberofCodedPFrame<8)&&(p_quad->NumberofCodedPFrame>1)) p_quad->AveWp = (p_quad->AveWp + p_quad->Wp * (p_quad->NumberofCodedPFrame-1))/p_quad->NumberofCodedPFrame; else if(p_quad->NumberofCodedPFrame>1) p_quad->AveWp = (p_quad->Wp + 7 * p_quad->AveWp) / 8; …… // compute the average complexity of B frames if(p_Inp->NumberBFrames>0) { // compute the target buffer level p_quad->TargetBufferLevel += (p_quad->AveWp * (p_Inp->NumberBFrames + 1)*p_quad->bit_rate\ /(p_quad->frame_rate*(p_quad->AveWp+p_quad->AveWb*p_Inp->NumberBFrames))-p_quad->bit_rate/p_quad->frame_rate); } …… //compute the average weight if(p_gen->NumberofCodedBFrame<8) p_quad->AveWb = (p_quad->AveWb + p_quad->Wb*(p_gen->NumberofCodedBFrame-1)) / p_gen->NumberofCodedBFrame; else p_quad->AveWb = (p_quad->Wb + 7 * p_quad->AveWb) / 8;
Main-init_encoder-rc_init_sequence-rc_init_seq 实际代码中对γ和β的设置如下: /*control parameter*/ if(p_Inp->NumberBFrames>0) { p_quad->GAMMAP=0.25; p_quad->BETAP=0.9; } else { p_quad->GAMMAP=0.5; p_quad->BETAP=0.5; } 当前P帧目标比特数的计算如下: main-encode_sequence-encode_one_frame-perform_encode_frame-rc_init_frame-rc_init_pict p_quad->Target = (int) (floor( p_quad->Wp * p_gen->RemainingBits / (p_quad->Np * p_quad->Wp + p_quad->Nb * p_quad->Wb) + 0.5)); tmp_T = imax(0, (int) (floor(p_quad->bit_rate / p_quad->frame_rate - p_quad->GAMMAP * (p_gen->CurrentBufferFullness-p_quad->TargetBufferLevel) + 0.5))); p_quad->Target = (int) (floor(p_quad->BETAP * (p_quad->Target - tmp_T) + tmp_T + 0.5));
main-encode_sequence-encode_one_frame-perform_encode_frame-rc_init_frame-rc_init_pict-rc_updateQP /* predict the MAD of current picture*/ p_quad->CurrentFrameMAD = p_quad->MADPictureC1*p_quad->PreviousPictureMAD + p_quad->MADPictureC2; /*compute the number of bits for the texture*/ if(p_quad->Target < 0) { p_quad->m_Qc=m_Qp+MaxQpChange; p_quad->m_Qc = iClip3(p_Vid->RCMinQP + p_quad->bitdepth_qp_scale, p_Vid->RCMaxQP + p_quad->bitdepth_qp_scale, p_quad->m_Qc); // Clipping } else { m_Bits = p_quad->Target-m_Hp; m_Bits = imax(m_Bits, (int)(p_quad->bit_rate/(MINVALUE*p_quad->frame_rate))); updateModelQPFrame( p_quad, m_Bits ); dtmp = p_quad->CurrentFrameMAD * p_quad->m_X1 * p_quad->CurrentFrameMAD * p_quad->m_X1 + 4 * p_quad->m_X2 * p_quad->CurrentFrameMAD * m_Bits; if ((p_quad->m_X2 == 0.0) || (dtmp < 0) || ((sqrt (dtmp) - p_quad->m_X1 * p_quad->CurrentFrameMAD) <= 0.0)) // fall back 1st order mode m_Qstep = (float) (p_quad->m_X1 * p_quad->CurrentFrameMAD / (double) m_Bits); else // 2nd order mode m_Qstep = (float) ((2 * p_quad->m_X2 * p_quad->CurrentFrameMAD) / (sqrt (dtmp) - p_quad->m_X1 * p_quad->CurrentFrameMAD)); p_quad->m_Qc = Qstep2QP(m_Qstep, p_quad->bitdepth_qp_scale); p_quad->m_Qc = iClip3(p_Vid->RCMinQP + p_quad->bitdepth_qp_scale, p_Vid->RCMaxQP + p_quad->bitdepth_qp_scale, p_quad->m_Qc); // clipping p_quad->m_Qc = iClip3(m_Qp-MaxQpChange, m_Qp+MaxQpChange, p_quad->m_Qc); // control variation }
更新MAD线性预测模型的参数: main-encode_sequence-encode_one_frame-rc_update_pict_frame-rc_update_picture-rc_update_pict-updateRCModel-updateMADModel;更新二次R-D模型的参数: main-encode_sequence-encode_one_frame-rc_update_pict_frame-rc_update_picture-rc_update_pict-updateRCModel;更新缓冲充盈度: main-encode_sequence-encode_one_frame-rc_update_pict_frame-rc_update_picture-rc_update_pict int delta_bits = (nbits - (int)floor(p_quad->bit_rate / p_quad->frame_rate + 0.5F) ); // remaining # of bits in GOP p_gen->RemainingBits -= nbits; p_gen->CurrentBufferFullness += delta_bits;