图形图像处理-之-误差扩散 中篇
[email protected] 2008.04.22
(2010.01.05 文章由2篇变成3篇,对误差扩散的速度和质量作进一步探讨!
代码也有一些更新,容纳到我的图像处理简易框架内,并提供源代码下载!
测试环境也有了变动;由AMD64x2 4200+(2.37G) DDR2 667(双通道) 升级为i7-920 DDR3 1333(三通道) )
(2008.12.01 修正一处bug,颜色误差多累加了一次; 该错误由QueQuan发现,表示感谢! )
tag: 误差扩散,真彩色到高彩色转换,色阶,减色,半色调
摘要: 在图像的颜色转换过程中,由于颜色值域的不同,转换过程中可能会产生误差;
误差扩散算法通过将误差传递到周围像素而减轻其造成的视觉误差。
上篇:简单实现; 中篇:简单的速度优化; 下篇: 更快的速度或更好的效果.
(测试源代码下载: https://github.com/sisong/demoForHssBlog )
正文:
代码使用C++,编译器:VC2005
测试平台:(CPU:i7-920(3.44G); 内存:DDR3 1333(三通道); 编译器:VC2005)
(请先参看文章的上篇)
E: 将误差扩散的浮点实现改写为一个整数的定点数实现:
// Per-channel error term used by the integer error-diffusion routines in this
// file; one value for each of the red, green and blue channels.
struct TErrorColor{
long dR; // error term for the red channel
long dG; // error term for the green channel
long dB; // error term for the blue channel
};
// Maps a wanted channel value on the 8-bit scale to a 5-bit level (0..31).
// The input may lie outside 0..255: anything at or below zero clamps to 0,
// anything at or above 248 (31 * 8) clamps to 31, and values in between are
// divided by 8.
inline long getBestRGB16_555Color( const long wantColor){
    const long kTopLevel = 31;              // largest 5-bit level
    const long kTopStart = kTopLevel * 8;   // first input that already maps to the top level

    if (wantColor >= kTopStart)
        return kTopLevel;
    if (wantColor > 0)
        return wantColor / 8;               // positive here, so identical to >> 3
    return 0;
}
// Expands a 5-bit level (0..31) back to the 8-bit scale, i.e. rColor*255/31
// with integer truncation, using a 16.16 fixed-point multiply instead of a
// division.
//
// The reciprocal is rounded UP (ceil(255 * 2^16 / 31) = 539087). With the
// truncated reciprocal (539086) the product for level 31 falls just short of
// 255 << 16 and the result is 254; rounding up yields exactly rColor*255/31
// for every level in 0..31, including 31 -> 255.
inline long getC8Color( const long rColor){
    const long kMaxLevel = (1 << 5) - 1;
    const long kScale16  = (255 * (1 << 16) + kMaxLevel - 1) / kMaxLevel;
    return (rColor * kScale16) >> 16;
}
// Converts `width` 32-bit pixels from pSrc into 16-bit pixels in pDst
// (blue in bits 0-4, green in bits 5-9, red in bits 10-14) with integer
// error diffusion.
//
// For each pixel the quantization error is divided by 4 (arithmetic shift);
// two quarters are added to the next pixel of this call, one quarter is
// added to PHLineErr[x-1] and one quarter is stored in PHLineErr[x].
// PHLineErr[x] is read before it is overwritten, so values left there by a
// previous call feed into this one.
//
// PHLineErr must be addressable from index -1 (that slot is reset here) up
// to width-1.
void CvsPic32To16_ErrorDiffuse_Line_1(UInt16 * pDst, const Color32 * pSrc, long width,TErrorColor * PHLineErr){
    // Quarter of the previous pixel's error, per channel.
    long quarterB = 0;
    long quarterG = 0;
    long quarterR = 0;

    // Walks one slot behind the current column; starts at PHLineErr[-1].
    TErrorColor * lowerLeft = PHLineErr - 1;
    lowerLeft->dR = 0;
    lowerLeft->dG = 0;
    lowerLeft->dB = 0;

    for (long col = 0; col < width; ++col, ++lowerLeft) {
        TErrorColor * below = lowerLeft + 1;    // same slot as PHLineErr[col]

        // Wanted value = source + 2/4 of the left neighbour's error + carried error.
        const long wantB = pSrc[col].b + quarterB * 2 + below->dB;
        const long wantG = pSrc[col].g + quarterG * 2 + below->dG;
        const long wantR = pSrc[col].r + quarterR * 2 + below->dR;

        const long levelB = getBestRGB16_555Color(wantB);
        const long levelG = getBestRGB16_555Color(wantG);
        const long levelR = getBestRGB16_555Color(wantR);
        pDst[col] = (levelR << 10) | (levelG << 5) | levelB;

        // A quarter of what quantization lost for this pixel.
        quarterB = (wantB - getC8Color(levelB)) >> 2;
        quarterG = (wantG - getC8Color(levelG)) >> 2;
        quarterR = (wantR - getC8Color(levelR)) >> 2;

        lowerLeft->dB += quarterB;
        lowerLeft->dG += quarterG;
        lowerLeft->dR += quarterR;

        below->dR = quarterR;
        below->dG = quarterG;
        below->dB = quarterB;
    }
}
void CvsPic32To16_ErrorDiffuse_1( const TPicRegion_RGB16_555 & dst, const TPixels32Ref & src){
UInt16 * pDst = (UInt16 * )dst.pdata;
const Color32 * pSrc = src.pdata;
const long width = src.width;
TErrorColor * _HLineErr = new TErrorColor[width + 2 ];
for ( long x = 0 ;x < width + 2 ; ++ x){
_HLineErr[x].dR = 0 ;
_HLineErr[x].dG = 0 ;
_HLineErr[x].dB = 0 ;
}
TErrorColor * HLineErr =& _HLineErr[ 1 ];
for ( long y = 0 ;y < src.height; ++ y){
CvsPic32To16_ErrorDiffuse_Line_1(pDst,pSrc,width,HLineErr);
(UInt8 *& )pDst += dst.byte_width;
(UInt8 *& )pSrc += src.byte_width;
}
delete[]_HLineErr;
}
速度测试:
//////////////////////////////////////////////////////////////
//CvsPic32To16_ErrorDiffuse_1 283.77 FPS
//////////////////////////////////////////////////////////////
F:继续优化
getBestRGB16_555Color函数有条件分支,建立一个查找表来代替,完整的实现如下:
static UInt8 _BestRGB16_555Color_Table[ 256 * 5 ];
const UInt8 * BestRGB16_555Color_Table =& _BestRGB16_555Color_Table[ 256 * 2 ];
struct _TAutoInit_BestRGB16_555Color_Table{
_TAutoInit_BestRGB16_555Color_Table(){
for ( long i = 0 ;i < 256 * 5 ; ++ i){
_BestRGB16_555Color_Table[i] = getBestRGB16_555Color(i - 256 * 2 );
}
}
};
static _TAutoInit_BestRGB16_555Color_Table _AutoInit_BestRGB16_555Color_Table;
//实际代码中建议预先生成_BestRGB16_555Color_Table的数据,从而避免初始化顺序依赖的问题
// Converts `width` 32-bit pixels from pSrc into 16-bit pixels in pDst
// (blue in bits 0-4, green in bits 5-9, red in bits 10-14) with integer
// error diffusion, looking the 5-bit level up in BestRGB16_555Color_Table
// instead of calling a clamping function.
//
// For each pixel the quantization error is divided by 4 (arithmetic shift);
// two quarters are added to the next pixel of this call, one quarter is
// added to PHLineErr[x-1] and one quarter is stored in PHLineErr[x].
// PHLineErr[x] is read before it is overwritten, so values left there by a
// previous call feed into this one.
//
// PHLineErr must be addressable from index -1 (that slot is reset here) up
// to width-1.
// NOTE(review): the wanted value is used directly as a table index (it may
// be negative); nothing here checks that it stays inside the table.
void CvsPic32To16_ErrorDiffuse_Line_2(UInt16 * pDst, const Color32 * pSrc, long width,TErrorColor * PHLineErr){
    // Quarter of the previous pixel's error, per channel.
    long quarterB = 0;
    long quarterG = 0;
    long quarterR = 0;

    // Walks one slot behind the current column; starts at PHLineErr[-1].
    TErrorColor * lowerLeft = PHLineErr - 1;
    lowerLeft->dR = 0;
    lowerLeft->dG = 0;
    lowerLeft->dB = 0;

    for (long col = 0; col < width; ++col, ++lowerLeft) {
        TErrorColor * below = lowerLeft + 1;    // same slot as PHLineErr[col]

        // Wanted value = source + 2/4 of the left neighbour's error + carried error.
        const long wantB = pSrc[col].b + quarterB * 2 + below->dB;
        const long wantG = pSrc[col].g + quarterG * 2 + below->dG;
        const long wantR = pSrc[col].r + quarterR * 2 + below->dR;

        const long levelB = BestRGB16_555Color_Table[wantB];
        const long levelG = BestRGB16_555Color_Table[wantG];
        const long levelR = BestRGB16_555Color_Table[wantR];
        pDst[col] = (levelR << 10) | (levelG << 5) | levelB;

        // On CPU architectures where multiplication is slow, getC8Color
        // could be turned into a lookup table as well.
        quarterB = (wantB - getC8Color(levelB)) >> 2;
        quarterG = (wantG - getC8Color(levelG)) >> 2;
        quarterR = (wantR - getC8Color(levelR)) >> 2;

        lowerLeft->dB += quarterB;
        lowerLeft->dG += quarterG;
        lowerLeft->dR += quarterR;

        below->dR = quarterR;
        below->dG = quarterG;
        below->dB = quarterB;
    }
}
void CvsPic32To16_ErrorDiffuse_2( const TPicRegion_RGB16_555 & dst, const TPixels32Ref & src){
UInt16 * pDst = (UInt16 * )dst.pdata;
const Color32 * pSrc = src.pdata;
const long width = src.width;
TErrorColor * _HLineErr = new TErrorColor[width + 2 ];
for ( long x = 0 ;x < width + 2 ; ++ x){
_HLineErr[x].dR = 0 ;
_HLineErr[x].dG = 0 ;
_HLineErr[x].dB = 0 ;
}
TErrorColor * HLineErr =& _HLineErr[ 1 ];
for ( long y = 0 ;y < src.height; ++ y){
CvsPic32To16_ErrorDiffuse_Line_2(pDst,pSrc,width,HLineErr);
(UInt8 *& )pDst += dst.byte_width;
(UInt8 *& )pSrc += src.byte_width;
}
delete[]_HLineErr;
}
速度测试:
//////////////////////////////////////////////////////////////
//CvsPic32To16_ErrorDiffuse_2 316.62 FPS
//////////////////////////////////////////////////////////////
函数效果:
(文章的 下篇 将讨论更快的速度或者更好的效果)