阅读提示:
《Delphi图像处理》系列以效率为侧重点,一般代码为PASCAL,核心代码采用BASM。
《C++图像处理》系列以代码清晰,可读性为主,全部使用C++代码。
尽可能保持二者内容一致,可相互对照。
本文代码必须包括文章《Delphi图像处理 -- 数据类型及公用过程》中的ImageData.pas单元。
本文在《GDI+ ColorMatrix的完全揭秘》的ColorMatrix原理揭秘的基础上,用Delphi代码来完整实现GDI+的ColorMatrix功能。
GDI+中设置ColorMatrix时有2个枚举选项,在实际运用中极少使用,所以代码中以GDI+设置ColorMatrix的缺省方式实现。
先给一个简易的浮点版本,因为该过程没有考虑子图处理,所以称之为简易版本,主要方便阅读者理解ColorMatrix实现原理:
// Simplified floating-point colour-matrix transform, applied in place.
//
// Data   - image whose pixels are rewritten. The pixels are walked as one
//          contiguous run of Width * Height 4-byte quads starting at Scan0,
//          so sub-images / padded rows are not supported by this version.
// Matrix - 5x5 colour matrix. It is passed by value; the normalisation
//          below only changes the local copy.
//
// When only the main diagonal holds data, each channel is scaled by its
// diagonal entry and the result is reduced modulo 256. Otherwise the full
// transform is evaluated and each channel is clamped to 0..255.
procedure SetColorMatrixF(Data: TImageData; Matrix: TColorMatrix);
var
  I, J, Count: Integer;
  P: PRGBQuad;
  MainValue: Boolean;
  v: Integer;

  // Transforms the single pixel that P currently points at.
  procedure SetPixel;
  var
    Pixel: array[0..3] of Byte;
    I, J: Integer;
    ps: PByteArray;
  begin
    ps := Pointer(P);
    // Note: the matrix is ordered R, G, B, A while the pixel bytes are
    // B, G, R, A, so the row/column order is adjusted in the arithmetic.
    for I := 0 to 3 do
    begin
      // Matrix index I -> pixel byte index J (0 <-> 2 swapped, 1 and 3 kept).
      if I < 3 then
        J := 2 - I
      else
        J := I;
      if MainValue then
        // Only main-diagonal data: plain colour scaling.
        // The product can be negative or above 255. The original relied on
        // the implicit Integer -> Byte truncation, which raises ERangeError
        // when range checking is enabled. The mask keeps exactly the same
        // low 8 bits without ever tripping the range check.
        // NOTE(review): the wrap (instead of a clamp) looks deliberate -
        // confirm against the intended GDI+ behaviour.
        Pixel[J] := Round(Matrix[I, I] * ps[J]) and $FF
      else
        // Otherwise evaluate the complete colour transform, clamped to 0..255.
        Pixel[J] := Max(0, Min(255, Round(Matrix[0, I] * ps[2] +
                                          Matrix[1, I] * ps[1] +
                                          Matrix[2, I] * ps[0] +
                                          Matrix[3, I] * ps[3] +
                                          Matrix[4, I] * 255)));
    end;
    // Write back only after all four channels were computed from the
    // original pixel values.
    for I := 0 to 3 do
      ps[I] := Pixel[I];
  end;

begin
  // Reduce matrix values greater than 255 (modulo 256) and detect whether
  // any entry off the main diagonal is non-zero.
  MainValue := True;
  for I := 0 to 4 do
    for J := 0 to 4 do
    begin
      v := Round(Matrix[I, J]) div 256;
      if v > 0 then
        Matrix[I, J] := Matrix[I, J] - 256.0 * v;
      if (I <> J) and (Matrix[I, J] <> 0) then
        MainValue := False;
    end;
  Count := Data.Width * Data.Height;
  P := Data.Scan0;
  for I := 1 to Count do
  begin
    SetPixel;
    Inc(P);
  end;
end;
因代码已经有了注释,而实现原理、公式已经在《GDI+ ColorMatrix的完全揭秘》中进行了详尽的介绍,所以本文不再赘述。
该过程代码的特点是简单易读,缺点是效率较低,在我的P4 2.8G计算机上,处理一张千万像素的照片,耗时为1000ms左右(不包括GDI+图像格式转换耗时。千万像素的24位格式图像转换为32位格式,耗时就达650ms)。
下面是一个MMX BASM代码的整数ColorMatrix实现过程:
过程定义:
// Apply a colour matrix to an image. Parameters:
//   Dest   - output image, Source - input image,
//   Data   - image that is processed in place
//   Matrix - the colour matrix
procedure ImageSetColorMatrix(var Data: TImageData; Matrix: TColorMatrix); overload; {$IF RTLVersion >= 17.00}inline;{$IFEND}
procedure ImageSetColorMatrix(var Dest: TImageData; const Source: TImageData; Matrix: TColorMatrix); overload;
实现代码:
type
  // Four 16-bit words laid out in the same B, G, R, A order as a pixel
  // unpacked to words; used below to address one row of the integer matrix.
  PARGBQuadW = ^TARGBQuadW;
  TARGBQuadW = packed record
    wBlue: Word;
    wGreen: Word;
    wRed: Word;
    wAlpha: Word;
  end;

// MMX/x87 integer implementation.
// The matrix is first scanned: if every entry off the main diagonal is zero
// the fast "scale" path is taken (diagonal * 256, pmullw, >> 8); otherwise
// the matrix is converted to a 5-row table of 16-bit integers on the stack
// and the full transform is evaluated with pmaddwd.
// NOTE(review): _SetCopyRegs is defined outside this view. From how its
// results are used below it presumably leaves esi = source pixels,
// edi = destination pixels, ecx = pixels per row, edx = row count and
// eax / ebx = the per-row offsets added to esi / edi - confirm in ImageData.pas.
procedure ImageSetColorMatrix(var Dest: TImageData; const Source: TImageData; Matrix: TColorMatrix);
asm
    push esi
    push edi
    push ebx
    mov ebx, eax // keep the incoming eax; it is restored before _SetCopyRegs
    mov edi, ecx // edi = matrix
    mov esi, 4 // for (i = 4; i >= 0; i --)
    fldz // {   (st(0) = 0.0, the value every entry is compared with)
@@iLoop:
    mov ecx, 4 // for (j = 4; j >= 0; j --)
@@jLoop: // {
    cmp ecx, esi
    je @@1 // skip the main diagonal (i == j)
    mov eax, esi
    imul eax, 5
    add eax, ecx // eax = i * 5 + j
    fcom dword ptr [edi+eax*4]
    fstsw ax
    sahf
    je @@1
    fstp st(0) // if (i != j && matrix[i, j] != 0)
    jmp @@TransformAll // goto TransformAll
@@1:
    dec ecx
    jns @@jLoop // }
    dec esi
    jns @@iLoop // }
    fstp st(0)
    fwait
    // Colour scaling only (main-diagonal data).
    // 8 bytes hold the four words; the extra 2 bytes absorb the upper half
    // of the last 32-bit fistp, which is written at offset 6.
    sub esp, 8+2
    mov dword ptr [esp], 256
    fild dword ptr [esp]
    fld st(0)
    fmul dword ptr [edi+(2*5+2)*4]
    fistp dword ptr [esp] // matrixI[0, 0] = matrix[2, 2] * 256
    fld st(0)
    fmul dword ptr [edi+(1*5+1)*4]
    fistp dword ptr [esp+2] // matrixI[0, 1] = matrix[1, 1] * 256
    fld st(0)
    fmul dword ptr [edi+(0*5+0)*4]
    fistp dword ptr [esp+4] // matrixI[0, 2] = matrix[0, 0] * 256
    fmul dword ptr [edi+(3*5+3)*4]
    fistp dword ptr [esp+6] // matrixI[0, 3] = matrix[3, 3] * 256
    mov eax, ebx
    call _SetCopyRegs
    pxor mm7, mm7
    movq mm1, [esp] // mm1 = m44 m11 m22 m33
@@yLoop:
    push ecx
@@xLoop:
    movd mm0, [esi]
    punpcklbw mm0, mm7 // mm0 = 00 A 00 R 00 G 00 B
    pmullw mm0, mm1 // mm0 = A*m44 R*m11 G*m22 B*m33 (low 16 bits of each product)
    psrlw mm0, 8 // mm0 = A*m44/256 R*m11/256 G*m22/256 B*m33/256
    packuswb mm0, mm0 // mm0 = 00 00 00 00 An Rn Gn Bn
    movd [edi], mm0
    add esi, 4
    add edi, 4
    loop @@xLoop
    add esi, eax
    add edi, ebx
    pop ecx
    dec edx
    jnz @@yLoop
    add esp, 8+2
    jmp @@end
    // Full colour transform.
@@TransformAll:
    // Five rows of four words; the extra 2 bytes absorb the upper half of
    // the last 32-bit fistp of the translation row.
    sub esp, 5*8+2
    // Convert the floating-point matrix, transposed, to integers scaled by 128.
    mov dword ptr [esp], 128
    fild dword ptr [esp]
    mov esi, esp // esi = matrixI
    mov eax, edi
    mov ecx, 4 // for (i = 0; i < 4; i ++)
@@cvtLoop: // {
    fld st(0)
    fmul dword ptr [edi]
    fistp dword ptr [esi] // matrixI[i, 0] = matrix[0, i] * 128
    fld st(0)
    fmul dword ptr [edi+1*5*4]
    fistp dword ptr [esi+2] // matrixI[i, 1] = matrix[1, i] * 128
    fld st(0)
    fmul dword ptr [edi+2*5*4]
    fistp dword ptr [esi+4] // matrixI[i, 2] = matrix[2, i] * 128
    fld st(0)
    fmul dword ptr [edi+3*5*4]
    fistp dword ptr [esi+6] // matrixI[i, 3] = matrix[3, i] * 128
    add esi, 8
    add edi, 4
    loop @@cvtLoop // }
    fstp st(0)
    add eax, 4*5*4 // eax = address of matrix row 4 (the translation row)
    // Convert the floating-point translation values to integers scaled by 255.
    mov dword ptr [esi], 255
    fild dword ptr [esi]
    mov ecx, 4 // for (j = 0; j < 4; j ++)
@@tLoop:
    fld st(0)
    fmul dword ptr [eax]
    fistp dword ptr [esi] // matrixI[4, j] = matrix[4, j] * 255
    add esi, 2
    add eax, 4
    loop @@tLoop
    fstp st(0)
    mov esi, esp // swap red and blue (columns 0 and 2)
    mov ecx, 5 // for (i = 0; i < 5; i ++)
@@swapLoop: // matrixI[i, 0] <--> matrixI[i, 2]
    mov ax, [esi].TARGBQuadW.wBlue
    xchg ax, [esi].TARGBQuadW.wRed
    mov [esi].TARGBQuadW.wBlue, ax
    add esi, 8
    loop @@swapLoop
    mov eax, ebx
    call _SetCopyRegs
    pxor mm7, mm7
    pcmpeqb mm4, mm4 // mm4 = FF FF FF FF FF FF FF FF
    psrlw mm4, 15 // mm4 = 00 01 00 01 00 01 00 01
@@yLoopA:
    push ecx
@@xLoopA:
    movd mm0, [esi]
    punpcklbw mm0, mm7 // mm0 = 00 A 00 R 00 G 00 B
    movq mm1, mm0
    movq mm2, mm0
    movq mm3, mm0
    // The "+4" in the offsets below skips the ecx pushed at the row start.
    pmaddwd mm0, [esp+16+4] // mm0 = A*m43+R*m13 G*m23+B*m33 (blue row)
    pmaddwd mm1, [esp+8+4] // mm1 = A*m42+R*m12 G*m22+B*m32 (green row)
    pmaddwd mm2, [esp+4] // mm2 = A*m41+R*m11 G*m21+B*m31 (red row)
    pmaddwd mm3, [esp+24+4] // mm3 = A*m44+R*m14 G*m24+B*m34 (alpha row)
    psrad mm0, 7 // mm0 = (A*m43+R*m13)/128 (G*m23+B*m33)/128
    psrad mm1, 7 // mm1 = (A*m42+R*m12)/128 (G*m22+B*m32)/128
    psrad mm2, 7 // mm2 = (A*m41+R*m11)/128 (G*m21+B*m31)/128
    psrad mm3, 7 // mm3 = (A*m44+R*m14)/128 (G*m24+B*m34)/128
    packssdw mm0, mm1 // mm0 = Ag+Rg Gg+Bg Ab+Rb Gb+Bb
    packssdw mm2, mm3 // mm2 = Aa+Ra Ga+Ba Ar+Rr Gr+Br
    pmaddwd mm0, mm4 // mm0 = Ag+Rg+Gg+Bg=Gn Ab+Rb+Gb+Bb=Bn
    pmaddwd mm2, mm4 // mm2 = Aa+Ra+Ga+Ba=An Ar+Rr+Gr+Br=Rn
    packssdw mm0, mm2 // mm0 = 00 An 00 Rn 00 Gn 00 Bn
    paddw mm0, [esp+32+4] // mm0 = An+At Rn+Rt Gn+Gt Bn+Bt (translation row)
    packuswb mm0, mm0 // mm0 = 00 00 00 00 An Rn Gn Bn
    movd [edi], mm0
    add esi, 4
    add edi, 4
    loop @@xLoopA
    add esi, eax
    add edi, ebx
    pop ecx
    dec edx
    jnz @@yLoopA
    add esp, 5*8+2
@@end:
    emms // leave MMX state so the FPU can be used again
@@Exit:
    pop ebx
    pop edi
    pop esi
end;

// In-place variant: forwards to the Dest/Source overload with Data as both.
procedure ImageSetColorMatrix(var Data: TImageData; Matrix: TColorMatrix);
begin
  ImageSetColorMatrix(Data, Data, Matrix);
end;
该过程中作了更详细的注释,其特点是处理速度较快。在我的机器上,不包括图像格式转换耗时,处理千万像素图片主对角线数据耗时不到50ms,而处理全部变换耗时350-400ms。
下面是一个测试程序代码。该测试代码界面与《GDI+ for VCL基础 -- 颜色调整矩阵ColorMatrix详解》是一样的。有兴趣的朋友可以同里面的测试代码作一下比较。
// Test form for ImageSetColorMatrix: shows the source image next to the
// transformed image and lets the user edit the 5x5 colour matrix in a grid.
unit main2;

interface

uses
  Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
  Dialogs, StdCtrls, Buttons, Grids, ExtCtrls, Gdiplus, ImageData;

type
  TForm1 = class(TForm)
    Label1: TLabel;
    PaintBox1: TPaintBox;
    SpeedButton1: TSpeedButton;
    SpeedButton2: TSpeedButton;
    SpeedButton3: TSpeedButton;
    SpeedButton4: TSpeedButton;
    StringGrid1: TStringGrid;
    BitBtn1: TBitBtn;
    BitBtn3: TBitBtn;
    BitBtn2: TBitBtn;
    procedure FormCreate(Sender: TObject);
    procedure FormDestroy(Sender: TObject);
    procedure StringGrid1DrawCell(Sender: TObject; ACol, ARow: Integer;
      Rect: TRect; State: TGridDrawState);
    procedure StringGrid1GetEditText(Sender: TObject; ACol, ARow: Integer;
      var Value: string);
    procedure PaintBox1Paint(Sender: TObject);
    procedure BitBtn1Click(Sender: TObject);
    procedure BitBtn2Click(Sender: TObject);
    procedure BitBtn3Click(Sender: TObject);
    procedure SpeedButton2Click(Sender: TObject);
    procedure SpeedButton3Click(Sender: TObject);
    procedure SpeedButton1Click(Sender: TObject);
    procedure SpeedButton4Click(Sender: TObject);
    procedure StringGrid1SetEditText(Sender: TObject; ACol, ARow: Integer;
      const Value: string);
  private
    { Private declarations }
    Source: TGpBitmap;      // bitmap wrapping SrcData, used for display
    Dest: TGpBitmap;        // bitmap wrapping DstData, used for display
    SrcData: TImageData;    // untouched source pixels
    DstData: TImageData;    // receives the transformed pixels
    Matrix: TColorMatrix;   // matrix currently shown in the grid
    function CheckFloatStr(Str: string): Double;
    procedure InitColorMatrix;
  public
    { Public declarations }
  end;

var
  Form1: TForm1;

implementation

{$R *.dfm}

// Applies the current matrix (SrcData -> DstData), repaints, and refreshes
// the text of the selected grid cell from the matrix.
procedure TForm1.BitBtn1Click(Sender: TObject);
begin
  ImageSetColorMatrix(DstData, SrcData, Matrix);
  PaintBox1.Invalidate;
  with StringGrid1 do
  begin
    Cells[Col, Row] := FloatToStr(Matrix[Row, Col]);
    Invalidate;
    SetFocus;
  end;
end;

// Resets the matrix to identity and re-applies it.
procedure TForm1.BitBtn2Click(Sender: TObject);
begin
  InitColorMatrix;
  BitBtn1.Click;
end;

procedure TForm1.BitBtn3Click(Sender: TObject);
begin
  Close;
end;

// Parses the leading numeric part of Str: an optional sign followed by
// digits with at most one '.'. Parsing stops at the first other character.
// Returns 0 for an empty string or when no usable number is found.
function TForm1.CheckFloatStr(Str: string): Double;
var
  i, len: Integer;
  dec, neg: Boolean;
  s: string;
begin
  Result := 0;
  len := Length(Str);
  if len = 0 then
    Exit;
  dec := False;
  neg := False;
  i := 1;
  s := '';
  if (Str[i] = '-') or (Str[i] = '+') then
  begin
    if Str[i] = '-' then
      neg := True;
    Inc(i);
  end;
  while (i <= len) do
  begin
    if Str[i] = '.' then
    begin
      if dec then
        Break;
      dec := True;
    end
    else if (Str[i] < '0') or (Str[i] > '9') then
      Break;
    s := s + Str[i];
    Inc(i);
  end;
  if Length(s) > 0 then
  begin
    if neg then
      s := '-' + s;
    // The collected text can still be unparsable (e.g. a lone '.' while the
    // user is typing). StrToFloat would raise EConvertError from the grid's
    // edit event; fall back to 0 instead.
    Result := StrToFloatDef(s, 0);
  end;
end;

procedure TForm1.FormCreate(Sender: TObject);
var
  Bmp: TGpBitmap;
  Data: TBitmapData;
  R: TGpRect;
begin
  // Load the image file into Bmp.
  Bmp := TGpBitmap.Create('..\..\media\100_0349.jpg');
  R := GpRect(0, 0, Bmp.Width, Bmp.Height);
  // Allocate new source and destination image data (SrcData and DstData).
  SrcData := NewImageData(R.Width, R.Height);
  DstData := NewImageData(R.Width, R.Height);
  // Lock-copy the Bmp pixels into SrcData and then into DstData.
  // NOTE(review): Data is pre-filled and then overwritten by the LockBits
  // result while imUserInputBuf is set; this presumably relies on how the
  // wrapper receives the user buffer - keep the statement order as is.
  Data := TBitmapData(SrcData);
  Data := Bmp.LockBits(R, [imRead, imWrite, imUserInputBuf], pf32bppARGB);
  Bmp.UnlockBits(Data);
  Data.Scan0 := DstData.Scan0;
  Data := Bmp.LockBits(R, [imRead, imWrite, imUserInputBuf], pf32bppARGB);
  Bmp.UnlockBits(Data);
  Bmp.Free;
  // Create the bitmaps Source and Dest on top of SrcData and DstData.
  // Note: the image data records are used for processing and the bitmaps
  // for display. Binding them this way avoids a lock/unlock on every
  // processing pass.
  Source := TGpBitmap.Create(SrcData.Width, SrcData.Height, SrcData.Stride,
    pf32bppARGB, SrcData.Scan0);
  Dest := TGpBitmap.Create(DstData.Width, DstData.Height, DstData.Stride,
    pf32bppARGB, DstData.Scan0);
  InitColorMatrix;
end;

// Frees the display bitmaps before the pixel buffers they were created on.
procedure TForm1.FormDestroy(Sender: TObject);
begin
  Dest.Free;
  Source.Free;
  FreeImageData(DstData);
  FreeImageData(SrcData);
end;

// Sets Matrix to the 5x5 identity matrix.
procedure TForm1.InitColorMatrix;
var
  i, j: Integer;
begin
  for i := 0 to 4 do
  begin
    for j := 0 to 4 do
      if i = j then
        Matrix[i, j] := 1
      else
        Matrix[i, j] := 0;
  end;
end;

// Draws the source image and, to its right, the processed image.
procedure TForm1.PaintBox1Paint(Sender: TObject);
var
  g: TGpGraphics;
begin
  g := TGpGraphics.Create(PaintBox1.Canvas.Handle);
  try
    g.DrawImage(Source, 10, 10);
    g.DrawImage(Dest, SrcData.Width + 20, 10);
  finally
    g.Free;
  end;
end;

// Preset: columns 0..2 all get the weights 0.30 / 0.59 / 0.11 in rows 0..2.
procedure TForm1.SpeedButton1Click(Sender: TObject);
var
  i: Integer;
begin
  InitColorMatrix;
  for i := 0 to 2 do
  begin
    Matrix[0, i] := 0.30;
    Matrix[1, i] := 0.59;
    Matrix[2, i] := 0.11;
  end;
  BitBtn1.Click;
end;

// Preset: translation of 0.10 on columns 0..2 (row 4).
procedure TForm1.SpeedButton2Click(Sender: TObject);
var
  i: Integer;
begin
  InitColorMatrix;
  for i := 0 to 2 do
    Matrix[4, i] := 0.10;
  BitBtn1.Click;
end;

// Preset: -1 on the first three diagonal entries.
procedure TForm1.SpeedButton3Click(Sender: TObject);
begin
  InitColorMatrix;
  Matrix[0, 0] := -1;
  Matrix[1, 1] := -1;
  Matrix[2, 2] := -1;
  BitBtn1.Click;
end;

// Preset: 0.5 on the fourth diagonal entry.
procedure TForm1.SpeedButton4Click(Sender: TObject);
begin
  InitColorMatrix;
  Matrix[3, 3] := 0.5;
  BitBtn1.Click;
end;

// Owner-draws a grid cell: the matrix value with two decimals, right aligned.
procedure TForm1.StringGrid1DrawCell(Sender: TObject; ACol, ARow: Integer;
  Rect: TRect; State: TGridDrawState);
var
  Text: string;
begin
  Text := Format('%.2f', [Matrix[ARow, ACol]]);
  StringGrid1.Canvas.FillRect(Rect);
  StringGrid1.Canvas.Pen.Color := clBtnShadow;
  StringGrid1.Canvas.Rectangle(Rect);
  InflateRect(Rect, -2, -2);
  // Plain "Rect" instead of "&Rect": same identifier, but it also compiles
  // on Delphi versions without the "&" identifier prefix.
  DrawText(StringGrid1.Canvas.Handle, PChar(Text), Length(Text), Rect, DT_RIGHT);
end;

// Supplies the text shown when a cell enters edit mode.
procedure TForm1.StringGrid1GetEditText(Sender: TObject; ACol, ARow: Integer;
  var Value: string);
begin
  Value := Format('%.2f', [Matrix[ARow, ACol]]);
end;

// Stores the edited text back into the matrix.
procedure TForm1.StringGrid1SetEditText(Sender: TObject; ACol, ARow: Integer;
  const Value: string);
begin
  Matrix[ARow, ACol] := CheckFloatStr(Value);
end;

end.
下面是运行效果图:
// SSE floating-point implementation of the colour-matrix transform.
// The matrix is scanned first: if every entry outside the main diagonal
// (ignoring the dummy 5th column) is zero, each channel is simply scaled by
// its diagonal entry; otherwise the full transform (four row products plus
// the translation row * 255) is evaluated per pixel. Both paths convert the
// result back to integers and pack it with saturation.
// NOTE(review): _SetCopyRegs is defined outside this view. From how its
// results are used below it presumably leaves esi = source pixels,
// edi = destination pixels, ecx = pixels per row, edx = row count and
// eax / ebx = the per-row offsets added to esi / edi - confirm in ImageData.pas.
procedure ImageSetColorMatrix(var Dest: TImageData;
const Source: TImageData; Matrix: TColorMatrix); overload;
asm
push ebp
push esi
push edi
push ebx
// ebp = 16-byte aligned address of a 128-bit slot inside the 32 bytes
// reserved on the stack (required by the movaps/addps accesses below)
sub esp, 32
mov ebp, esp
add ebp, 16
and ebp, -16
// Check every matrix entry except the main diagonal and the dummy column;
// if any is non-zero, run the full colour transform.
// The test compares the raw 32-bit pattern with 0.
mov edi, ecx
mov esi, 4 // for (i = 4; i >= 0; i --)
@@iLoop: // {
mov ecx, 3 // for (j = 3; j >= 0; j --)
@@jLoop: // {
cmp ecx, esi // if (i == j) continue
je @@1
lea ebx, [esi+esi*4]
add ebx, ecx // index = i * 5 + j
cmp dword ptr[edi+ebx*4], 0
jne @@Transform // if (Matrix[Index]) goto @@Transform
@@1:
dec ecx
jns @@jLoop // }
dec esi
jns @@iLoop // }
// Colour scaling only: copy the diagonal entries [2,2], [1,1], [0,0], [3,3]
// into the aligned slot, i.e. in the B, G, R, A order of the pixel bytes.
mov ebx, [edi+(2*5+2)*4]
mov ecx, [edi+(1*5+1)*4]
mov [ebp], ebx
mov [ebp+4], ecx
mov ebx, [edi+(0*5+0)*4]
mov ecx, [edi+(3*5+3)*4]
mov [ebp+8], ebx
mov [ebp+12], ecx
movaps xmm1, [ebp] // xmm1 = m44 m11 m22 m33
pxor xmm7, xmm7
call _SetCopyRegs
@@yLoop_Scale:
push ecx
@@xLoop_Scale:
movd xmm0, [esi]
// bytes -> words -> dwords -> floats
punpcklbw xmm0, xmm7
punpcklwd xmm0, xmm7
cvtdq2ps xmm0, xmm0
mulps xmm0, xmm1 // xmm0 = [A R G B] * [m44 m11 m22 m33]
// floats -> dwords -> words (signed saturation) -> bytes (unsigned saturation)
cvtps2dq xmm0, xmm0
packssdw xmm0, xmm7
packuswb xmm0, xmm7
movd [edi], xmm0
add esi, 4
add edi, 4
loop @@xLoop_Scale
pop ecx
add esi, eax
add edi, ebx
dec edx
jnz @@yLoop_Scale
jmp @@Exit
// Full colour transform
@@Transform:
// Load the matrix rows into SSE registers, without the dummy column
movups xmm1, [edi+0*5*4]
movups xmm2, [edi+1*5*4]
movups xmm3, [edi+2*5*4]
movups xmm4, [edi+3*5*4]
movups xmm5, [edi+4*5*4]
// Multiply the translation row by 255
mov ebx, 255
cvtsi2ss xmm6, ebx
pshufd xmm6, xmm6, 0
mulps xmm5, xmm6
// Swap the red and blue positions (elements 0 and 2) in every row
pshufd xmm1, xmm1, 11000110b
pshufd xmm2, xmm2, 11000110b
pshufd xmm3, xmm3, 11000110b
pshufd xmm4, xmm4, 11000110b
pshufd xmm5, xmm5, 11000110b
// Keep the translation row in the aligned stack slot
// (xmm5 is reused as a temporary inside the pixel loop)
movaps [ebp], xmm5
pxor xmm7, xmm7
call _SetCopyRegs
@@yLoop:
push ecx
@@xLoop:
movd xmm0, [esi]
// bytes -> words -> dwords -> floats
punpcklbw xmm0, xmm7
punpcklwd xmm0, xmm7
cvtdq2ps xmm0, xmm0
pshufd xmm5, xmm0, 0 // broadcast blue
pshufd xmm6, xmm0, 01010101b // broadcast green
mulps xmm5, xmm3 // vb = blue * m3
mulps xmm6, xmm2 // vg = green * m2
addps xmm5, [ebp] // vb += m5
addps xmm5, xmm6 // vb += vg
pshufd xmm6, xmm0, 10101010b // broadcast red
pshufd xmm0, xmm0, 11111111b // broadcast alpha
mulps xmm6, xmm1 // vr = red * m1
mulps xmm0, xmm4 // va = alpha * m4
addps xmm0, xmm6 // v = va + vr
addps xmm0, xmm5 // v += vb
// floats -> dwords -> words (signed saturation) -> bytes (unsigned saturation)
cvtps2dq xmm0, xmm0
packssdw xmm0, xmm7
packuswb xmm0, xmm7
movd [edi], xmm0
add esi, 4
add edi, 4
loop @@xLoop
pop ecx
add esi, eax
add edi, ebx
dec edx
jnz @@yLoop
@@Exit:
// Release the 32-byte scratch area and restore the saved registers
add esp, 32
pop ebx
pop edi
pop esi
pop ebp
end;
// In-place variant: applies Matrix to Data by forwarding to the
// Dest/Source overload with Data passed as both destination and source.
procedure ImageSetColorMatrix(var Data: TImageData; Matrix: TColorMatrix); overload;
begin
ImageSetColorMatrix(Data, Data, Matrix);
end;
//---------------------------------------------------------------------------