/**
 * Draws a solid, non-antialiased line between (x0, y0) and (x1, y1) with an
 * integer-only Bresenham stepper. Both end points are written, which matches
 * the other line routines in this file.
 *
 * Params:
 *   x0, y0     = first end point, in pixels
 *   x1, y1     = second end point, in pixels
 *   p32BitVram = address of pixel (0, 0) carried in a 32-bit integer; the
 *                routine casts it back to a pointer, so it is only usable
 *                where addresses fit in 32 bits
 *   pitch      = distance in bytes between two vertically adjacent pixels
 *   color      = 32-bit value stored into every pixel of the line
 *
 * No clipping or bounds checking is done; the caller must keep both end
 * points inside the surface.
 *
 * Fixes over the previous version:
 *   - the end pixel (x1, y1) used to be skipped (the loop pre-decremented
 *     its counter, so it ran one step short);
 *   - a zero-length line (x0 == x1 and y0 == y1) used to decrement the
 *     counter from 0 to -1 and loop roughly 2^32 times through memory.
 */
void NcLine ( int x0, int y0,
int x1, int y1, uint p32BitVram, int pitch, uint color ) {
    // Byte address of the first pixel (4 bytes per pixel).
    p32BitVram += (x0 << 2) + y0 * pitch;

    int absX = x1 - x0;
    int absY = y1 - y0;

    // Horizontal step in bytes, signed by the direction of travel.
    int vx_dir = 4;
    if (absX < 0) {
        vx_dir = -4;
        absX = -absX;
    }
    // Vertical step in bytes: walk the rows backwards when y decreases.
    if (absY < 0) {
        pitch = -pitch;
        absY = -absY;
    }

    // One step along both axes at once.
    const int incEx = vx_dir + pitch;

    // Choose the driving (major) axis; ties go to the Y axis as before.
    int majorLen, minorLen, majorStep;
    if (absX > absY) {
        majorLen = absX;
        minorLen = absY;
        majorStep = vx_dir;
    } else {
        majorLen = absY;
        minorLen = absX;
        majorStep = pitch;
    }

    const int straightAdj = minorLen << 1;      // added after a major-only step
    int error = straightAdj - majorLen;         // decision variable
    const int diagonalAdj = error - majorLen;   // added after a diagonal step

    // Exactly majorLen steps, then one final store: majorLen + 1 pixels.
    for (int remaining = majorLen; remaining != 0; --remaining) {
        *cast (uint*) p32BitVram = color;
        if (error < 0) {
            p32BitVram += majorStep;
            error += straightAdj;
        } else {
            p32BitVram += incEx;
            error += diagonalAdj;
        }
    }
    *cast (uint*) p32BitVram = color;
}
/**
 * Blends one pixel of the line colour over a background pixel.
 *
 * Params:
 *   bg         = current surface pixel
 *   line       = line colour
 *   lineWeight = coverage of the line colour, 0 (keep bg) .. 255 (all line)
 *
 * Returns: the blended pixel. Only the three low bytes are mixed; the top
 * byte of the result is always zero, as it was in the previous code.
 */
private uint wuBlendPixel ( uint bg, uint line, uint lineWeight ) {
    const double lineShare = lineWeight / 255.0;
    const double bgShare = (lineWeight ^ 0xFF) / 255.0;
    uint mixedPixel = 0;
    for (uint shift = 0; shift <= 16; shift += 8) {
        const int b = (bg >> shift) & 0xFF;
        const int l = (line >> shift) & 0xFF;
        // Interpolate upwards from the smaller channel value so the
        // intermediate stays non-negative; the fraction cast truncates.
        const ubyte mixed = b > l
            ? cast(ubyte)( bgShare * (b - l) + l )
            : cast(ubyte)( lineShare * (l - b) + b );
        mixedPixel |= cast(uint) mixed << shift;
    }
    return mixedPixel;
}

/**
 * Draws an antialiased line (Wu's algorithm) into a surface of 32-bit pixels.
 *
 * Horizontal, vertical and exact 45-degree lines are drawn solid. Every other
 * line writes the two end points solid and, for each intermediate step along
 * the major axis, blends the line colour into a pair of neighbouring pixels
 * with complementary weights taken from the top 8 bits of a 32-bit
 * fixed-point error accumulator.
 *
 * Params:
 *   p32bitVram = pointer to pixel (0, 0)
 *   VramPitch  = row pitch in bytes (divided by 4 to get pixels)
 *   xPosStart, yPosStart = first end point, in pixels
 *   xPosEnd, yPosEnd     = second end point, in pixels
 *   clrLine    = line colour; the three low bytes are the blended channels
 *
 * No clipping is done. The second pixel of each pair lies one column
 * (steep lines) or one row (shallow lines) beyond the ideal line, so the
 * caller must keep that neighbour inside the surface as well.
 *
 * Fixes over the previous version:
 *   - a zero-length line no longer runs the horizontal loop ~2^32 times;
 *   - blending weights are applied per channel consistently. Previously a
 *     channel whose background/line ordering disagreed with the luminance
 *     ordering of the whole pixel received the inverted weight.
 */
void WuLine ( uint * p32bitVram, uint VramPitch, int xPosStart, int yPosStart, int xPosEnd, int yPosEnd, uint clrLine ){
    // Row pitch in pixels, signed so it can be mixed with negative steps.
    const ptrdiff_t pitch = VramPitch >> 2;

    // Always walk from the upper end point to the lower one.
    if (yPosStart > yPosEnd) {
        int Temp = yPosEnd; yPosEnd = yPosStart; yPosStart = Temp;
        Temp = xPosStart; xPosStart = xPosEnd; xPosEnd = Temp;
    }

    uint* endPixel = p32bitVram + (xPosEnd + yPosEnd * pitch);
    uint* p = p32bitVram + (xPosStart + yPosStart * pitch);
    *p = clrLine;

    int DeltaX = xPosEnd - xPosStart;
    ptrdiff_t XDir = 1;
    if (DeltaX < 0) {
        XDir = -1;
        DeltaX = -DeltaX;
    }
    int DeltaY = yPosEnd - yPosStart;

    if (DeltaY == 0) { /* Horizontal line (also the single-pixel case) */
        for (; DeltaX > 0; --DeltaX)
            *(p += XDir) = clrLine;
        return;
    }
    if (DeltaX == 0) { /* Vertical line */
        for (; DeltaY > 0; --DeltaY)
            *(p += pitch) = clrLine;
        return;
    }
    if (DeltaX == DeltaY) { /* Diagonal line */
        const ptrdiff_t diagonalStep = pitch + XDir;
        for (; DeltaY > 0; --DeltaY)
            *(p += diagonalStep) = clrLine;
        return;
    }

    uint ErrorAcc = 0;

    if (DeltaY > DeltaX) { /* Y-major: one row per step, pair pixel is the X neighbour */
        // Fractional X advance per row as 0.32 fixed point.
        const uint ErrorAdj = cast(uint) ( ( cast(ulong) DeltaX << 32) / cast(ulong) DeltaY );
        while (--DeltaY) {
            const uint ErrorAccTemp = ErrorAcc;
            ErrorAcc += ErrorAdj;
            p += pitch;
            if (ErrorAcc <= ErrorAccTemp)   // accumulator wrapped: move one column
                p += XDir;
            const uint Weighting = ErrorAcc >> 24;
            *p = wuBlendPixel(*p, clrLine, Weighting ^ 0xFF);
            uint* pairPixel = p + XDir;
            *pairPixel = wuBlendPixel(*pairPixel, clrLine, Weighting);
        }
        *endPixel = clrLine;
        return;
    }

    /* X-major: one column per step, pair pixel is the row below */
    const uint ErrorAdj = cast(uint) ( (cast(ulong) DeltaY << 32) / cast(ulong) DeltaX);
    while (--DeltaX) {
        const uint ErrorAccTemp = ErrorAcc;
        ErrorAcc += ErrorAdj;
        p += XDir;
        if (ErrorAcc <= ErrorAccTemp)       // accumulator wrapped: move one row
            p += pitch;
        const uint Weighting = ErrorAcc >> 24;
        *p = wuBlendPixel(*p, clrLine, Weighting ^ 0xFF);
        uint* pairPixel = p + pitch;
        *pairPixel = wuBlendPixel(*pairPixel, clrLine, Weighting);
    }
    *endPixel = clrLine;
}
// Bresenham line routine, inline-assembly version
/**
 * Hand-written x86 (32-bit) version of the solid Bresenham line routine.
 * Draws from (x0, y0) to (x1, y1), end points included, storing `color`
 * into 32-bit pixels.
 *
 * Params:
 *   x0, y0     = first end point, in pixels
 *   x1, y1     = second end point, in pixels
 *   p32BitVram = address of pixel (0, 0) as a 32-bit integer
 *   pitch      = row pitch in bytes
 *   color      = 32-bit pixel value
 *
 * Implementation notes:
 *   - The function is `naked`. After `pushad` it reads its arguments at
 *     36[ESP] .. 60[ESP] in declaration order and returns with a plain
 *     `ret`. NOTE(review): that layout presumes a caller-cleans, all-on-stack
 *     convention (C linkage); the linkage attribute is not visible in this
 *     block - confirm.
 *   - Dispatch uses computed jumps: `lea EAX, table[EAX*8] / jmp EAX`. The
 *     tables are runs of instructions, and the unreachable instructions
 *     between the jumps look like padding that makes every entry 8 bytes
 *     long. NOTE(review): this depends on the encodings the assembler picks
 *     (short vs. near jumps); do not insert or resize anything between a
 *     table label and the first label that follows it.
 *   - In the NearX / NearY paths ESP is parked in XMM7 and ESP itself
 *     carries the colour, so the stack is unusable until ESP is restored.
 *   - The inner loops branch on the carry flag left by the `add` that
 *     updates the error term; `inc` and `mov` do not modify CF.
 *   - No clipping is performed.
 *
 * Fixes over the previous version:
 *   - R2: added the `rol EAX, 1` that R1 already had; without it the
 *     "dy > dx" bit was shifted out by the *8 scaling.
 *   - R2Align / R1Align: the start address was computed with x and y
 *     transposed; it now matches the NearX / NearY paths.
 *   - R2Align: a zero-length line no longer enters the loop with a zero
 *     counter (which never terminated).
 *   - R1Align: the final pixel is now stored.
 *   - R2NearY: ESP is saved in XMM7 (was XMM0) to match the restore.
 *   - R1NearY_RP_Ex: added the missing `jmp R1NearY_ret`; execution used to
 *     run off the end of the function when the loop finished there.
 */
void _NcLine ( int x0, int y0,
int x1, int y1, uint p32BitVram, int pitch, uint color ) {
asm {
naked ; // no compiler-generated prologue / epilogue
pushad ; // save all general registers (32 bytes)
mov ECX, 36[ESP] ; // ECX <- x0
mov EDX, 40[ESP] ; // EDX <- y0
mov EDI, 44[ESP] ; // EDI <- x1
mov ESI, 48[ESP] ; // ESI <- y1
cmp ECX, EDI ; // x0 < x1 ?
setl AL ; // AL = (x0 < x1)
shl EAX, 31 ; // park that bit in bit 31, clearing everything else
cmp EDX, ESI ; // y0 < y1 ?
setl AL ; // AL = (y0 < y1)
rol EAX, 1 ; // bit0 = (x0 < x1), bit1 = (y0 < y1)
lea EAX, gtIndex[EAX*8] ; // address of dispatch entry
jmp EAX ;
align 16 ;
gtIndex:
// entry 0: x0 >= x1, y0 >= y1 -> swap the end points, handle as R2
xchg EDI, ECX ;
xchg ESI, EDX ;
jmp R2 ;
xor EAX, EAX ; // never executed (entry padding)
// entry 1: x0 < x1, y0 >= y1 -> swap the end points, handle as R1
xchg EDI, ECX ;
xchg ESI, EDX ;
jmp tempTable ;
xor EAX, EAX ; // never executed (entry padding)
// entry 2: x0 >= x1, y0 < y1 -> R1
tempTable:
jmp R1 ;
xchg EDI, ECX ; // never executed (entry padding)
inc EAX ; // never executed (entry padding)
// entry 3: x0 < x1, y0 < y1 -> R2
R2: // here x0 <= x1 and y0 <= y1
sub EDI, ECX ; // EDI = dx
sub ESI, EDX ; // ESI = dy
xor EAX, EAX ; // clear
cmp ESI, EDI ; // dy ? dx
seta AL ; // AL = (dy > dx)
ror EAX, 1 ; // park in bit 31; ror leaves ZF untouched
setz AL ; // AL = (dy == dx)
rol EAX, 1 ; // FIX: bit0 = (dy > dx), bit1 = (dy == dx)
lea EAX, R2gtIndex[EAX*8] ; // table
jmp EAX ;
align 16 ;
R2gtIndex: /// r2 TABLE
jmp R2NearX ; // entry 0: dx > dy
jmp R2NearY ; // never executed (entry padding)
inc EDX ; // never executed (entry padding)
jmp R2NearY ; // entry 1: dy > dx
imul ECX, EAX ; // never executed (entry padding)
R2Align: // entry 2: dx == dy, 45 degrees down-right
mov EAX, 56[ESP] ; // pitch
imul EDX, EAX ; // FIX: y0 * pitch (was x0 * pitch)
add EAX, 4 ; // diagonal step = pitch + 4
lea ECX, [EDX+ECX*4] ; // FIX: y0 * pitch + x0 * 4
mov EDX, 60[ESP] ; // EDX = color
add ECX, 52[ESP] ; // + surface base = address of first pixel
imul EDI, EAX ; // total byte distance start -> end
neg EDI; // counter runs from -total up to 0
sub ECX, EDI ; // ECX = address of last pixel
test EDI, EDI ; // FIX: zero-length line -> single store only
jz R2Align_last ;
align 16 ;
R2Align_main_loop:
mov [ECX+EDI], EDX ; // store pixel
lea ESP, [ESP] ; // filler (no effect)
add EDI, EAX ; // next diagonal pixel
jne R2Align_main_loop ; // EDI reaches zero at the end point
R2Align_last:
mov [ECX+EDI], EDX ; // last pixel
popad ;
ret ;
R2NearX: // dx > dy, stepping right / down
movd XMM7, ESP ; // park ESP, it is reused for the colour
mov EAX, 56[ESP] ; // pitch
neg EDI ; // -dx, counts up to zero
imul EDX, EAX ; // y0 * pitch
lea EBX, [EDX+ECX*4] ; // offset of first pixel
add ESI, ESI ; // 2 * dy
add EBX, 52[ESP] ; // EBX = address of first pixel
lea EDX, 4[EAX] ; // diagonal step = pitch + 4
lea ECX, [ESI+EDI] ; // ECX = error = 2dy - dx
mov ESP, 60[ESP] ; // ESP = color
lea EBP, [ECX+EDI] ; // diagonal error adjust = 2dy - 2dx
test ECX, ECX ; // also clears CF for the first jc
jge R2NearX_Ex ; // first step is diagonal
align 16 ;
R2NearX_main_loop:
mov [EBX], ESP ; // store pixel
jc R2NearX_RP ; // CF from last error add: error >= 0
add EBX, 4 ; // step right
add ECX, ESI ; // error += 2dy
inc EDI ;
jne R2NearX_main_loop ;
jmp R2NearXRet ;
R2NearX_RP:
add EBX, EDX ; // step right and down
add ECX, EBP ; // error += 2dy - 2dx
inc EDI ;
jne R2NearX_main_loop ;
R2NearXRet:
mov [EBX], ESP ; // last pixel
movd ESP, XMM7 ; // restore stack pointer
popad ;
ret ;
R2NearX_Ex:
mov [EBX], ESP ;
add EBX, EDX ;
add ECX, EBP ;
inc EDI ;
jne R2NearX_main_loop ;
jmp R2NearXRet ;
R2NearY: // dy > dx, stepping down / right
movd XMM7, ESP ; // FIX: was XMM0, but the restore reads XMM7
mov EAX, 56[ESP] ; // pitch
neg ESI ; // -dy, counts up to zero
imul EDX, EAX ; // y0 * pitch
lea EBX, [EDX+ECX*4] ; // offset of first pixel
add EDI, EDI ; // 2 * dx
add EBX, 52[ESP] ; // EBX = address of first pixel
lea EDX, 4[EAX] ; // diagonal step = pitch + 4
lea ECX, [EDI+ESI] ; // error = 2dx - dy
mov ESP, 60[ESP] ; // ESP = color
lea EBP, [ECX+ESI] ; // diagonal error adjust = 2dx - 2dy
test ECX, ECX ; // also clears CF for the first jnc
jge R2NearY_RP_Ex ; // first step is diagonal
align 16 ;
R2NearY_main_loop:
mov [EBX], ESP ; // store pixel
jnc R2NearY_no_repair ; // CF clear: error < 0, straight step
R2NearY_RP:
add EBX, EDX ; // step down and right
add ECX, EBP ; // error += 2dx - 2dy
inc ESI ;
jne R2NearY_main_loop ;
jmp R2NearY_ret ;
R2NearY_no_repair:
add EBX, EAX ; // step down
add ECX, EDI ; // error += 2dx
inc ESI ;
jne R2NearY_main_loop ;
R2NearY_ret:
mov [EBX], ESP ; // last pixel
movd ESP, XMM7 ; // restore stack pointer
popad ;
ret ;
R2NearY_RP_Ex:
mov [EBX], ESP ;
add EBX, EDX ;
add ECX, EBP ;
inc ESI ;
jne R2NearY_main_loop ;
jmp R2NearY_ret;
R1: // here x0 >= x1 and y0 <= y1 (line runs down-left)
sub ECX, EDI ; // ECX = dx = x0 - x1, EDI keeps x1
sub ESI, EDX ; // ESI = dy
xor EAX, EAX ; //
cmp ECX, ESI ; // dx ? dy
setl AL ; // AL = (dx < dy)
ror EAX, 1 ; // park in bit 31; ror leaves ZF untouched
setz AL ; // AL = (dx == dy)
rol EAX, 1 ; // bit0 = (dx < dy), bit1 = (dx == dy)
lea EAX, R1gtIndex[EAX*8];
jmp EAX ;
align 16 ;
R1gtIndex: /// r1 TABLE
jmp R1NearX; // entry 0: dx > dy
jmp R1NearY; // never executed (entry padding)
inc EDX; // never executed (entry padding)
jmp R1NearY; // entry 1: dy > dx
imul ECX, EAX; // never executed (entry padding)
R1Align: // entry 2: dx == dy, 45 degrees down-left
mov EAX, 56[ESP] ; // pitch
add EDI, ECX ; // FIX: EDI = x1 + dx = x0
imul EDX, EAX ; // FIX: y0 * pitch
sub EAX, 4 ; // diagonal step = pitch - 4
lea EDI, [EDX+EDI*4] ; // FIX: y0 * pitch + x0 * 4
mov EDX, 60[ESP] ; // EDX = color
add EDI, 52[ESP] ; // + surface base = address of first pixel
imul ESI, EAX ; // total byte distance start -> end
neg ESI; // counter runs from -total up to 0
sub EDI, ESI ; // EDI = address of last pixel
align 16 ;
R1Align_main_loop:
mov [EDI+ESI], EDX ; // store pixel
lea ESP, [ESP] ; // filler (no effect)
add ESI, EAX ; // next diagonal pixel
jne R1Align_main_loop ; // ESI reaches zero at the end point
mov [EDI+ESI], EDX ; // FIX: last pixel was never stored
popad ;
ret ;
R1NearX: // dx > dy, stepping left / down
movd XMM7, ESP ; // park ESP, it is reused for the colour
mov EAX, 56[ESP] ; // pitch
add EDI, ECX ; // EDI = x1 + dx = x0
imul EDX, EAX ; // y0 * pitch
neg ECX ; // -dx, counts up to zero
lea EBX, [EDX+EDI*4] ; // offset of first pixel
add ESI, ESI ; // 2 * dy
add EBX, 52[ESP] ; // EBX = address of first pixel
lea EDI, [EAX-4] ; // diagonal step = pitch - 4
lea EBP, [ESI+ECX] ; // error = 2dy - dx
mov ESP, 60[ESP] ; // ESP = color
lea EDX, [EBP+ECX] ; // diagonal error adjust = 2dy - 2dx
test EBP, EBP ; // also clears CF for the first jc
jge R1NearX_RP_Ex ; // first step is diagonal
align 16 ;
R1NearX_main_loop:
mov [EBX], ESP; // store pixel
jc R1NearX_repair; // CF from last error add: error >= 0
sub EBX, 4; // step left
add EBP, ESI; // error += 2dy (sets CF for the next jc)
inc ECX;
jne R1NearX_main_loop;
jmp R1NearX_ret;
R1NearX_repair:
add EBX, EDI; // step left and down
add EBP, EDX; // error += 2dy - 2dx
inc ECX ;
jne R1NearX_main_loop ;
R1NearX_ret:
mov [EBX], ESP; // last pixel
movd ESP, XMM7 ; // restore stack pointer
popad ;
ret ;
R1NearX_RP_Ex:
mov [EBX], ESP;
add EBX, EDI;
add EBP, EDX;
inc ECX ;
jne R1NearX_main_loop;
jmp R1NearX_ret ;
R1NearY: // dy > dx, stepping down / left
movd XMM7, ESP; // park ESP, it is reused for the colour
mov EAX, 56[ESP] ; // pitch
neg ESI ; // -dy, counts up to zero
imul EDX, EAX ; // y0 * pitch
add EDI, ECX; // EDI = x1 + dx = x0
lea EBX, [EDX+EDI*4] ; // offset of first pixel
add ECX, ECX ; // 2 * dx
add EBX, 52[ESP] ; // EBX = address of first pixel
lea EDI, [EAX-4] ; // diagonal step = pitch - 4
lea EBP, [ESI+ECX] ; // error = 2dx - dy
mov ESP, 60[ESP] ; // ESP = color
lea EDX, [EBP+ESI] ; // diagonal error adjust = 2dx - 2dy
test EBP, EBP; // also clears CF for the first jc
jge R1NearY_RP_Ex ; // first step is diagonal
align 16 ;
R1NearY_main_loop:
mov [EBX], ESP; // store pixel
jc R1NearY_repair; // CF from last error add: error >= 0
add EBX, EAX; // step down
add EBP, ECX; // error += 2dx
inc ESI;
jne R1NearY_main_loop;
jmp R1NearY_ret;
R1NearY_repair:
add EBX, EDI; // step down and left
add EBP, EDX; // error += 2dx - 2dy
inc ESI ;
jne R1NearY_main_loop;
R1NearY_ret:
mov [EBX], ESP; // last pixel
movd ESP, XMM7 ; // restore stack pointer
popad ;
ret ;
R1NearY_RP_Ex:
mov [EBX], ESP;
add EBX, EDI;
add EBP, EDX;
inc ESI ;
jne R1NearY_main_loop ;
jmp R1NearY_ret ; // FIX: previously fell off the end of the function
}
}