File: VGASpeed.txt
Name: 测试VGA12H模式的速度
Author: zyl910
Blog: http://blog.csdn.net/zyl910/
Version: V1.1
Update: 2006-11-23
下载(注意修改下载后的扩展名)
经过上一次的测试后,发现读显存的速度比写显存的速度慢得多。
很多资料建议我们使用写模式1来实现显存内的位图传送。当使用写模式1进行位图传送时,是一边读一边写的形式。而现在显存读速度很慢,那么会不会影响位图传送速度呢?于是我做了个测试。
测试结果
~~~~~~~~
CPU : AMD Athlon XP 1700+(实际频率:1463 MHz (11 x 133))
内存 : DDR266 256MB
显卡 : NVIDIA GeForce2 MX/MX 400(AGP 4X)
显存带宽: 125MHz * 128bit = 2000MB/s
操作系统: Windows XP SP2
[FPS: C]
Video to System: 11.3000
System to Video : 51.0807
OffScr to System: 11.7646
System to OffScr: 51.6840
Video to OffScr: 37.1039
OffScr to Video : 37.0033
[FPS: movsb]
Video to System: 11.9657
System to Video : 85.5702
OffScr to System: 11.9657
System to OffScr: 85.5702
Video to OffScr: 37.2044
OffScr to Video : 37.3050
[FPS: movsw]
Video to System: 23.5293
System to Video : 122.4729
OffScr to System: 23.6298
System to OffScr: 122.2718
[FPS: movsd]
Video to System: 44.7459
System to Video : 152.7392
OffScr to System: 44.8464
System to OffScr: 152.4376
CPU : AMD Athlon XP 1700+(实际频率:1463 MHz (11 x 133))
内存 : DDR266 256MB
显卡 : NVIDIA GeForce2 MX/MX 400(AGP 4X)
显存带宽: 125MHz * 128bit = 2000MB/s
操作系统: Windows 98SE
[FPS: C]
Video to System: 11.6641
System to Video : 60.7337
OffScr to System: 11.7000
System to OffScr: 60.5326
Video to OffScr: 35.3945
OffScr to Video : 35.4950
[FPS: movsb]
Video to System: 11.9657
System to Video : 98.9436
OffScr to System: 11.9657
System to OffScr: 99.2453
Video to OffScr: 35.6961
OffScr to Video : 35.6961
[FPS: movsw]
Video to System: 23.4287
System to Video : 173.4530
OffScr to System: 23.5293
System to OffScr: 173.4530
[FPS: movsd]
Video to System: 44.4442
System to Video : 268.1735
OffScr to System: 44.4442
System to OffScr: 269.5812
CPU : AMD Athlon XP 1700+(实际频率:1463 MHz (11 x 133))
内存 : DDR266 256MB
显卡 : NVIDIA GeForce2 MX/MX 400(AGP 4X)
显存带宽: 125MHz * 128bit = 2000MB/s
操作系统: DOS实模式
[FPS: C]
Video to System: 11.7646
System to Video : 61.2365
OffScr to System: 11.8652
System to OffScr: 61.1359
Video to OffScr: 37.2044
OffScr to Video : 37.2044
[FPS: movsb]
Video to System: 12.0663
System to Video : 108.0939
OffScr to System: 12.0663
System to OffScr: 108.3956
Video to OffScr: 37.4055
OffScr to Video : 37.3050
[FPS: movsw]
Video to System: 23.6298
System to Video : 190.4464
OffScr to System: 23.7304
System to OffScr: 190.5470
[FPS: movsd]
Video to System: 44.9470
System to Video : 278.9326
OffScr to System: 44.9470
System to OffScr: 279.4354
CPU : Intel Celeron, 1000 MHz (10 x 100)
内存 : SDRAM 256MB
显卡 : ATI Radeon 9550(AGP 4X)
显存带宽: 392MHz * 128bit = 6272MB/s
操作系统: Windows XP SP2
[FPS: C]
Video to System: 11.5000
System to Video : 52.0862
OffScr to System: 11.6641
System to OffScr: 52.0862
Video to OffScr: 32.6796
OffScr to Video : 32.5790
[FPS: movsb]
Video to System: 11.6641
System to Video : 51.8851
OffScr to System: 11.6000
System to OffScr: 52.0862
Video to OffScr: 32.6796
OffScr to Video : 32.5790
[FPS: movsw]
Video to System: 22.2221
System to Video : 76.4199
OffScr to System: 22.1215
System to OffScr: 76.7215
[FPS: movsd]
Video to System: 40.5227
System to Video : 100.3514
OffScr to System: 40.5227
System to OffScr: 100.3514
分析
~~~~
一、用写模式1实现的位图传送的确不够快
跟我们猜想的一样,位图传送的确不够快,才30多帧。
让我们理论分析一下。
每一次是先读再写,因此传送所需的时间是读耗时与写耗时之和;又因为每次硬件会利用锁存器一次性复制4个字节(4个位平面各1个字节),所以速度为:1/(1/11.3000 + 1/51.0807)*4 = 9.25305 * 4 = 37.0122(帧/秒)
与测试结果相符。
看来不能在VGA12H下利用显存内位图传送实现高速动画。最佳方案是先在内存中将屏幕画好,再一次性传送到显存。反正现在CPU主频够高,DDR内存条访问速度够快。
估计写模式1只能用作填充(如清屏)。
二、访问离屏显存并没有比访问主表面显存快
受某些早期资料的误导,曾以为访问离屏显存会比较快。可是现代的显卡都有缓冲机制,所以无论是访问主表面显存,还是访问离屏显存,速度都一样快。
当然在有条件的时候,应该使用离屏显存来实现双缓冲平滑动画。
测试代码
~~~~~~~~
/* S: system memory. 系统内存 */ #include <stdio.h>
typedef void far* LPVOID;
#define SCR_PLANES 4 #define SCANSIZE_DIB ((SCR_W)/2) #define SEG_VIDEO 0xA000 #define WaitVR() while(!(inportb(0x3da)&0x08)) static volatile DWORD far* const pbiosclock = MK_FP(0x0040, 0x6C); typedef void (*TESTFUNC)(DWORD iF); #define TESTFPS_WAITVR 1 static BYTE byVGA[SCR_PLANES][SCANSIZE_VGA];
void repmovsw(LPVOID lpD, LPVOID lpS, WORD cWords) void repmovsd(LPVOID lpD, LPVOID lpS, WORD cDWords)
int vgasetwritemode(int mode) void vgasetreadplane(BYTE n) void vgasetplanemask(BYTE bymask) double TestFps(TESTFUNC pfun, int nFlags) cntF++; return cntF / ((tmrcur-tmrold)/BIOSCLOCK_F); void filloffscreen(int c) vgasetwritemode(0); memset(byVGA[0], -(1&(c>>0)), SCANSIZE_VGA); void Test_C_V2S(DWORD iF) vgasetwritemode(0); pscan = 0; void Test_C_S2V(DWORD iF) vgasetwritemode(0); memset(byVGA[0], -(1&(((int)iF)>>0)), SCANSIZE_VGA); void Test_C_O2S(DWORD iF) vgasetwritemode(0); for(iY=0; iY<SCR_H; iY++) void Test_C_S2O(DWORD iF) vgasetwritemode(0); memset(byVGA[0], -(1&(((int)iF)>>0)), SCANSIZE_VGA); void Test_C_V2O(DWORD iF) vgasetwritemode(1); pscan = 0; void Test_C_O2V(DWORD iF) filloffscreen(iF & 0xF); vgasetwritemode(1); pscan = 0; void Test_SB_V2S(DWORD iF) vgasetwritemode(0); pscan = 0; void Test_SB_S2V(DWORD iF) vgasetwritemode(0); memset(byVGA[0], -(1&(((int)iF)>>0)), SCANSIZE_VGA); void Test_SB_O2S(DWORD iF) vgasetwritemode(0); for(iY=0; iY<SCR_H; iY++) void Test_SB_S2O(DWORD iF) vgasetwritemode(0); memset(byVGA[0], -(1&(((int)iF)>>0)), SCANSIZE_VGA); void Test_SB_V2O(DWORD iF) vgasetwritemode(1); pscan = 0; void Test_SB_O2V(DWORD iF) filloffscreen(iF & 0xF); vgasetwritemode(1); pscan = 0; void Test_SW_V2S(DWORD iF) vgasetwritemode(0); pscan = 0; void Test_SW_S2V(DWORD iF) vgasetwritemode(0); memset(byVGA[0], -(1&(((int)iF)>>0)), SCANSIZE_VGA); void Test_SW_O2S(DWORD iF) vgasetwritemode(0); for(iY=0; iY<SCR_H; iY++) void Test_SW_S2O(DWORD iF) vgasetwritemode(0); memset(byVGA[0], -(1&(((int)iF)>>0)), SCANSIZE_VGA); void Test_SD_V2S(DWORD iF) vgasetwritemode(0); pscan = 0; void Test_SD_S2V(DWORD iF) vgasetwritemode(0); memset(byVGA[0], -(1&(((int)iF)>>0)), SCANSIZE_VGA); void Test_SD_O2S(DWORD iF) vgasetwritemode(0); for(iY=0; iY<SCR_H; iY++) void Test_SD_S2O(DWORD iF) vgasetwritemode(0); memset(byVGA[0], -(1&(((int)iF)>>0)), SCANSIZE_VGA);
/* VGA 12h: 640*480*4bit */ /* C language */ /* movsb */ /* movsw */ /* movsd */
/* out */ return 0; |