The performance comparison of output in RTL monitor
summary
最近在写 RTL monitor,发现如果频繁用 $fdisplay 把数据写出来,性能会成为瓶颈。所以就研究用 DPI-C 把数据送到 C 侧,再看看 C 侧有什么优化手段。主要有两种优化方法:一种是把 raw data 送到 C 侧,由 C 侧直接把 raw data 格式化输出;另一种是 C 侧把 raw data 存成文件后立即返回 RTL 侧,然后线下用另一个进程把 raw data 格式化输出。线下进程可以和输出进程同时跑,一旦有数据出来它就进行处理并输出。
为了研究这个问题,我直接写个纯C的producer-consumer模型,代码见下面。
conclusion
fwrite 在绝大部分场景下比 mmap 快:当写入量为 4GB 左右时,fwrite 用时 10.5 sec,mmap 用时 29.6 sec。
用fread读入4GB binary数据, 用fwrite写出8.9GB数据时需要103.4 sec
用mmap读入4GB binary数据, 用fwrite写出8.9GB数据时需要88.1 sec
producer比较快, consumer比较慢, producer 2秒可以产生1GB数据,consumer需要20 sec左右才可以处理完。当两者并行跑时,收益并不高。
注意点
open mode
用 fopen 以只写模式("w")打开文件后,再用 mmap 映射该文件,会映射失败:mmap 要求文件描述符可读,而 MAP_SHARED 加 PROT_WRITE 还要求文件描述符以读写方式(O_RDWR)打开,"w" 对应的 O_WRONLY 不满足这些要求。
也就是下面的代码虽然在编译时通过,但在运行时会报如下错误
不管如何设置PROT (PROT_WRITE, PROT_WRITE|PROT_READ, PROT_READ|PROT_EXEC), 或者如何设置flags(MAP_SHARED, MAP_PRIVATE), 都会在运行时报错:Permission denied
FILE* f = fopen(argv[1], "w"); // whatever use "w" or "wb"
int *map=(int*)mmap(0, totalbytes, PROT_READ|PROT_WRITE, MAP_SHARED, fileno(f), 0);
解决方法是使用"w+"打开文件, 原因如下:
| fopen() mode | open() flags |
|---|---|
| r | `O_RDONLY` |
| w | `O_WRONLY \| O_CREAT \| O_TRUNC` |
| a | `O_WRONLY \| O_CREAT \| O_APPEND` |
| r+ | `O_RDWR` |
| w+ | `O_RDWR \| O_CREAT \| O_TRUNC` |
| a+ | `O_RDWR \| O_CREAT \| O_APPEND` |
write
在写之前需要先把文件设置成相应的大小,可以使用 unistd.h 中声明的 ftruncate:
size_t totalbytes = 4 + iter*20 + 20 ;
ftruncate(fw, totalbytes);
原型
主要涉及mmap, fopen, open, fread, fwrite,ftruncate几个函数。
#include <sys/mman.h>
void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset);
int munmap(void *addr, size_t length);
#include <stdio.h>
FILE *fopen(const char *pathname, const char *mode);
FILE *fdopen(int fd, const char *mode);
FILE *freopen(const char *pathname, const char *mode, FILE *stream);
#include <stdio.h>
size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream);
size_t fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream);
//The function fread() reads nmemb items of data, each size bytes long, from the stream pointed to by stream, storing them at the location given by ptr.
实现
fwrite
#include
#include
#include
#include
#include
#include
#include "timediff.hpp"
// One binary trace record as stored in the trace file.
// The 64-bit program counter and timestamp are each split into two 32-bit halves.
struct info {
    uint32_t pc_l;    // low 32 bits of the program counter
    uint32_t pc_h;    // high 32 bits of the program counter
    uint32_t instr;   // 32-bit instruction word
    uint32_t time_l;  // low 32 bits of the timestamp
    uint32_t time_h;  // high 32 bits of the timestamp
};
// Records are written and read as raw bytes, so the layout must stay five packed 32-bit words.
static_assert(sizeof(info) == 20, "info must be exactly 20 bytes: it is the on-disk record format");
int main(int argc, char** argv) {
if(argc<2) {
return 1;
}
TimerClock tc;
FILE *f = std::fopen(argv[1], "wb");
printf("size of info : %d\n", sizeof(info));
uint32_t i=1;
tc.start();
int prec=-9;
std::fwrite(&prec, sizeof(int), 1, f);
while(i++<50000000) {
info val = {(uint32_t)i, (uint32_t)i, (uint32_t)i, (uint32_t)i, (uint32_t)0};
size_t len = std::fwrite(&val, sizeof(info), 1, f);
if (len != 1) {
std::cout << "the len is: " << len << "\n";
info val = {0,0,0,0,0};
size_t len = std::fwrite(&val, sizeof(info), 1, f);
std::fclose(f);
exit(1);
} else {
if ( i%200 == 0) {
fflush(f);
}
}
//std::this_thread::sleep_for(std::chrono::microseconds(1));
}
info val;
std::memset(&val, 0, sizeof(info));
size_t len = std::fwrite(&val, sizeof(info), 1, f);
if (len != 1 ) {
std::cout << "end error\n";
}
std::cout << "writer elapsed time: " << tc.getTimerMicroSec() << "us\n";
std::fclose(f);
std::cout << "writer done!\n";
return 0;
}
fread
#include
#include
#include
#include
#include
#include
#include "timediff.hpp"
// One binary trace record as read from the trace file.
// The 64-bit program counter and timestamp are each split into two 32-bit halves.
struct info {
    uint32_t pc_l;    // low 32 bits of the program counter
    uint32_t pc_h;    // high 32 bits of the program counter
    uint32_t instr;   // 32-bit instruction word
    uint32_t time_l;  // low 32 bits of the timestamp
    uint32_t time_h;  // high 32 bits of the timestamp
};
// Records are read as raw bytes, so the layout must stay five packed 32-bit words.
static_assert(sizeof(info) == 20, "info must be exactly 20 bytes: it is the on-disk record format");
// Reports whether `file` names an existing regular file.
// Returns false when stat() fails on the path or when the path is something other than a
// regular file (for example a directory or a device node).
inline bool is_file_exist(const char* file) {
    struct stat st;
    const bool stat_ok = (stat(file, &st) == 0);
    return stat_ok && S_ISREG(st.st_mode);
}
// Formats one trace record as the text line
//   "<time><pad> <unit> PC=0x<pc as 16 hex digits>, 0x<instr as 8 hex digits> \n".
//
// `prec` must lie in [-15, 0]. Every three steps select the next unit
// (fs, ps, ns, us, ms, s); the position inside a group selects how many literal '0'
// digits are appended to `time` ("", "0" or "00"), e.g. -9 -> "<time> ns",
// -14 -> "<time>0 fs", -1 -> "<time>00 ms".
//
// Parameters:
//   time   timestamp value, printed in decimal
//   prec   precision selector described above
//   pc     program counter
//   instr  instruction word
//   fchar  destination buffer
//   size   capacity of `fchar` in bytes
// Returns 0 on success. Returns 1 (with a message on stderr) when `prec` is out of range
// or when the formatted line does not fit into `size` bytes.
int form_line(uint64_t time, int prec, uint64_t pc, uint32_t instr, char * fchar, size_t size) {
    static const char *const kUnits[] = {"fs", "ps", "ns", "us", "ms", "s"};
    static const char *const kPadding[] = {"", "0", "00"};

    if (prec < -15 || prec > 0) {
        fprintf(stderr, "unknown precision %d\n", prec);
        return 1;
    }
    const int step = prec + 15;  // 0..15: step / 3 picks the unit, step % 3 the zero padding

    // uint64_t is not `unsigned long` on every platform, so convert explicitly and use the
    // long long conversions instead of %lu / %lx.
    const int written = snprintf(fchar, size, "%llu%s %s PC=0x%016llx, 0x%08x \n",
                                 static_cast<unsigned long long>(time), kPadding[step % 3],
                                 kUnits[step / 3], static_cast<unsigned long long>(pc),
                                 static_cast<unsigned int>(instr));
    if (written < 0 || static_cast<size_t>(written) >= size) {
        fprintf(stderr, "formatted line does not fit in %zu bytes\n", size);
        return 1;
    }
    return 0;
}
int main(int argc, char**argv) {
if(argc<3) return 1;
TimerClock tc;
int timeout=0;
while ( (!is_file_exist(argv[1])) && (timeout< 600000)) {
std::this_thread::sleep_for(std::chrono::milliseconds(1));
timeout++;
}
std::cout << "start ...\n";
FILE *r = std::fopen(argv[1], "rb");
if (r==NULL) {
std::cout<< "can't open " << argv[1] << "\n";
exit(1);
}
FILE *w = std::fopen(argv[2], "w");
if (w==NULL) {
std::cout<< "can't open " << argv[2] << "\n";
exit(1);
}
tc.start();
long pos;
timeout = 0;
int prec;
fread(&prec, sizeof(int), 1, r);
while(1) {
info val;
pos = ftell(r);
int len = fread(&val, sizeof(info), 1, r);
if (len!= 1) {
if(ferror(r)) {
perror("fread error\n");
goto FINISHED;
}
if(timeout < 10000000) {
fseek(r, pos, SEEK_SET);
std::this_thread::sleep_for(std::chrono::microseconds(1));
timeout++;
continue;
} else {
goto FINISHED;
}
} else {
if ((val.pc_h == 0)&&(val.pc_l==0) && (val.instr==0) && (val.time_h==0) && (val.time_l==0)) {
break;
}
}
char line[100];
uint64_t pc = ((uint64_t)val.pc_h << 32) + val.pc_l;
uint64_t time = ((uint64_t)val.time_h << 32) + val.time_l;
int r = form_line(time, prec, pc, val.instr, line, sizeof(line));
if (r==1) {
return 1;
}
std::fputs(line, w);
}
FINISHED:
std::cout << "timeout: " << timeout << "\n";
std::cout << "reader done\n";
std::cout << "reader elapsed time: " << tc.getTimerMicroSec() << "us\n";
std::fclose(r);
std::fclose(w);
return 0;
}
mmap write
#include
#include
#include
#include
#include
#include
#include
#include
#include "sys/mman.h"
#include "timediff.hpp"
// One binary trace record as stored in the trace file.
// The 64-bit program counter and timestamp are each split into two 32-bit halves.
struct info {
    uint32_t pc_l;    // low 32 bits of the program counter
    uint32_t pc_h;    // high 32 bits of the program counter
    uint32_t instr;   // 32-bit instruction word
    uint32_t time_l;  // low 32 bits of the timestamp
    uint32_t time_h;  // high 32 bits of the timestamp
};
// The file size is computed with a hard-coded 20 bytes per record, so pin the layout.
static_assert(sizeof(info) == 20, "info must be exactly 20 bytes: it is the on-disk record format");
const int iter = 50000000;
int main(int argc, char** argv) {
if(argc<2) {
return 1;
}
TimerClock tc;
int f = open(argv[1], O_RDWR|O_CREAT|O_TRUNC, 0x0777);
std::cout << "start to write bin file " << argv[1] << "\n";
size_t totalbytes = 4+iter*20+20;
ftruncate(f, totalbytes);
tc.start();
int prec=-9;
int *map=(int*)mmap(0, totalbytes, PROT_WRITE, MAP_SHARED, f, 0);
if (map == MAP_FAILED) {
close(f);
perror("error mapping");
exit(1);
}
size_t wptr=0;
map[wptr++] = prec;
uint32_t i=0;
while(i++
mmap read
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "timediff.hpp"
#include
// One binary trace record as read from the trace file.
// The 64-bit program counter and timestamp are each split into two 32-bit halves.
struct info {
    uint32_t pc_l;    // low 32 bits of the program counter
    uint32_t pc_h;    // high 32 bits of the program counter
    uint32_t instr;   // 32-bit instruction word
    uint32_t time_l;  // low 32 bits of the timestamp
    uint32_t time_h;  // high 32 bits of the timestamp
};
// The reader steps through the file in hard-coded 20-byte records, so pin the layout.
static_assert(sizeof(info) == 20, "info must be exactly 20 bytes: it is the on-disk record format");
// Tells whether `file` refers to an existing regular file.
// A failing stat() call and any non-regular file type (directory, device node, ...) both
// yield false.
inline bool is_file_exist(const char* file) {
    struct stat st;
    if (stat(file, &st) != 0) {
        return false;
    }
    return S_ISREG(st.st_mode) ? true : false;
}
// Formats one trace record as the text line
//   "<time><pad> <unit> PC=0x<pc as 16 hex digits>, 0x<instr as 8 hex digits> \n".
//
// `prec` must lie in [-15, 0]. Every three steps select the next unit
// (fs, ps, ns, us, ms, s); the position inside a group selects how many literal '0'
// digits are appended to `time` ("", "0" or "00"), e.g. -9 -> "<time> ns",
// -14 -> "<time>0 fs", -1 -> "<time>00 ms".
//
// Parameters:
//   time   timestamp value, printed in decimal
//   prec   precision selector described above
//   pc     program counter
//   instr  instruction word
//   fchar  destination buffer
//   size   capacity of `fchar` in bytes
// Returns 0 on success. Returns 1 (with a message on stderr) when `prec` is out of range
// or when the formatted line does not fit into `size` bytes.
int form_line(uint64_t time, int prec, uint64_t pc, uint32_t instr, char * fchar, size_t size) {
    static const char *const kUnits[] = {"fs", "ps", "ns", "us", "ms", "s"};
    static const char *const kPadding[] = {"", "0", "00"};

    if (prec < -15 || prec > 0) {
        fprintf(stderr, "unknown precision %d\n", prec);
        return 1;
    }
    const int step = prec + 15;  // 0..15: step / 3 picks the unit, step % 3 the zero padding

    // uint64_t is not `unsigned long` on every platform, so convert explicitly and use the
    // long long conversions instead of %lu / %lx.
    const int written = snprintf(fchar, size, "%llu%s %s PC=0x%016llx, 0x%08x \n",
                                 static_cast<unsigned long long>(time), kPadding[step % 3],
                                 kUnits[step / 3], static_cast<unsigned long long>(pc),
                                 static_cast<unsigned int>(instr));
    if (written < 0 || static_cast<size_t>(written) >= size) {
        fprintf(stderr, "formatted line does not fit in %zu bytes\n", size);
        return 1;
    }
    return 0;
}
// Consumer benchmark, mmap variant: opens argv[1] read-only, maps it in page-aligned
// windows and opens argv[2] for text output.
// NOTE(review): this listing is damaged. The element type of `segment` and the body of the
// inner loop (from "while(rindex" up to the ">= 30000" check) are missing from the text,
// so the record decoding and the code that refreshes `cursize` are not visible here.
int main(int argc, char**argv) {
if(argc<3) return 1;
TimerClock tc;
int timeout=0;
// Poll once per millisecond, at most 600000 times, until the input is a regular file.
while ( (!is_file_exist(argv[1])) && (timeout< 600000)) {
std::this_thread::sleep_for(std::chrono::milliseconds(1));
timeout++;
}
std::cout << "start ...\n";
// NOTE(review): open() only consults the mode argument when creating a file, and 0x0600
// is a hexadecimal literal rather than the octal 0600 permission bits.
int r = open(argv[1], O_RDONLY, 0x0600);
if (r==-1) {
std::cout<< "can't open " << argv[1] << "\n";
exit(1);
}
FILE *w = std::fopen(argv[2], "w");
if (w==nullptr) {
std::cout<< "can't open " << argv[2] << "\n";
close(r);
exit(1);
}
struct stat rfileInfo = {0};
struct stat wfileInfo = {0};
// NOTE(review): both exit(1) paths below leave r and w open.
if(fstat(r, &rfileInfo) == -1) {
exit(1);
}
if(fstat(fileno(w), &wfileInfo) == -1) {
exit(1);
}
// Collects every `readto` value computed at the bottom of the main loop; printed at the end.
std::vector segment;
size_t cursize;
size_t lastpos;
size_t pagesize = sysconf(_SC_PAGE_SIZE);
size_t readto;
int prec;
off_t pa_offset;
timeout =0;
// NOTE(review): rfileInfo is not refreshed inside this loop, so when the size is 0 on
// entry the loop can only end by reaching the timeout.
while((rfileInfo.st_size == 0)&&(timeout<600000)) {
std::this_thread::sleep_for(std::chrono::milliseconds(1));
timeout++;
}
if (timeout >= 600000) {
goto FINISHED;
}
printf("File size is %ji\n", (intmax_t)rfileInfo.st_size);
cursize = rfileInfo.st_size;
lastpos = 0;
// 24 = 4 + 20 bytes, the same constants used for `readto` below.
// NOTE(review): cursize is not re-read after this sleep in the visible code.
if (cursize<24) {
std::this_thread::sleep_for(std::chrono::milliseconds(100));
}
// Round down to 4 bytes plus a whole number of 20-byte units.
// NOTE(review): cursize is unsigned, so cursize < 4 makes (cursize-4) wrap around.
readto = cursize - (cursize-4) % 20;
tc.start();
while(1) {
// Round the resume position down to a page boundary for use as the mmap offset.
pa_offset = lastpos & ~(pagesize-1);
size_t size = readto-pa_offset;
// Bytes between the page boundary and lastpos.
size_t pass_byte = lastpos-pa_offset;
std::cout << "size = " << size << "\n";
std::cout << "lastpos =" << lastpos << "\n";
char *rmap = (char*)mmap(0, size, PROT_READ, MAP_SHARED, r, pa_offset);
if(rmap == MAP_FAILED) {
perror("error mapping");
goto FINISHED;
}
size_t rindex=0;
// First window only: take the precision from the start of the file.
// NOTE(review): rmap is char*, so this reads a single byte, not a 4-byte int — confirm
// this matches how the writer stores the precision.
if (lastpos==0) {
prec = rmap[rindex++];
}
lastpos = readto;
int * rptr = (int*)(rmap+pass_byte);
int total = (size-pass_byte)/4;
// NOTE(review): the next line is garbled; the loop body and the condition that ends in
// ">= 30000" are missing. No munmap() call is visible in what remains — TODO confirm.
while(rindex= 30000) {
goto FINISHED;
}
readto = cursize - (cursize-4) % 20;
segment.push_back(readto);
}
FINISHED:
std::cout << "timeout: " << timeout << "\n";
std::cout << "reader done\n";
std::cout << "reader elapsed time: " << tc.getTimerMicroSec() << "us\n";
close(r);
std::fclose(w);
std::cout << "the segment is :\n";
for(auto it: segment) {
std::cout << it << "\n";
}
return 0;
}
timer
#ifndef _TIMEDIFF_HPP_
#define _TIMEDIFF_HPP_
#include
#include
// Minimal stopwatch built on std::chrono::high_resolution_clock.
// Timing starts at construction and can be restarted with start().
class TimerClock
{
public:
    // Starts timing immediately, so getTimerMicroSec() is valid even if start() is never
    // called. (The previous body called update(), which this class does not define.)
    TimerClock()
    {
        start();
    }

    // Resets the reference point to the current time.
    void start()
    {
        _start = std::chrono::high_resolution_clock::now();
    }

    // Returns the whole microseconds elapsed since construction or the last start().
    long long getTimerMicroSec() const
    {
        return std::chrono::duration_cast<std::chrono::microseconds>(
                   std::chrono::high_resolution_clock::now() - _start)
            .count();
    }

private:
    std::chrono::time_point<std::chrono::high_resolution_clock> _start;
};
#endif
compile
# Build and run helpers for the writer (producer) / reader (consumer) benchmark.
CXXFLAGS = -g -std=c++11

# Data files created by the run targets.
FILES = test.bin test.log test1.log test2.log
# Binary trace written by the writer and text log written by the reader for `make run`.
BIN ?= test.bin
LOG ?= test.log

# Default target: build both programs.
gen: reader writer

reader: reader.cpp
	g++ ${CXXFLAGS} -o $@ $^ -I./

writer: writer.cpp
	g++ ${CXXFLAGS} -o $@ $^ -I./

writer2: writer2.cpp
	g++ ${CXXFLAGS} -o $@ $^ -I./

# Run producer and consumer side by side.
# This used to depend on `clean`, which deletes the very binaries it then starts. Both
# programs also return 1 when their file arguments are missing, so pass them explicitly.
run: gen
	rm -rf ${FILES}
	(./writer ${BIN} &) && (sleep 1) && (./reader ${BIN} ${LOG} &)

# Time the writer alone.
runw: writer
	rm -rf /tmp/test.txt
	time ./writer /tmp/test.txt &

# Time the reader alone; it waits for /tmp/test.txt to appear.
runr: reader
	rm -rf /tmp/out.txt /tmp/test.txt
	time ./reader /tmp/test.txt /tmp/out.txt &

clean:
	- rm -rf ${FILES} reader writer reader2 writer2 writer3

genbin: writer
	./writer ../../tarmac.core0.log.bin
	./writer ../../tarmac.core1.log.bin

diff:
	tail out.txt | diff golden.txt -

.PHONY: gen run runw runr clean genbin diff