目录
文件锁
非阻塞io
select
poll
epoll
readv和writev
mmap
参考
#
//第二个参数cmd可以是F_GETLK、F_SETLK、F_SETLKW三种类型,分别表示测试(获取)记录锁、设置记录锁(非阻塞,冲突时立即出错返回)、设置记录锁(阻塞,冲突时等待)
int fcntl(int fd, int cmd, .../* struct flock *flockptr */);
struct flock {
short l_type; /* F_RDLCK, F_WRLCK, F_UNLCK */
short l_whence; /* SEEK_SET, SEEK_CUR, SEEK_END */
off_t l_start; /* offset in bytes, relative to l_whence */
off_t l_len; /* length, in bytes, 0 means lock to EOF */
pid_t l_pid; /* returned with F_GETLK */
};
//l_type的类型
F_RDLCK:读锁
F_WRLCK:写锁
F_UNLCK:解锁
//l_whence的类型
SEEK_SET:当前位置为文件的开头,新位置为偏移量的大小
SEEK_CUR:当前位置为文件指针的位置,新位置为当前位置加上偏移量
SEEK_END:当前位置为文件的结尾,新位置为文件的大小加上偏移量的大小
一个例子
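#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>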
//#include "rwlock.h"
/*
 * Build a struct flock describing the byte range (offset, whence, len) with
 * lock type `type` and hand it to fcntl() using command `cmd`.
 * Returns whatever fcntl() returns.
 */
static int lock_reg(int fd,int cmd,int type,off_t offset,int whence,off_t len) {
    struct flock request = {
        .l_type = type,
        .l_whence = whence,
        .l_start = offset,
        .l_len = len,
    };

    return fcntl(fd, cmd, &request);
}
/*
 * Ask the kernel (F_GETLK) whether a lock of type `type` on the byte range
 * (offset, whence, len) would conflict with a lock held by another process.
 *
 * Returns:
 *   -1   if fcntl() itself failed,
 *    0   if the range could be locked (no conflicting lock),
 *   pid  of a process holding a conflicting lock otherwise.
 */
static pid_t lock_test(int fd,int type,off_t offset,int whence,off_t len) {
    struct flock lock = {0};

    lock.l_type = (short)type;
    lock.l_start = offset;
    lock.l_whence = (short)whence;
    lock.l_len = len;

    if (fcntl(fd, F_GETLK, &lock) == -1) {
        return -1;
    }
    /* Must be a comparison: the original `=` assigned F_UNLCK (non-zero on
     * Linux), so the test was always true and every range looked free. */
    if (lock.l_type == F_UNLCK) {
        return 0;
    }
    return lock.l_pid;
}
/* Take a read lock on the range; F_SETLKW is used, i.e. the waiting variant. */
int read_lock(int fd,off_t offset,int whence,off_t len) {
    const int rc = lock_reg(fd, F_SETLKW, F_RDLCK, offset, whence, len);
    return rc;
}

/* Try to take a read lock on the range with F_SETLK (no waiting). */
int read_lock_try(int fd,off_t offset,int whence,off_t len) {
    const int rc = lock_reg(fd, F_SETLK, F_RDLCK, offset, whence, len);
    return rc;
}

/* Take a write lock on the range; F_SETLKW is used, i.e. the waiting variant. */
int write_lock(int fd,off_t offset,int whence,off_t len) {
    const int rc = lock_reg(fd, F_SETLKW, F_WRLCK, offset, whence, len);
    return rc;
}

/* Try to take a write lock on the range with F_SETLK (no waiting). */
int write_lock_try(int fd,off_t offset,int whence,off_t len) {
    const int rc = lock_reg(fd, F_SETLK, F_WRLCK, offset, whence, len);
    return rc;
}

/* Release any lock this process holds on the range (F_SETLK with F_UNLCK). */
int unlock(int fd,off_t offset, int whence,off_t len) {
    const int rc = lock_reg(fd, F_SETLK, F_UNLCK, offset, whence, len);
    return rc;
}
/* Non-zero exactly when lock_test() returns 0 for a read lock on the range. */
int is_read_lockable(int fd, off_t offset,int whence,off_t len) {
    const pid_t holder = lock_test(fd, F_RDLCK, offset, whence, len);
    return holder == 0;
}

/* Non-zero exactly when lock_test() returns 0 for a write lock on the range. */
int is_write_lockable(int fd, off_t offset,int whence,off_t len) {
    const pid_t holder = lock_test(fd, F_WRLCK, offset, whence, len);
    return holder == 0;
}
/*
 * Record-lock demo: the parent write-locks bytes [0,10) of aa.log, forks,
 * and sleeps 10 seconds; the child write-locks bytes [20,30), reports, and
 * unlocks. The parent unlocks its own range after the sleep.
 */
int main(int argc, char *argv[]) {
    int fd = open("aa.log",O_RDWR|O_APPEND);
    if (fd < 0) {
        /* Without this check every later fcntl() would fail silently. */
        perror("open aa.log");
        exit(1);
    }
    if (write_lock(fd, 0, SEEK_SET, 10) < 0) {
        perror("write_lock");
        exit(1);
    }

    pid_t pid = fork();
    if (pid > 0) {
        printf("sleep -> parent 10 second\n");
        sleep(10);
    }
    else if (pid == 0) {
        if (write_lock(fd, 20, SEEK_SET, 10) < 0) {
            perror("write_lock");
            exit(1);
        }
        printf("chiild get write_lock ok\n");
        unlock(fd, 20, SEEK_SET, 10);
        printf("unlock child lock\n");
        exit(0);
    }
    else {
        /* Save errno first: printf() is allowed to overwrite it. */
        int fork_errno = errno;
        printf("fork error ->%d\n", fork_errno);
        exit(fork_errno);
    }

    unlock(fd, 0, SEEK_SET, 10);
    printf("parent unlock ok\n");
    close(fd);
    return 0;
}
//执行结果
sleep -> parent 10 second
chiild get write_lock ok
unlock child lock
parent unlock ok
//如果将子进程中的改为
else if(pid == 0) {
write_lock(fd,8 , SEEK_SET, 10);
。。。
//执行结果为
sleep -> parent 10 second
parent unlock ok
chiild get write_lock ok
unlock child lock
//用strace分析程序
open("aa.log", O_RDWR|O_APPEND) = 3
fcntl(3, F_SETLKW, {l_type=F_WRLCK, l_whence=SEEK_SET, l_start=0, l_len=10}) = 0
clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f818487aa10) = 32736
fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 1), ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f8184884000
write(1, "sleep -> parent 10 second\n", 26sleep -> parent 10 second
) = 26
rt_sigprocmask(SIG_BLOCK, [CHLD], [], 8) = 0
rt_sigaction(SIGCHLD, NULL, {SIG_DFL, [], 0}, 8) = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
nanosleep({10, 0}, strace: Process 32736 attached
[pid 32736] fcntl(3, F_SETLKW, {l_type=F_WRLCK, l_whence=SEEK_SET, l_start=20, l_len=10}) = 0
[pid 32736] fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 1), ...}) = 0
[pid 32736] mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f8184884000
[pid 32736] write(1, "chiild get write_lock ok\n", 25chiild get write_lock ok
) = 25
[pid 32736] fcntl(3, F_SETLK, {l_type=F_UNLCK, l_whence=SEEK_SET, l_start=20, l_len=10}) = 0
[pid 32736] write(1, "unlock child lock\n", 18unlock child lock
) = 18
[pid 32736] exit_group(0) = ?
[pid 32736] +++ exited with 0 +++
<... nanosleep resumed> {9, 999332497}) = ? ERESTART_RESTARTBLOCK (Interrupted by signal)
--- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=32736, si_uid=0, si_status=0, si_utime=0, si_stime=0} ---
restart_syscall(<... resuming interrupted nanosleep ...>
) = 0
fcntl(3, F_SETLK, {l_type=F_UNLCK, l_whence=SEEK_SET, l_start=0, l_len=10}) = 0
write(1, "parent unlock ok\n", 17parent unlock ok
) = 17
exit_group(0) = ?
关于记录锁的FreeBSD实现
#
int fcntl(int fd, int cmd, ... /* int arg */);
flags = fcntl(socket_fd, F_GETFL); //获取描述符的文件状态标志
fcntl(socket_fd, F_SETFL, flags | O_NONBLOCK); //在原有标志的基础上把描述符设置为非阻塞
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/types.h>
/* Number of bytes read from stdin in a single read() call. */
int max_len = 500000;

/*
 * Non-blocking write demo: read up to max_len bytes from stdin, switch
 * stdout to non-blocking mode, then keep calling write() until everything
 * has been written, logging each attempt to stderr.
 */
int main(int argc, char *argv[]) {
    char buf[max_len];
    ssize_t read_count = read(STDIN_FILENO, buf, sizeof(buf));
    fprintf(stderr, "read %zd bytes\n", read_count);

    /* O_NONBLOCK is a status flag, not an fcntl command: it has to be set
     * through F_GETFL / F_SETFL. Passing it as the command yields EINVAL. */
    int saved_flags = fcntl(STDOUT_FILENO, F_GETFL);
    if (saved_flags == -1) {
        perror("fcntl(F_GETFL)");
        return 1;
    }
    if (fcntl(STDOUT_FILENO, F_SETFL, saved_flags | O_NONBLOCK) == -1) {
        perror("fcntl(F_SETFL)");
        return 1;
    }

    int rc = 0;
    char *ptr = buf;
    while (read_count > 0) {
        errno = 0;
        ssize_t nwrite = write(STDOUT_FILENO, ptr, (size_t)read_count);
        /* Capture errno before sleep()/fprintf() get a chance to change it. */
        int write_errno = errno;
        sleep(1);
        fprintf(stderr, "nwrite = %zd, errno=%d\n", nwrite, write_errno);
        if (nwrite > 0) {
            ptr += nwrite;
            read_count -= nwrite;
        } else if (nwrite < 0 && write_errno != EAGAIN && write_errno != EINTR) {
            /* A hard error would otherwise make this loop spin forever. */
            rc = 1;
            break;
        }
    }

    /* Put the original status flags back. */
    fcntl(STDOUT_FILENO, F_SETFL, saved_flags);
    return rc;
}
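//strace执行结果(注意:这次运行中两次 fcntl 调用都返回了 EINVAL,因为 O_NONBLOCK 被直接当成了 cmd 参数,缺少 F_SETFL;描述符实际上仍然是阻塞的,所以 write 一次就写完了全部 500000 字节)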
execve("./noblock", ["./noblock", "2"], [/* 23 vars */]) = 0
brk(NULL) = 0x670000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4ca564a000
access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=30479, ...}) = 0
mmap(NULL, 30479, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f4ca5642000
close(3) = 0
open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\20\35\2\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=2127336, ...}) = 0
mmap(NULL, 3940800, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f4ca5067000
mprotect(0x7f4ca521f000, 2097152, PROT_NONE) = 0
mmap(0x7f4ca541f000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1b8000) = 0x7f4ca541f000
mmap(0x7f4ca5425000, 16832, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f4ca5425000
close(3) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4ca5641000
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4ca563f000
arch_prctl(ARCH_SET_FS, 0x7f4ca563f740) = 0
mprotect(0x7f4ca541f000, 16384, PROT_READ) = 0
mprotect(0x600000, 4096, PROT_READ) = 0
mprotect(0x7f4ca564b000, 4096, PROT_READ) = 0
munmap(0x7f4ca5642000, 30479) = 0
read(0, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 500000) = 500000
write(2, "read 500000 bytes\n", 18read 500000 bytes
) = 18
fcntl(1, 0x800 /* F_??? */, 0x7f4ca54259f0) = -1 EINVAL (Invalid argument)
write(1, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 500000) = 500000
rt_sigprocmask(SIG_BLOCK, [CHLD], [], 8) = 0
rt_sigaction(SIGCHLD, NULL, {SIG_DFL, [], 0}, 8) = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
nanosleep({1, 0}, 0x7fff9868c350) = 0
write(2, "nwrite = 500000, errno=0\n", 25nwrite = 500000, errno=0
) = 25
fcntl(1, 0xfffff7ff /* F_??? */, 0x7f4ca54259f0) = -1 EINVAL (Invalid argument)
exit_group(0) = ?
+++ exited with 0 +++
另一个例子
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
/*
 * Open /etc/profile with O_NONBLOCK and copy it to stdout in 100-byte
 * chunks, printing the size of every chunk and reporting EAGAIN if it
 * ever occurs.
 */
int main(int argc, char *argv[]) {
    int fd = open("/etc/profile", O_RDONLY | O_NONBLOCK);
    if (fd < 0) {
        perror("open");
        exit(1);
    }

    struct stat f_stat;
    if (fstat(fd, &f_stat) < 0) {
        perror("fstat");
        exit(1);
    }
    off_t total_size = f_stat.st_size;

    char buf[100];
    for (;;) {
        ssize_t read_size = read(fd, buf, sizeof(buf));
        if (read_size < 0) {
            if (errno == EAGAIN) {
                printf("EAGAIN...\n");
                continue;
            }
            if (errno == EINTR) {
                continue;
            }
            printf("error\n");
            exit(1);
        }

        printf("read_size -> %zd\n", read_size);
        fwrite(buf, sizeof(char), (size_t)read_size, stdout);
        total_size -= read_size;

        /* read() == 0 means end of file. Stopping here keeps the loop from
         * spinning if the file is shorter than fstat() reported. */
        if (read_size == 0 || total_size <= 0) {
            break;
        }
    }

    close(fd);
    return 0;
}
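//执行后打印出 /etc/profile的内容
//执行多次,并没有出现 EAGAIN 这样的异常。原因是在 Linux 上 O_NONBLOCK 对普通磁盘文件不起作用:普通文件的读写总是被视为"就绪",read 不会返回 EAGAIN;该标志只对终端、管道、FIFO、套接字等可能阻塞的描述符有意义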
一个非阻塞状态机的例子
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#define BUFSIZE 1024
#define TTY "/dev/tty"

/* States of the relay state machine. */
enum {
    STATE_R,    /* read state */
    STATE_W,    /* write state */
    STATE_Ex,   /* exception state */
    STATE_T     /* terminated state */
};
/* Bookkeeping for one direction of the relay (read from sfd, write to dfd). */
struct fsm_st {
int state; /* current state of the state machine */
int sfd; /* source file descriptor to read from */
int dfd; /* destination file descriptor to write to */
char buf[BUFSIZE]; /* data buffer */
int len; /* number of bytes actually obtained by one read */
int pos; /* offset into buf: how far writing has progressed, so a partial write can be resumed on the next step */
char *errstr; /* error message */
};
/* 状态机驱动 */
/*
 * Advance one state machine by a single step.
 *
 *   STATE_R : read into buf; EOF -> STATE_T, EAGAIN -> stay in STATE_R,
 *             other error -> STATE_Ex, data -> STATE_W with pos reset to 0.
 *   STATE_W : write buf[pos..pos+len); EAGAIN -> stay in STATE_W, other
 *             error -> STATE_Ex; on success advance pos/len and go back to
 *             STATE_R once len reaches 0, otherwise remain in STATE_W.
 *   STATE_Ex: report errstr through perror() and move to STATE_T.
 *   STATE_T : nothing to do.
 * Any other state value aborts the process.
 */
static void fsm_driver(struct fsm_st *fsm) {
    int nwritten;

    switch (fsm->state) {
    case STATE_R:
        fsm->len = read(fsm->sfd, fsm->buf, BUFSIZE);
        if (fsm->len > 0) {
            /* Got data: start writing it from the beginning of buf. */
            fsm->pos = 0;
            fsm->state = STATE_W;
        } else if (fsm->len == 0) {
            /* End of input. */
            fsm->state = STATE_T;
        } else if (errno != EAGAIN) {
            /* Genuine failure. */
            fsm->errstr = "read()";
            fsm->state = STATE_Ex;
        } else {
            /* Nothing available right now: read again on the next step. */
            fsm->state = STATE_R;
        }
        break;

    case STATE_W:
        nwritten = write(fsm->dfd, fsm->buf + fsm->pos, fsm->len);
        if (nwritten >= 0) {
            fsm->pos += nwritten;
            fsm->len -= nwritten;
            /* Keep writing until the whole chunk is out, then read again. */
            fsm->state = (fsm->len == 0) ? STATE_R : STATE_W;
        } else if (errno == EAGAIN) {
            /* Cannot write right now: retry on the next step. */
            fsm->state = STATE_W;
        } else {
            /* Genuine failure. */
            fsm->errstr = "write()";
            fsm->state = STATE_Ex;
        }
        break;

    case STATE_Ex:
        perror(fsm->errstr);
        fsm->state = STATE_T;
        break;

    case STATE_T:
        /* Terminal state: intentionally empty in this example. */
        break;

    default:
        /* Unknown state value: abort rather than continue. */
        abort();
    }
}
/* 推动状态机 */
static void relay(int fd1,int fd2) {
int fd1_save,fd2_save;
/* 因为是读 tty1 写 tty2;读 tty2 写 tty1,所以这里的两个状态机直接取名为 fsm12 和 fsm21 */
struct fsm_st fsm12,fsm21;
fd1_save = fcntl(fd1,F_GETFL);
/* 使用状态机操作 IO 一般都采用非阻塞的形式,避免状态机被阻塞 */
fcntl(fd1,F_SETFL,fd1_save|O_NONBLOCK);
fd2_save = fcntl(fd2,F_GETFL);
fcntl(fd2,F_SETFL,fd2_save|O_NONBLOCK);
/* 在启动状态机之前将状态机推向 读态 */
fsm12.state = STATE_R;
/* 设置状态机中读写的来源和目标,这样状态机的读写接口就统一了。
在状态机里面不用管到底是 读tty1 写tty2 还是 读tty2 写tty1 了,它只需要知道是 读src 写des 就可以了。*/
fsm12.sfd = fd1;
fsm12.dfd = fd2;
/* 同上 */
fsm21.state = STATE_R;
fsm21.sfd = fd2;
fsm21.dfd = fd1;
/* 开始推状态机,只要不是 T态 就一直推 */
while(fsm12.state != STATE_T || fsm21.state != STATE_T) {
/* 调用状态机驱动函数,状态机开始工作 */
fsm_driver(&fsm12);
fsm_driver(&fsm21);
}
fcntl(fd1,F_SETFL,fd1_save);
fcntl(fd2,F_SETFL,fd2_save);
}
/*
 * Open the controlling terminal twice, print a short banner on each
 * descriptor, and relay data between the two descriptors.
 */
int main() {
    static const char banner[] = "TTY\n";
    int fd_r,fd_w;

    /* Opening in blocking mode here is fine: relay() sets O_NONBLOCK itself
     * before driving the state machines. */
    fd_r = open(TTY,O_RDWR);
    if(fd_r < 0) {
        perror("open()");
        exit(1);
    }
    /* Write only the 4 visible bytes. A length of 5 also sends the
     * terminating NUL (it shows up as "TTY\n\0" in an strace capture). */
    write(fd_r, banner, sizeof(banner) - 1);

    fd_w = open(TTY,O_RDWR|O_NONBLOCK);
    if(fd_w < 0) {
        perror("open()");
        exit(1);
    }
    write(fd_w, banner, sizeof(banner) - 1);

    relay(fd_r,fd_w);

    close(fd_r);
    close(fd_w);
    exit(0);
}
//strace结果,设置成非阻塞之后,fd=3,fd=4的两个描述符就不断出现EAGAIN 错误
。。。
open("/dev/tty", O_RDWR) = 3
write(3, "TTY\n\0", 5TTY
) = 5
open("/dev/tty", O_RDWR|O_NONBLOCK) = 4
write(4, "TTY\n\0", 5TTY
) = 5
fcntl(3, F_GETFL) = 0x8002 (flags O_RDWR|O_LARGEFILE)
fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK|O_LARGEFILE) = 0
fcntl(4, F_GETFL) = 0x8802 (flags O_RDWR|O_NONBLOCK|O_LARGEFILE)
fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK|O_LARGEFILE) = 0
read(3, 0x7ffffbdb12bc, 1024) = -1 EAGAIN (Resource temporarily unavailable)
read(4, 0x7ffffbdb0e9c, 1024) = -1 EAGAIN (Resource temporarily unavailable)
read(3, 0x7ffffbdb12bc, 1024) = -1 EAGAIN (Resource temporarily unavailable)
read(4, 0x7ffffbdb0e9c, 1024) = -1 EAGAIN (Resource temporarily unavailable)
。。。
从select返回时,内核告诉我们:
已准备好的描述符的数量。
对于读、写或异常这三个状态中的每一个,哪些描述符已准备好。
#include <sys/select.h>
int select(int maxfdp1,fd_set *readfds,fd_set *writefds,fd_set *exceptfds,struct timeval *tvptr);
//返回值:准备就绪的描述符数,若超时则返回0,若出错则返回-1
struct timeval{
long tv_sec; //seconds
long tv_usec;//and microseconds
};
#include <sys/select.h>
int FD_ISSET(int fd,fd_set *fdset);//返回值:若fd在描述符集中则返回非0值,否则返回0
void FD_CLR(int fd,fd_set *fdset);
void FD_SET(int fd,fd_set *fdset);
void FD_ZERO(fd_set *fdset);
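#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/select.h>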
/*
 * Return the larger of a and b.
 * The branches were swapped before, so the function returned the smaller
 * value and select() was given an nfds that was too small.
 */
int max(int a, int b) {
    if (a < b) {
        return b;
    }
    return a;
}
/*
 * Open "f_1" in non-blocking mode and return the descriptor (or open()'s
 * error value). Despite the function's name the file is opened read-only.
 */
int get_write_fileno() {
    return open("f_1", O_RDONLY | O_NONBLOCK);
}
/*
 * Wait with select() until fd_1 or fd_2 becomes readable and read from
 * whichever is ready. Data from fd_1 is echoed to stdout. The loop ends
 * when a chunk starting with "end" arrives on either descriptor, or when
 * either descriptor reaches end of file. Both descriptors are put into
 * non-blocking mode while the loop runs and restored afterwards.
 */
void io_driver(int fd_1, int fd_2) {
    printf("fd_1 -> %d\n", fd_1);
    printf("fd_2 -> %d\n", fd_2);

    int fd_1_save = fcntl(fd_1, F_GETFL);
    int fd_2_save = fcntl(fd_2, F_GETFL);
    fcntl(fd_1, F_SETFL, fd_1_save|O_NONBLOCK);
    fcntl(fd_2, F_SETFL, fd_2_save|O_NONBLOCK);

    /* Zero-filled so no comparison ever looks at indeterminate bytes. */
    char buf_1[100] = {0};
    char buf_2[100] = {0};
    printf("sizeof --> %zu\n", sizeof(buf_1));   /* sizeof yields size_t */

    int write_fd = get_write_fileno();
    printf("fifo fd -> %d\n",write_fd);

    /* select() needs the highest descriptor number plus one. */
    const int nfds = (fd_1 > fd_2 ? fd_1 : fd_2) + 1;

    int done = 0;
    while (!done) {
        /* select() overwrites the set, so rebuild it on every iteration. */
        fd_set r_set;
        FD_ZERO(&r_set);
        FD_SET(fd_1, &r_set);
        FD_SET(fd_2, &r_set);

        if (select(nfds, &r_set, NULL, NULL, NULL) < 0) {
            printf("error\n");
            exit(1);
        }

        if (FD_ISSET(fd_1, &r_set)) {
            printf("read fd_1...\n");
            ssize_t n = read(fd_1, buf_1, sizeof(buf_1));
            if (n > 0) {
                write(STDOUT_FILENO, buf_1, (size_t)n);
                if (n >= 3 && strncmp("end", buf_1, 3) == 0) {
                    done = 1;
                }
            } else if (n == 0) {
                done = 1;   /* end of file: stop instead of spinning */
            }
        }

        if (FD_ISSET(fd_2, &r_set)) {
            printf("read fd_2...\n");
            /* Read fd_2 into its own buffer (fd_1/buf_1 were read here
             * before, a copy-paste slip). */
            ssize_t n = read(fd_2, buf_2, sizeof(buf_2));
            if (n >= 3 && strncmp("end", buf_2, 3) == 0) {
                done = 1;
            } else if (n == 0) {
                done = 1;
            }
        }
    }

    if (write_fd >= 0) {
        close(write_fd);
    }
    fcntl(fd_1,F_SETFL,fd_1_save);
    fcntl(fd_2,F_SETFL,fd_2_save);
}
/* Open the controlling terminal twice and run the select() loop on both. */
int main(int argc, char *argv[]) {
    int fd_1 = open("/dev/tty", O_RDONLY|O_NONBLOCK);
    if (fd_1 < 0) {
        /* A negative descriptor must never reach FD_SET(). */
        perror("open /dev/tty");
        return 1;
    }
    int fd_2 = open("/dev/tty", O_RDONLY|O_NONBLOCK);
    if (fd_2 < 0) {
        perror("open /dev/tty");
        close(fd_1);
        return 1;
    }

    io_driver(fd_1, fd_2);

    close(fd_1);
    close(fd_2);
    return 0;
}
//打印结果
write(1, "fd_1 -> 3\n", 10fd_1 -> 3
) = 10
write(1, "fd_2 -> 4\n", 10fd_2 -> 4
) = 10
fcntl(3, F_GETFL) = 0x8800 (flags O_RDONLY|O_NONBLOCK|O_LARGEFILE)
fcntl(4, F_GETFL) = 0x8800 (flags O_RDONLY|O_NONBLOCK|O_LARGEFILE)
fcntl(3, F_SETFL, O_RDONLY|O_NONBLOCK|O_LARGEFILE) = 0
fcntl(4, F_SETFL, O_RDONLY|O_NONBLOCK|O_LARGEFILE) = 0
write(1, "sizeof --> 100\n", 15sizeof --> 100
) = 15
open("f_1", O_RDONLY|O_NONBLOCK) = 5
write(1, "fifo fd -> 5\n", 13fifo fd -> 5
) = 13
select(4, [3], NULL, NULL, NULL111111111111
) = 1 (in [3])
write(1, "read fd_1...\n", 13read fd_1...
) = 13
read(3, "111111111111\n", 100) = 13
write(1, "111111111111\n", 13111111111111
) = 13
select(4, [3], NULL, NULL, NULL1111111111111122222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222223333333333333333333333333333333333333333333333333334444444444444444444444444444444444444444444444444444455555555555555555555555555555555555555555566666666666666666666666666666666666666667777777777777777777777777777777777777777
) = 1 (in [3])
write(1, "read fd_1...\n", 13read fd_1...
) = 13
read(3, "11111111111111222222222222222222"..., 100) = 100
write(1, "11111111111111222222222222222222"..., 1001111111111111122222222222222222222222222222222222222222222222222222222222222222222222222222222222222) = 100
select(4, [3], NULL, NULL, NULL) = 1 (in [3])
write(1, "read fd_1...\n", 13read fd_1...
) = 13
read(3, "22222222222222222222222222222222"..., 100) = 100
write(1, "22222222222222222222222222222222"..., 1002222222222222222222222222222222222222233333333333333333333333333333333333333333333333333344444444444) = 100
select(4, [3], NULL, NULL, NULL) = 1 (in [3])
write(1, "read fd_1...\n", 13read fd_1...
) = 13
read(3, "44444444444444444444444444444444"..., 100) = 100
write(1, "44444444444444444444444444444444"..., 1004444444444444444444444444444444444444444445555555555555555555555555555555555555555556666666666666666) = 100
select(4, [3], NULL, NULL, NULL) = 1 (in [3])
write(1, "read fd_1...\n", 13read fd_1...
) = 13
read(3, "66666666666666666666666677777777"..., 100) = 65
write(1, "66666666666666666666666677777777"..., 656666666666666666666666667777777777777777777777777777777777777777
) = 65
select(4, [3], NULL, NULL, NULL
改用pthread方式去实现
/* Descriptors opened by main(); file-scope so the worker thread can reach them. */
int fd_1_no;
int fd_2_no;
/* Thread entry point passed to pthread_create(). */
void *io_driver(void *arg) {
// body unchanged from the select() example (placeholder in this listing)
}
/*
 * Open the controlling terminal twice, run io_driver() in a separate
 * thread, wait for it to finish, then close both descriptors.
 */
int main(int argc, char *argv[]) {
    pthread_t worker;
    void *worker_result;

    fd_1_no = open("/dev/tty", O_RDONLY|O_NONBLOCK);
    fd_2_no = open("/dev/tty", O_RDONLY|O_NONBLOCK);

    pthread_create(&worker, NULL, io_driver, NULL);
    pthread_join(worker, &worker_result);

    close(fd_1_no);
    close(fd_2_no);
    return 0;
}
//strace -ff 结果
[pid 10141] select(4, [3], NULL, NULL, NULL111111111111111
) = 1 (in [3])
[pid 10141] write(1, "read fd_1...\n", 13read fd_1...
) = 13
[pid 10141] read(3, "111111111111111\n", 100) = 16
[pid 10141] write(1, "111111111111111\n", 16111111111111111
) = 16
[pid 10141] select(4, [3], NULL, NULL, NULL222222222222222222222222222222222222222222222222222222222333333333333333333333333333333333333333333333333333333333334444444444444444444444444444444444444444444444444445555555555555555555555555555555555555566666666666666666666666666666666666666666677777777777777777777777777777777778888888888888888888888888888888888888888888888888899999999999999999
) = 1 (in [3])
[pid 10141] write(1, "read fd_1...\n", 13read fd_1...
) = 13
[pid 10141] read(3, "22222222222222222222222222222222"..., 100) = 100
[pid 10141] write(1, "22222222222222222222222222222222"..., 1002222222222222222222222222222222222222222222222222222222223333333333333333333333333333333333333333333) = 100
[pid 10141] select(4, [3], NULL, NULL, NULL) = 1 (in [3])
[pid 10141] write(1, "read fd_1...\n", 13read fd_1...
) = 13
[pid 10141] read(3, "33333333333333334444444444444444"..., 100) = 100
[pid 10141] write(1, "33333333333333334444444444444444"..., 1003333333333333333444444444444444444444444444444444444444444444444444555555555555555555555555555555555) = 100
[pid 10141] select(4, [3], NULL, NULL, NULL) = 1 (in [3])
[pid 10141] write(1, "read fd_1...\n", 13read fd_1...
) = 13
[pid 10141] read(3, "55555666666666666666666666666666"..., 100) = 100
[pid 10141] write(1, "55555666666666666666666666666666"..., 1005555566666666666666666666666666666666666666666677777777777777777777777777777777778888888888888888888) = 100
[pid 10141] select(4, [3], NULL, NULL, NULL) = 1 (in [3])
[pid 10141] write(1, "read fd_1...\n", 13read fd_1...
) = 13
[pid 10141] read(3, "88888888888888888888888888888889"..., 100) = 49
[pid 10141] write(1, "88888888888888888888888888888889"..., 49888888888888888888888888888888899999999999999999
) = 49
[pid 10141] select(4, [3], NULL, NULL, NULL
//poll - wait for some event on a file descriptor
#include <poll.h>
//fds:实际上是一个数组的首地址,因为 poll可以帮助我们监视多个文件描述符,而一个文件描述符放到一个 struct pollfd 结构体中,多个文件描述符就需要一个数组来存储了。
//nfds:fds 这个数组的长度。在参数列表中使用数组首地址 + 长度的做法还是比较常见的。
//timeout:阻塞等待的超时时间。传入 -1 则始终阻塞,不超时。
int poll(struct pollfd *fds, nfds_t nfds, int timeout);
struct pollfd {
int fd; /* 需要监视的文件描述符 */
short events; /* 要监视的事件 */
short revents; /* 该文件描述符发生了的事件 */
};
//结构体中的事件可以指定下面七种事件,同时监视多个事件可以使用按位或(|)添加:
POLLIN 文件描述符可读
POLLPRI 可以非阻塞的读高优先级的数据
POLLOUT 文件描述符可写
POLLRDHUP 流式套接字连接点关闭,或者关闭写半连接。
POLLERR 已出错
POLLHUP 已挂断(一般指设备)
POLLNVAL 参数非法
一个例子
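#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <poll.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/select.h>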
/* Return the larger of the two integers (either one when they are equal). */
int max(int a, int b) {
    return (a < b) ? b : a;
}
/*
 * Watch fd_1 and fd_2 with poll() and echo whatever arrives on either one
 * to stdout.
 *
 * The loop ends when a chunk starting with "end" is read from either
 * descriptor, or when both descriptors have reached end of file / hung up.
 * Both descriptors are switched to non-blocking mode while the loop runs
 * and their original status flags are restored before returning.
 *
 * Returns 0. A poll() failure other than EINTR prints "error" and exits.
 */
int driver(int fd_1, int fd_2) {
    int fd_1_save = fcntl(fd_1,F_GETFL);
    int fd_2_save = fcntl(fd_2,F_GETFL);
    fcntl(fd_1, F_SETFL, fd_1_save|O_NONBLOCK);
    fcntl(fd_2, F_SETFL, fd_2_save|O_NONBLOCK);

    /* Fully initialised: `events |= POLLIN` on an uninitialised struct
     * would OR the flag into indeterminate bits. */
    struct pollfd pfd[2] = {
        { .fd = fd_1, .events = POLLIN, .revents = 0 },
        { .fd = fd_2, .events = POLLIN, .revents = 0 },
    };
    char buf_1[100] = {0};
    char buf_2[100] = {0};
    char *bufs[2] = { buf_1, buf_2 };

    int stop = 0;
    while (!stop && (pfd[0].fd >= 0 || pfd[1].fd >= 0)) {
        if (poll(pfd, 2, -1) < 0) {
            if (errno == EINTR) {
                continue;
            }
            printf("error\n");
            exit(1);
        }

        for (int i = 0; i < 2; i++) {
            if (pfd[i].fd < 0) {
                continue;   /* already retired; poll() ignores negative fds */
            }
            if (pfd[i].revents & POLLIN) {
                printf("read fd_%d\n", i + 1);
                ssize_t n = read(pfd[i].fd, bufs[i], sizeof(buf_1));
                if (n > 0) {
                    write(STDOUT_FILENO, bufs[i], (size_t)n);
                    if (n >= 3 && strncmp("end", bufs[i], 3) == 0) {
                        stop = 1;
                    }
                } else if (n == 0 || (errno != EAGAIN && errno != EINTR)) {
                    /* EOF or a hard error: stop watching this descriptor.
                     * EAGAIN just means the data is gone (for example it
                     * was consumed through the other descriptor); a -1
                     * must never be passed to write() as a length. */
                    pfd[i].fd = -1;
                }
            } else if (pfd[i].revents & (POLLERR | POLLHUP | POLLNVAL)) {
                pfd[i].fd = -1;
            }
        }
    }

    fcntl(fd_1,F_SETFL,fd_1_save);
    fcntl(fd_2,F_SETFL,fd_2_save);
    return 0;
}
/* Open the controlling terminal twice and run the poll() loop on both. */
int main(int argc, char *argv[]) {
    int fd_1 = open("/dev/tty",O_RDONLY);
    if (fd_1 < 0) {
        perror("open /dev/tty");
        return 1;
    }
    int fd_2 = open("/dev/tty",O_RDONLY|O_NONBLOCK);
    if (fd_2 < 0) {
        perror("open /dev/tty");
        close(fd_1);
        return 1;
    }

    driver(fd_1,fd_2);

    close(fd_1);
    close(fd_2);
    return 0;
}
//用strace分析程序
open("/dev/tty", O_RDONLY) = 3
open("/dev/tty", O_RDONLY|O_NONBLOCK) = 4
fcntl(3, F_GETFL) = 0x8000 (flags O_RDONLY|O_LARGEFILE)
fcntl(4, F_GETFL) = 0x8800 (flags O_RDONLY|O_NONBLOCK|O_LARGEFILE)
fcntl(3, F_SETFL, O_RDONLY|O_NONBLOCK|O_LARGEFILE) = 0
fcntl(4, F_SETFL, O_RDONLY|O_NONBLOCK|O_LARGEFILE) = 0
poll([{fd=3, events=POLLIN}, {fd=4, events=POLLIN}], 2, -1
aaaaaaaaaaaaaaaaaaaaaaaaa
) = 2 ([{fd=3, revents=POLLIN}, {fd=4, revents=POLLIN}])
fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 7), ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f515010f000
write(1, "read fd_1\n", 10read fd_1
) = 10
read(3, "aaaaaaaaaaaaaaaaaaaaaaaaa\n", 100) = 26
write(1, "aaaaaaaaaaaaaaaaaaaaaaaaa\n", 26aaaaaaaaaaaaaaaaaaaaaaaaa
) = 26
write(1, "read fd_2\n", 10read fd_2
) = 10
read(4, 0x7ffdada914f0, 100) = -1 EAGAIN (Resource temporarily unavailable)
write(1, "", 18446744073709551615) = -1 EFAULT (Bad address)
poll([{fd=3, events=POLLIN}, {fd=4, events=POLLIN}], 2, -1
end
) = 2 ([{fd=3, revents=POLLIN}, {fd=4, revents=POLLIN}])
write(1, "read fd_1\n", 10read fd_1
) = 10
read(3, "end\n", 100) = 4
write(1, "end\n", 4end
) = 4
write(1, "read fd_2\n", 10read fd_2
) = 10
read(4, 0x7ffdada914f0, 100) = -1 EAGAIN (Resource temporarily unavailable)
write(1, "", 18446744073709551615) = -1 EFAULT (Bad address)
fcntl(3, F_SETFL, O_RDONLY|O_LARGEFILE) = 0
fcntl(4, F_SETFL, O_RDONLY|O_NONBLOCK|O_LARGEFILE) = 0
close(3) = 0
close(4) = 0
exit_group(0) = ?
#include <sys/epoll.h>
//创建若干个epoll监控元素,并返回epoll的fd
int epoll_create(int size);
//操作需要监听的事件
int epoll_ctl(int epfd, int op, int fd, struct epoll_event *event);
struct epoll_event {
uint32_t events; /* epoll 监视的事件,与 poll(2) 能监视的事件差不多 */
epoll_data_t data; /* 用户数据,除了能保存文件描述符以外,还能保存其它有关数据 */
};
//epoll_ctl第二个参数op的操作定义
EPOLL_CTL_ADD 增加要监视的文件描述符
EPOLL_CTL_MOD 更改目标文件描述符的事件
EPOLL_CTL_DEL 删除要监视的文件描述符,event 参数会被忽略,可以传入 NULL
//阻塞监视并返回监视结果
//epfd,要操作的 epoll 实例
//events + maxevents:共同指定了一个结构体数组,数组的起始位置和长度
//timeout:超时等待的时间,设置为 -1 则始终阻塞监视,不超时
int epoll_wait(int epfd, struct epoll_event *events, int maxevents, int timeout);
一个例子
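#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/epoll.h>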
/*
 * Watch fd_1 and fd_2 with epoll and echo whatever arrives on either one,
 * first with write() and then as a "str->..." line.
 *
 * The loop ends when a chunk starting with "end" is read, or when every
 * registered descriptor has reached end of file / hung up / failed. Both
 * descriptors are switched to non-blocking mode while the loop runs and
 * their original status flags are restored before returning.
 *
 * Returns 0. A failure of epoll_create() or epoll_wait() (other than
 * EINTR) prints a message and exits.
 */
int driver(int fd_1, int fd_2) {
    int fd_1_save = fcntl(fd_1, F_GETFL);
    int fd_2_save = fcntl(fd_2, F_GETFL);
    fcntl(fd_1, F_SETFL, fd_1_save|O_NONBLOCK);
    fcntl(fd_2, F_SETFL, fd_2_save|O_NONBLOCK);

    int epoll_fd = epoll_create(10);
    if (epoll_fd < 0) {
        printf("epoll_create error\n");
        exit(1);
    }

    /* Count only descriptors that were really registered, so the loop
     * below knows when nothing is left to wait for. */
    int registered = 0;
    struct epoll_event event;
    event.events = EPOLLIN;
    event.data.fd = fd_1;
    if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, fd_1, &event) == 0) {
        registered++;
    }
    event.events = EPOLLIN;
    event.data.fd = fd_2;
    if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, fd_2, &event) == 0) {
        registered++;
    }

    char buf[100];
    int stop = 0;
    while (!stop && registered > 0) {
        int nready = epoll_wait(epoll_fd, &event, 1, -1);
        if (nready < 0) {
            if (errno == EINTR) {
                continue;
            }
            printf("epoll_wait error\n");
            exit(1);
        }
        if (nready == 0) {
            continue;
        }

        int fd = event.data.fd;
        if (event.events & EPOLLIN) {
            printf("%s", fd == fd_1 ? "fd_1 read...\n" : "fd_2 reaad.....\n");
            ssize_t n = read(fd, buf, sizeof(buf));
            if (n > 0) {
                write(STDOUT_FILENO, buf, (size_t)n);
                /* buf is not NUL-terminated, so bound %s by the byte count. */
                printf("str->%.*s\n", (int)n, buf);
                if (n >= 3 && buf[0] == 'e' && buf[1] == 'n' && buf[2] == 'd') {
                    stop = 1;
                }
                continue;
            }
            if (n < 0 && (errno == EAGAIN || errno == EINTR)) {
                continue;   /* nothing to read after all; keep waiting */
            }
        }

        /* EOF, hard read error, or hang-up/error event without data:
         * stop watching this descriptor. */
        epoll_ctl(epoll_fd, EPOLL_CTL_DEL, fd, NULL);
        registered--;
    } /* end while */

    close(epoll_fd);
    fcntl(fd_1, F_SETFL, fd_1_save);
    fcntl(fd_2, F_SETFL, fd_2_save);
    return 0;
}
/* Open the controlling terminal twice (blocking mode) and run the epoll loop. */
int main(int argc, char *argv[]) {
    int first = open("/dev/tty",O_RDONLY);
    int second = open("/dev/tty",O_RDONLY);

    driver(first, second);

    close(first);
    close(second);
    return 0;
}
//用strace分析程序
open("/dev/tty", O_RDONLY) = 3
open("/dev/tty", O_RDONLY) = 4
fcntl(3, F_GETFL) = 0x8000 (flags O_RDONLY|O_LARGEFILE)
fcntl(4, F_GETFL) = 0x8000 (flags O_RDONLY|O_LARGEFILE)
fcntl(3, F_SETFL, O_RDONLY|O_NONBLOCK|O_LARGEFILE) = 0
fcntl(4, F_SETFL, O_RDONLY|O_NONBLOCK|O_LARGEFILE) = 0
epoll_create(10) = 5
epoll_ctl(5, EPOLL_CTL_ADD, 3, {EPOLLIN, {u32=3, u64=3}}) = 0
epoll_ctl(5, EPOLL_CTL_ADD, 4, {EPOLLIN, {u32=4, u64=4}}) = 0
epoll_wait(5,
aaaaaa
[{EPOLLIN, {u32=4, u64=4}}], 1, -1) = 1
fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 1), ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f11f3bd6000
write(1, "fd_2 reaad.....\n", 16fd_2 reaad.....) = 16
read(4, "aaaaaa\n", 100) = 7
write(1, "aaaaaa\n", 7aaaaaa) = 7
write(1, "str->aaaaaa\n", 12str->aaaaaa) = 12
write(1, "\n", 1) = 1
epoll_wait(5
java的nio例子
import java.io.IOException;
import java.net.InetSocketAddress;
import java.nio.ByteBuffer;
import java.nio.channels.SelectionKey;
import java.nio.channels.Selector;
import java.nio.channels.SocketChannel;
import java.util.Iterator;
import java.util.Set;
/**
* * Created by yang.wang09 on 2018-12-04 14:00.
* */
public class X {
    public static void main(String[] args) throws IOException {
        go();
    }

    /**
     * Connects to a fixed HTTP server, sends one GET request over a
     * non-blocking SocketChannel and waits on a Selector until the channel
     * becomes readable.
     *
     * @throws IOException if connecting, writing or selecting fails
     */
    public static void go() throws IOException {
        // The literal address is used instead of the host name "www.baidu.com".
        String host = "220.181.111.37";
        int port = 80;
        InetSocketAddress ias = new InetSocketAddress(host, port);

        // try-with-resources closes both the channel and the selector.
        try (SocketChannel channel = SocketChannel.open(ias);
             Selector selector = Selector.open()) {
            channel.configureBlocking(false);
            channel.register(selector, SelectionKey.OP_READ);

            ByteBuffer buf = ByteBuffer.allocate(100);
            String message = "GET / HTTP/1.1\r\nHost:220.181.111.37\r\n\r\n\r\n";
            buf.put(message.getBytes());
            // flip() switches the buffer from filling to draining. Without
            // it write() sends the unused tail of the buffer, not the request.
            buf.flip();
            while (buf.hasRemaining()) {
                channel.write(buf);
            }

            boolean isLoop = true;
            while (isLoop) {
                int readyChannels = selector.select();
                if (readyChannels == 0) continue;
                Set<SelectionKey> selectedKeys = selector.selectedKeys();
                Iterator<SelectionKey> keyIterator = selectedKeys.iterator();
                while (isLoop && keyIterator.hasNext()) {
                    // Inspect the key that was actually selected, and remove
                    // it right away so it is not reported again.
                    SelectionKey key = keyIterator.next();
                    keyIterator.remove();
                    if (key.isAcceptable()) {
                        /* a connection was accepted by a ServerSocketChannel. */
                    } else if (key.isConnectable()) {
                        /* a connection was established with a remote server. */
                    } else if (key.isReadable()) {
                        /* a channel is ready for reading */
                        System.out.println("read ok");
                        isLoop = false;
                    } else if (key.isWritable()) {
                        /* a channel is ready for writing */
                        channel.write(buf);
                        System.out.println("write ok");
                        isLoop = false;
                    }
                }
            }
        }
    }
}
strace -o xx.log -ff java X
会生成很多log文件,将这些log文件都放到 log目录下,然后grep "select" *,grep "poll" *,发现只有epoll,其他的都没有
java version "1.8.0_131"
Java(TM) SE Runtime Environment (build 1.8.0_131-b11)
Java HotSpot(TM) 64-Bit Server VM (build 25.131-b11, mixed mode)
可见java 8 的多路复用底层是 epoll实现的
xx.log.16795:epoll_create(256) = 7
xx.log.16795:epoll_ctl(7, EPOLL_CTL_ADD, 5, {EPOLLIN, {u32=5, u64=15903069758740758533}}) = 0
xx.log.16795:epoll_ctl(7, EPOLL_CTL_ADD, 4, {EPOLLIN, {u32=4, u64=16044752212915650564}}) = 0
xx.log.16795:epoll_wait(7, [{EPOLLIN, {u32=4, u64=16044752212915650564}}], 8192, -1) = 1
#include <sys/uio.h>
ssize_t readv(int filedes, const struct iovec *iov, int iovcnt);
ssize_t writev(int filedes, const struct iovec *iov, int iovcnt);
/*若成功则返回已读,写的字节数,若出错则返回-1。 */
//这两个函数的第二个参数是指向iovec结构数组的一个指针:
struct iovec{
void *iov_base; //starting address of buffer
size_t iov_len; //size of buffer
};
//iov数组中的元素数由iovcnt说明。下图说明了readv和writev的参数和iovec结构。
writev以顺序iov[0],iov[1]至iov[iovcnt-1]从缓冲区中聚集输出数据。writev返回输出的字节总数。
readv则将读入的数据按照上述同样顺序散布到缓冲区中,readv总是先填满一个缓冲区,然后再填写下一个。readv返回读到
的总字节数。如果遇到文件结尾,已无数据可读,则返回0。
例子
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/uio.h>
/*
 * Scatter/gather demo: read up to 15 bytes from a.txt into two buffers
 * (5 + 10 bytes) with readv(), then write the bytes that were read to
 * b.txt with writev().
 */
int main(){
    char buf1[5],buf2[10];
    struct iovec iov[2];
    iov[0].iov_base = buf1;
    iov[0].iov_len = sizeof(buf1);
    iov[1].iov_base = buf2;
    iov[1].iov_len = sizeof(buf2);

    int fd = open("a.txt",O_RDWR);
    if(fd < 0){
        perror("open");
        return -1;
    }
    ssize_t rsize = readv(fd, iov, 2);
    if (rsize < 0) {
        perror("readv");
        close(fd);
        return -1;
    }
    printf("rsize = %zd\n",rsize);
    close(fd);

    /* readv() fills the buffers in order and may read fewer than 15 bytes.
     * Shrink the vectors to what was read so writev() never emits
     * uninitialised buffer contents. */
    size_t remaining = (size_t)rsize;
    for (int i = 0; i < 2; i++) {
        if (iov[i].iov_len > remaining) {
            iov[i].iov_len = remaining;
        }
        remaining -= iov[i].iov_len;
    }

    fd = open("b.txt", O_RDWR|O_CREAT, S_IRUSR|S_IWUSR);
    if(fd < 0){
        perror("open");
        return -1;
    }
    ssize_t wsize = writev(fd,iov,2);
    if (wsize < 0) {
        perror("writev");
        close(fd);
        return -1;
    }
    printf("wsize = %zd\n",wsize);
    close(fd);
    return 0;
}
mmap将一个文件或者其它对象映射进内存。文件被映射到若干个页上,如果文件的大小不是页大小的整数倍,最后一个页中未被使用的空间将会被清零。mmap在用户空间映射调用系统中作用很大。
具体函数
#include <sys/mman.h>
//addr 起始地址
//len 需要映射的长度
//prot PROT_READ,映射区可读;
// PROT_WRITE,映射区可写;PROT_EXEC,映射区可执行;PROT_NONE,映射区不可访问
//flag 映射的类型和选项,取值见下文"mmap的flag"
// fd,off 需要映射的fd和起始位置
void* mmap(void* addr, size_t len, int prot, int flag, int fd, off_t off);
//addr 起始地址
//len 长度
//prot 跟mmap的一样
int mprotect(void* addr, size_t len, int prot);
//addr和len 同mmap函数
//flag
//MS_ASYNC,这实际上不要求内核做什么,让内核自主去执行同步
//MS_SYNC,要求内核在返回之前把写操作完成
//MS_INVALIDATE,是一个可选的标志,它告诉内核丢弃没有同步的部分
int msync(void* addr, size_t len, int flags);
int munmap(void* addr, size_t len);
mmap的flag
MAP_FIXED //使用指定的映射起始地址,如果由start和len参数指定的内存区重叠于现存的映射空间,重叠部分将会被丢弃。如果指定的起始地址不可用,操作将会失败。并且起始地址必须落在页的边界上。
MAP_SHARED //与其它所有映射这个对象的进程共享映射空间。对共享区的写入,相当于输出到文件。直到msync()或者munmap()被调用,文件实际上不会被更新。
MAP_PRIVATE //建立一个写入时拷贝的私有映射。内存区域的写入不会影响到原文件。这个标志和以上标志是互斥的,只能使用其中一个。
MAP_DENYWRITE //这个标志被忽略。
MAP_EXECUTABLE //同上
MAP_NORESERVE //不要为这个映射保留交换空间。当交换空间被保留,对映射区修改的可能会得到保证。当交换空间不被保留,同时内存不足,对映射区的修改会引起段违例信号。
MAP_LOCKED //锁定映射区的页面,从而防止页面被交换出内存。
MAP_GROWSDOWN //用于堆栈,告诉内核VM系统,映射区可以向下扩展。
MAP_ANONYMOUS //匿名映射,映射区不与任何文件关联。
MAP_ANON //MAP_ANONYMOUS的别称,不再被使用。
MAP_FILE //兼容标志,被忽略。
MAP_32BIT //将映射区放在进程地址空间的低2GB,MAP_FIXED指定时会被忽略。当前这个标志只在x86-64平台上得到支持。
MAP_POPULATE //为文件映射通过预读的方式准备好页表。随后对映射区的访问不会被页违例阻塞。
MAP_NONBLOCK //仅和MAP_POPULATE一起使用时才有意义。不执行预读,只为已存在于内存中的页面建立页表入口。
例子
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/mman.h>
#define COPYING (1024*1024*1024) /* 1 GB */

/*
 * Copy argv[1] to argv[2] by mapping both files with mmap() and copying
 * with memcpy(), at most COPYING bytes per mapping.
 * Returns 0 on success and 1 on a usage error or any system call failure.
 */
int main(int argc, char *argv[]) {
    if(argc != 3) {
        /* Stop here: continuing would pass a NULL argv entry to open(). */
        printf("usage %s <src> <dst>\n",argv[0]);
        return 1;
    }

    int fd_in = open(argv[1], O_RDONLY);
    if (fd_in < 0) {
        perror("open src");
        return 1;
    }
    int fd_out = open(argv[2], O_RDWR|O_CREAT|O_TRUNC,0777);
    if (fd_out < 0) {
        perror("open dst");
        close(fd_in);
        return 1;
    }

    int rc = 0;
    struct stat stat_buf;
    if (fstat(fd_in,&stat_buf) < 0) {
        perror("fstat");
        rc = 1;
    }
    /* The destination must already have its final size: touching mapped
     * pages beyond the end of the file raises SIGBUS. */
    else if (ftruncate(fd_out,stat_buf.st_size) < 0) {
        perror("ftruncate");
        rc = 1;
    }

    off_t off_size = 0;
    while(rc == 0 && off_size < stat_buf.st_size) {
        size_t copy_size;
        if((stat_buf.st_size - off_size) > COPYING) {
            copy_size = COPYING;
        }
        else {
            copy_size = stat_buf.st_size - off_size;
        }

        void *src = mmap(0,copy_size,PROT_READ, MAP_SHARED,fd_in,off_size);
        if (src == MAP_FAILED) {
            perror("mmap src");
            rc = 1;
            break;
        }
        void *dest = mmap(0,copy_size,PROT_READ|PROT_WRITE, MAP_SHARED, fd_out, off_size);
        if (dest == MAP_FAILED) {
            perror("mmap dst");
            munmap(src,copy_size);
            rc = 1;
            break;
        }

        memcpy(dest, src, copy_size);
        munmap(src,copy_size);
        munmap(dest,copy_size);
        off_size += copy_size;
    }

    close(fd_in);
    close(fd_out);
    return rc;
}
文件映射IO函数
mmap函数和mprotect
高级IO--存储映射
linux驱动mmap内存映射
一起学 Unix 环境高级编程 (APUE) 之 高级 IO
select、poll、epoll之间的区别总结[整理]
使用SocketChannel的NIO客户机服务器通信示例
Linux 系统 文件锁 fcntl函数详解
文件锁 flock及fcntl flock