UNIX环境高级编程-高级IO

目录

文件锁

非阻塞io

select

poll

epoll

readv和writev

mmap

参考


 

 

文件锁

#
// For record locking the second argument cmd is one of F_GETLK (test whether a lock could be placed),
// F_SETLK (set or clear a lock without blocking) or F_SETLKW (set a lock, waiting until it is granted).
int fcntl(int fd, int cmd, .../* struct flock *flockptr */);

/* Describes the byte range and lock type passed to fcntl() for record locking. */
struct flock {
    short l_type;    /* F_RDLCK, F_WRLCK, F_UNLCK */
    short l_whence;  /* SEEK_SET, SEEK_CUR, SEEK_END */
    off_t l_start;   /* offset in bytes, relative to l_whence */
    off_t l_len;     /* length, in bytes, 0 means lock to EOF */
    pid_t l_pid;     /* returned with F_GETLK */
};

//l_type的类型
F_RDLCK:读锁
F_WRLCK:写锁
F_UNLCK:解锁

//l_whence的类型
SEEK_SET:l_start 从文件开头起算,锁的起始位置就是 l_start
SEEK_CUR:l_start 从文件当前偏移量起算,锁的起始位置为当前偏移量加上 l_start
SEEK_END:l_start 从文件末尾起算,锁的起始位置为文件大小加上 l_start

一个例子

#include
#include 
#include 
#include 
#include 
#include 
#include  
#include 
#include 
//#include "rwlock.h"

/*
 * Build a struct flock from the arguments and hand it to fcntl().
 *
 * fd      descriptor of the file to operate on
 * cmd     fcntl command (the callers in this file pass F_SETLK or F_SETLKW)
 * type    lock type stored in l_type (F_RDLCK, F_WRLCK or F_UNLCK)
 * offset  stored in l_start
 * whence  stored in l_whence
 * len     stored in l_len
 *
 * Returns whatever fcntl() returns.
 */
static int lock_reg(int fd,int cmd,int type,off_t offset,int whence,off_t len) {
    struct flock request;

    request.l_len = len;
    request.l_whence = whence;
    request.l_start = offset;
    request.l_type = type;

    int rc = fcntl(fd, cmd, &request);
    return rc;
}

/*
 * Ask the kernel (F_GETLK) whether a lock of the given type could be placed
 * on the described byte range.
 *
 * Returns -1 if fcntl() fails, 0 if fcntl() reports F_UNLCK (no conflicting
 * lock), otherwise the l_pid that fcntl() filled in for the conflicting lock.
 */
static pid_t lock_test(int fd,int type,off_t offset,int whence,off_t len) {
    struct flock lock;

    lock.l_type = type;
    lock.l_start = offset;
    lock.l_whence = whence;
    lock.l_len = len;

    if (fcntl(fd, F_GETLK, &lock) == -1) {
        return -1;
    }
    /* Compare, do not assign: the original "=" overwrote l_type and made
     * the result independent of what the kernel reported. */
    if (lock.l_type == F_UNLCK) {
        return 0;
    }
    return lock.l_pid;
}


/* Request a read lock (F_RDLCK) on the range with F_SETLKW, i.e. wait until it is granted. */
int read_lock(int fd,off_t offset,int whence,off_t len) {
    const int status = lock_reg(fd, F_SETLKW, F_RDLCK, offset, whence, len);
    return status;
}

/* Request a read lock (F_RDLCK) on the range with F_SETLK, i.e. without waiting. */
int read_lock_try(int fd,off_t offset,int whence,off_t len) {
    const int status = lock_reg(fd, F_SETLK, F_RDLCK, offset, whence, len);
    return status;
}

/* Request a write lock (F_WRLCK) on the range with F_SETLKW, i.e. wait until it is granted. */
int write_lock(int fd,off_t offset,int whence,off_t len) {
    const int status = lock_reg(fd, F_SETLKW, F_WRLCK, offset, whence, len);
    return status;
}

/* Request a write lock (F_WRLCK) on the range with F_SETLK, i.e. without waiting. */
int write_lock_try(int fd,off_t offset,int whence,off_t len) {
    const int status = lock_reg(fd, F_SETLK, F_WRLCK, offset, whence, len);
    return status;
}

/* Release the lock on the range by applying F_UNLCK with F_SETLK. */
int unlock(int fd,off_t offset, int whence,off_t len) {
    const int status = lock_reg(fd, F_SETLK, F_UNLCK, offset, whence, len);
    return status;
}

/* Returns 1 when lock_test() reports 0 for an F_RDLCK request on the range, otherwise 0. */
int is_read_lockable(int fd, off_t offset,int whence,off_t len) {
    pid_t holder = lock_test(fd, F_RDLCK, offset, whence, len);
    return holder == 0;
}

/* Returns 1 when lock_test() reports 0 for an F_WRLCK request on the range, otherwise 0. */
int is_write_lockable(int fd, off_t offset,int whence,off_t len) {
    pid_t holder = lock_test(fd, F_WRLCK, offset, whence, len);
    return holder == 0;
}

/*
 * Record-lock demo: the parent write-locks bytes 0-9 of aa.log, forks, and
 * sleeps for 10 seconds before unlocking. The child write-locks bytes 20-29
 * (a range that does not overlap the parent's), prints, unlocks and exits.
 */
int main(int argc, char *argv[]) {

    int fd = open("aa.log",O_RDWR|O_APPEND);
    if (fd < 0) {
        perror("open aa.log");
        exit(1);
    }
    if (write_lock(fd, 0, SEEK_SET, 10) < 0) {
        perror("write_lock");
        exit(1);
    }

    pid_t pid = fork();
    if (pid < 0) {
        /* Save errno first: printf() is allowed to change it. */
        int fork_errno = errno;
        printf("fork error ->%d\n", fork_errno);
        exit(fork_errno);
    }

    if (pid == 0) {
        if (write_lock(fd, 20, SEEK_SET, 10) < 0) {
            perror("write_lock");
            exit(1);
        }
        printf("chiild get write_lock ok\n");
        unlock(fd, 20, SEEK_SET, 10);
        printf("unlock child lock\n");
        exit(0);
    }

    printf("sleep -> parent 10 second\n");
    sleep(10);

    unlock(fd, 0, SEEK_SET, 10);
    printf("parent unlock ok\n");
    close(fd);
    return 0;
}

//执行结果
sleep -> parent 10 second
chiild get write_lock ok
unlock child lock
parent unlock ok

//如果将子进程中的改为
else if(pid == 0) {
    write_lock(fd,8 , SEEK_SET, 10);
    。。。
//执行结果为
sleep -> parent 10 second
parent unlock ok
chiild get write_lock ok
unlock child lock



//用strace分析程序
open("aa.log", O_RDWR|O_APPEND)         = 3
fcntl(3, F_SETLKW, {l_type=F_WRLCK, l_whence=SEEK_SET, l_start=0, l_len=10}) = 0

clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f818487aa10) = 32736
fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 1), ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f8184884000
write(1, "sleep -> parent 10 second\n", 26sleep -> parent 10 second
) = 26
rt_sigprocmask(SIG_BLOCK, [CHLD], [], 8) = 0
rt_sigaction(SIGCHLD, NULL, {SIG_DFL, [], 0}, 8) = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
nanosleep({10, 0}, strace: Process 32736 attached
 

[pid 32736] fcntl(3, F_SETLKW, {l_type=F_WRLCK, l_whence=SEEK_SET, l_start=20, l_len=10}) = 0
[pid 32736] fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 1), ...}) = 0
[pid 32736] mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f8184884000
[pid 32736] write(1, "chiild get write_lock ok\n", 25chiild get write_lock ok
) = 25
[pid 32736] fcntl(3, F_SETLK, {l_type=F_UNLCK, l_whence=SEEK_SET, l_start=20, l_len=10}) = 0
[pid 32736] write(1, "unlock child lock\n", 18unlock child lock
) = 18
[pid 32736] exit_group(0)               = ?
[pid 32736] +++ exited with 0 +++
<... nanosleep resumed> {9, 999332497}) = ? ERESTART_RESTARTBLOCK (Interrupted by signal)

--- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=32736, si_uid=0, si_status=0, si_utime=0, si_stime=0} ---
restart_syscall(<... resuming interrupted nanosleep ...>

) = 0
fcntl(3, F_SETLK, {l_type=F_UNLCK, l_whence=SEEK_SET, l_start=0, l_len=10}) = 0
write(1, "parent unlock ok\n", 17parent unlock ok
)      = 17
exit_group(0)                           = ?

 

关于记录锁的FreeBSD实现

UNIX环境高级编程-高级IO_第1张图片

 

 

 

非阻塞io

#
int fcntl(int fd, int cmd, .../* stutct flock *flockptr */);
flag = fcntl(0, F_GETFL);    //get 描述符状态
fcntl(socket_fd, F_SETFL, flags | O_NONBLOCK);    //设置描述符为非阻塞
#include 
#include 
#include 
#include 
#include 
#include 
#include 

int max_len = 500000;

/*
 * Read up to max_len bytes from stdin, switch stdout to non-blocking mode and
 * write the data out, reporting the result of every write() on stderr.
 * The original stdout flags are restored before returning.
 */
int main(int argc, char *argv[]) {

    char buf[max_len];
    int read_count = read(STDIN_FILENO, buf, sizeof(buf));

    fprintf(stderr, "read %d bytes\n",read_count);

    /* fcntl() takes a command as its second argument: fetch the flags with
     * F_GETFL, then store them with O_NONBLOCK added via F_SETFL. Passing
     * O_NONBLOCK as the command fails with EINVAL. */
    int saved_flags = fcntl(STDOUT_FILENO, F_GETFL);
    if (saved_flags == -1
        || fcntl(STDOUT_FILENO, F_SETFL, saved_flags | O_NONBLOCK) == -1) {
        perror("fcntl");
        return 1;
    }

    char *ptr = buf;
    int rc = 0;
    while (read_count > 0) {
        errno = 0;
        int nwrite = write(STDOUT_FILENO, ptr, read_count);
        int write_errno = errno;   /* capture before any other call can change it */
        sleep(1);
        fprintf(stderr, "nwrite = %d, errno=%d\n", nwrite, write_errno);
        if (nwrite > 0) {
            ptr += nwrite;
            read_count -= nwrite;
        } else if (nwrite < 0 && write_errno != EAGAIN && write_errno != EINTR) {
            /* A real error will not go away by retrying; stop instead of looping forever. */
            rc = 1;
            break;
        }
    }

    fcntl(STDOUT_FILENO, F_SETFL, saved_flags);
    return rc;
}

//strace执行结果
execve("./noblock", ["./noblock", "2"], [/* 23 vars */]) = 0
brk(NULL)                               = 0x670000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4ca564a000
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=30479, ...}) = 0
mmap(NULL, 30479, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f4ca5642000
close(3)                                = 0
open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\20\35\2\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=2127336, ...}) = 0
mmap(NULL, 3940800, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f4ca5067000
mprotect(0x7f4ca521f000, 2097152, PROT_NONE) = 0
mmap(0x7f4ca541f000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1b8000) = 0x7f4ca541f000
mmap(0x7f4ca5425000, 16832, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f4ca5425000
close(3)                                = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4ca5641000
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4ca563f000
arch_prctl(ARCH_SET_FS, 0x7f4ca563f740) = 0
mprotect(0x7f4ca541f000, 16384, PROT_READ) = 0
mprotect(0x600000, 4096, PROT_READ)     = 0
mprotect(0x7f4ca564b000, 4096, PROT_READ) = 0
munmap(0x7f4ca5642000, 30479)           = 0
read(0, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 500000) = 500000
write(2, "read 500000 bytes\n", 18read 500000 bytes
)     = 18
fcntl(1, 0x800 /* F_??? */, 0x7f4ca54259f0) = -1 EINVAL (Invalid argument)
write(1, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 500000) = 500000
rt_sigprocmask(SIG_BLOCK, [CHLD], [], 8) = 0
rt_sigaction(SIGCHLD, NULL, {SIG_DFL, [], 0}, 8) = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
nanosleep({1, 0}, 0x7fff9868c350)       = 0
write(2, "nwrite = 500000, errno=0\n", 25nwrite = 500000, errno=0
) = 25
fcntl(1, 0xfffff7ff /* F_??? */, 0x7f4ca54259f0) = -1 EINVAL (Invalid argument)
exit_group(0)                           = ?
+++ exited with 0 +++

另一个例子

#include 
#include 
#include 
#include 
#include 
#include
#include 

/*
 * Open /etc/profile with O_NONBLOCK and copy it to stdout in 100-byte reads,
 * printing the size of each read. Stops once st_size bytes have been read or
 * read() reports end of file.
 */
int main(int argc, char *argv[]) {
    int fd = open("/etc/profile", O_RDONLY | O_NONBLOCK);
    if (fd < 0) {
        printf("error\n");
        exit(1);
    }

    struct stat f_stat;
    if (fstat(fd, &f_stat) < 0) {
        printf("error\n");
        exit(1);
    }
    off_t total_size = f_stat.st_size;

    char buf[100];
    while (1) {
        errno = 0;
        int read_size = read(fd, buf, sizeof(buf));
        if (read_size < 0) {
            if (EAGAIN == errno) {
                printf("EAGAIN...\n");
                continue;
            }
            printf("error\n");
            exit(1);
        }

        printf("read_size -> %d\n", read_size);
        fwrite(buf, sizeof(char), read_size, stdout);
        total_size -= read_size;

        /* read() returning 0 is end of file; without this check the loop
         * never ends if fewer than st_size bytes can be read. */
        if (read_size == 0 || total_size <= 0) {
            break;
        }
    }

    close(fd);
    return 0;
}

//执行后打印出 /etc/profile的内容
//执行多次,并没有出现 EAGAIN 这样的异常:O_NONBLOCK 对普通磁盘文件不起作用,read 总是直接返回数据或文件结尾

一个非阻塞状态机的例子

#include 
#include 
#include 
#include 
#include 
#include 
#include   
#define BUFSIZE   1024          /* capacity in bytes of each state machine's buffer */
#define TTY        "/dev/tty"    /* device path opened by main() for both descriptors */
 
 /* States of the relay state machine handled by fsm_driver(). */
 enum {
     STATE_R  = 0,   /* read state */
     STATE_W  = 1,   /* write state */
     STATE_Ex = 2,   /* exception state */
     STATE_T  = 3    /* terminated state */
 };
 
 
/* State of one copy direction: read from sfd into buf, then write buf to dfd. */
struct fsm_st {
     int state; /* current state of the state machine (one of the STATE_* values) */
     int sfd; /* source file descriptor that is read from */
     int dfd; /* destination file descriptor that is written to */
     char buf[BUFSIZE]; /* data buffer */
     int len; /* number of bytes obtained by the last read that are still to be written */
     int pos; /* offset into buf: how far the writes have progressed while draining len bytes */
     char *errstr; /* error message passed to perror() */
 };
 
 /*
  * State machine driver: perform exactly one step for fsm according to
  * fsm->state and store the next state back into fsm->state.
  *
  *   STATE_R   read up to BUFSIZE bytes from sfd; 0 bytes -> STATE_T,
  *             EAGAIN -> stay in STATE_R, other error -> STATE_Ex,
  *             data -> STATE_W with pos reset to 0.
  *   STATE_W   write the pending len bytes starting at buf+pos; a partial
  *             write stays in STATE_W, a complete one returns to STATE_R,
  *             EAGAIN -> stay in STATE_W, other error -> STATE_Ex.
  *   STATE_Ex  report errstr with perror() and move to STATE_T.
  *   STATE_T   nothing to do.
  * Any other state value aborts the process.
  */
 static void fsm_driver(struct fsm_st *fsm) {
     int written;

     switch (fsm->state) {
     case STATE_R:
         fsm->len = read(fsm->sfd, fsm->buf, BUFSIZE);
         if (fsm->len > 0) {
             /* Got data: start writing it from the beginning of buf. */
             fsm->pos = 0;
             fsm->state = STATE_W;
         } else if (fsm->len == 0) {
             /* End of file. */
             fsm->state = STATE_T;
         } else if (errno == EAGAIN) {
             /* Nothing available yet: try reading again on the next step. */
             fsm->state = STATE_R;
         } else {
             fsm->errstr = "read()";
             fsm->state = STATE_Ex;
         }
         break;

     case STATE_W:
         written = write(fsm->dfd, fsm->buf + fsm->pos, fsm->len);
         if (written >= 0) {
             fsm->pos += written;
             fsm->len -= written;
             /* Keep writing until everything that was read has gone out. */
             fsm->state = (fsm->len == 0) ? STATE_R : STATE_W;
         } else if (errno == EAGAIN) {
             /* Destination not ready: retry the same write on the next step. */
             fsm->state = STATE_W;
         } else {
             fsm->errstr = "write()";
             fsm->state = STATE_Ex;
         }
         break;

     case STATE_Ex:
         perror(fsm->errstr);
         fsm->state = STATE_T;
         break;

     case STATE_T:
         /* Terminal state: nothing to do in this example. */
         break;

     default:
         /* Unknown state value: stop immediately rather than continue. */
         abort();
     }
 }
 
/*
 * Drive two state machines until both reach STATE_T: one copies fd1 -> fd2,
 * the other copies fd2 -> fd1. Both descriptors are switched to non-blocking
 * mode for the duration and their original flags are restored afterwards.
 */
 static void relay(int fd1,int fd2) {
     /* One machine per direction: "12" reads fd1 and writes fd2, "21" the reverse. */
     struct fsm_st fsm12, fsm21;
     int flags1, flags2;

     /* Non-blocking descriptors keep one machine from stalling the other. */
     flags1 = fcntl(fd1, F_GETFL);
     fcntl(fd1, F_SETFL, flags1 | O_NONBLOCK);
     flags2 = fcntl(fd2, F_GETFL);
     fcntl(fd2, F_SETFL, flags2 | O_NONBLOCK);

     /* Each machine only knows "source" and "destination"; both start in the read state. */
     fsm12.sfd = fd1;
     fsm12.dfd = fd2;
     fsm12.state = STATE_R;

     fsm21.sfd = fd2;
     fsm21.dfd = fd1;
     fsm21.state = STATE_R;

     /* Step both machines in turn until each has terminated. */
     while (!(fsm12.state == STATE_T && fsm21.state == STATE_T)) {
         fsm_driver(&fsm12);
         fsm_driver(&fsm21);
     }

     fcntl(fd1, F_SETFL, flags1);
     fcntl(fd2, F_SETFL, flags2);
}

/*
 * Open the terminal twice, print a banner through each descriptor and relay
 * data between the two descriptors until both directions terminate.
 */
int main() {
    static const char banner[] = "TTY\n";
    int fd_r,fd_w;

    /* Opening without O_NONBLOCK is fine here: relay() switches the
     * descriptor to non-blocking mode before driving the state machines. */
    fd_r = open(TTY,O_RDWR);
    if(fd_r < 0) {
        perror("open()");
        exit(1);
    }
    /* sizeof - 1: send the four visible bytes, not the terminating NUL. */
    write(fd_r, banner, sizeof(banner) - 1);

    fd_w = open(TTY,O_RDWR|O_NONBLOCK);
    if(fd_w < 0) {
        perror("open()");
        close(fd_r);
        exit(1);
    }
    write(fd_w, banner, sizeof(banner) - 1);

    relay(fd_r,fd_w);
    close(fd_r);
    close(fd_w);
    exit(0);
}

//strace结果,设置成非阻塞之后,fd=3,fd=4的两个描述符就不断出现EAGAIN 错误
。。。
open("/dev/tty", O_RDWR)                = 3
write(3, "TTY\n\0", 5TTY
)                  = 5
open("/dev/tty", O_RDWR|O_NONBLOCK)     = 4
write(4, "TTY\n\0", 5TTY
)                  = 5
fcntl(3, F_GETFL)                       = 0x8002 (flags O_RDWR|O_LARGEFILE)
fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK|O_LARGEFILE) = 0
fcntl(4, F_GETFL)                       = 0x8802 (flags O_RDWR|O_NONBLOCK|O_LARGEFILE)
fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK|O_LARGEFILE) = 0
read(3, 0x7ffffbdb12bc, 1024)           = -1 EAGAIN (Resource temporarily unavailable)
read(4, 0x7ffffbdb0e9c, 1024)           = -1 EAGAIN (Resource temporarily unavailable)
read(3, 0x7ffffbdb12bc, 1024)           = -1 EAGAIN (Resource temporarily unavailable)
read(4, 0x7ffffbdb0e9c, 1024)           = -1 EAGAIN (Resource temporarily unavailable)
。。。

 

select

从select返回时,内核告诉我们:
已准备好的描述符的数量。
对于读、写或异常这三个状态中的每一个,哪些描述符已准备好。

#include 
int select(int maxfdp1,fd_set *readfds,fd_set *writefds,fd_set *exceptfds,struct timeval *tvptr);
//Returns: the number of ready descriptors, 0 on timeout, -1 on error

/* Timeout value passed to select() through tvptr. */
struct timeval{
  long tv_sec; //seconds
  long tv_usec;//and microseconds
};

#include 
int FD_ISSET(int fd,fd_set *fdset);//Returns: nonzero if fd is in the descriptor set, 0 otherwise
void FD_CLR(int fd,fd_set *fdset);//remove fd from the set
void FD_SET(int fd,fd_set *fdset);//add fd to the set
void FD_ZERO(fd_set *fdset);//clear the whole set
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 

/* Return the larger of a and b (the original returned the smaller one). */
int max(int a, int b) {
    if(a < b) {
        return b;
    }
    return a;
}

/*
 * Open the file "f_1" and return the descriptor (or open()'s error value).
 * Note that, despite the function's name, the file is opened read-only and
 * non-blocking.
 */
int get_write_fileno() {
    return open("f_1", O_RDONLY|O_NONBLOCK);
}


/*
 * Wait with select() for fd_1 or fd_2 to become readable. Data read from
 * fd_1 is echoed to stdout; data from fd_2 is read into its own buffer.
 * The loop ends when either buffer starts with "end" or a descriptor reports
 * end of file. Both descriptors are non-blocking while the loop runs and get
 * their original flags back afterwards.
 */
void io_driver(int fd_1, int fd_2) {

    printf("fd_1 -> %d\n", fd_1);
    printf("fd_2 -> %d\n", fd_2);

    int fd_1_save = fcntl(fd_1, F_GETFL);
    int fd_2_save = fcntl(fd_2, F_GETFL);

    fcntl(fd_1, F_SETFL, fd_1_save|O_NONBLOCK);
    fcntl(fd_2, F_SETFL, fd_2_save|O_NONBLOCK);

    /* Zero-filled so the "end" comparisons never inspect uninitialised bytes. */
    char buf_1[100] = {0};
    char buf_2[100] = {0};
    int read_count_1;
    int read_count_2;

    printf("sizeof --> %zu\n", sizeof(buf_1));
    int write_fd = get_write_fileno();
    printf("fifo fd -> %d\n",write_fd);

    /* select() needs the highest descriptor number plus one. */
    int nfds = (fd_1 > fd_2 ? fd_1 : fd_2) + 1;
    fd_set r_set;
    int done = 0;

    while (!done) {
        /* select() overwrites the set, so rebuild it on every iteration. */
        FD_ZERO(&r_set);
        FD_SET(fd_1, &r_set);
        FD_SET(fd_2, &r_set);

        if( select(nfds, &r_set, NULL, NULL, NULL) < 0) {
            printf("error\n");
            exit(1);
        }

        if( FD_ISSET(fd_1,&r_set) ) {
            printf("read fd_1...\n");
            read_count_1 = read(fd_1,buf_1,sizeof(buf_1));
            if (read_count_1 > 0) {
                write(STDOUT_FILENO ,buf_1, read_count_1);
            } else if (read_count_1 == 0) {
                done = 1;   /* end of file on fd_1 */
            }
        }
        if( FD_ISSET(fd_2,&r_set) ) {
            printf("read fd_2...\n");
            /* Read fd_2 into its own buffer (the original re-read fd_1 here). */
            read_count_2 = read(fd_2,buf_2,sizeof(buf_2));
            if (read_count_2 == 0) {
                done = 1;   /* end of file on fd_2 */
            }
        }
        if( strncmp("end", buf_1, 3)==0 || strncmp("end", buf_2, 3)==0 ) {
            done = 1;
        }
    }

    if (write_fd >= 0) {
        close(write_fd);
    }
    fcntl(fd_1,F_SETFL,fd_1_save);
    fcntl(fd_2,F_SETFL,fd_2_save);
}

/* Open the terminal twice in non-blocking read-only mode and run io_driver() on the pair. */
int main(int argc, char *argv[]) {

    int fd_1 = open("/dev/tty", O_RDONLY|O_NONBLOCK);
    if (fd_1 < 0) {
        perror("open()");
        exit(1);
    }
    int fd_2 = open("/dev/tty", O_RDONLY|O_NONBLOCK);
    if (fd_2 < 0) {
        perror("open()");
        close(fd_1);
        exit(1);
    }

    io_driver(fd_1, fd_2);
    close(fd_1);
    close(fd_2);

    return 0;
}

//打印结果
write(1, "fd_1 -> 3\n", 10fd_1 -> 3
)             = 10
write(1, "fd_2 -> 4\n", 10fd_2 -> 4
)             = 10

fcntl(3, F_GETFL)                       = 0x8800 (flags O_RDONLY|O_NONBLOCK|O_LARGEFILE)
fcntl(4, F_GETFL)                       = 0x8800 (flags O_RDONLY|O_NONBLOCK|O_LARGEFILE)
fcntl(3, F_SETFL, O_RDONLY|O_NONBLOCK|O_LARGEFILE) = 0
fcntl(4, F_SETFL, O_RDONLY|O_NONBLOCK|O_LARGEFILE) = 0

write(1, "sizeof --> 100\n", 15sizeof --> 100
)        = 15
open("f_1", O_RDONLY|O_NONBLOCK)        = 5
write(1, "fifo fd -> 5\n", 13fifo fd -> 5
)          = 13


select(4, [3], NULL, NULL, NULL111111111111
)        = 1 (in [3])
write(1, "read fd_1...\n", 13read fd_1...
)          = 13
read(3, "111111111111\n", 100)          = 13
write(1, "111111111111\n", 13111111111111
)          = 13


select(4, [3], NULL, NULL, NULL1111111111111122222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222223333333333333333333333333333333333333333333333333334444444444444444444444444444444444444444444444444444455555555555555555555555555555555555555555566666666666666666666666666666666666666667777777777777777777777777777777777777777
)        = 1 (in [3])
write(1, "read fd_1...\n", 13read fd_1...
)          = 13
read(3, "11111111111111222222222222222222"..., 100) = 100
write(1, "11111111111111222222222222222222"..., 1001111111111111122222222222222222222222222222222222222222222222222222222222222222222222222222222222222) = 100
select(4, [3], NULL, NULL, NULL)        = 1 (in [3])
write(1, "read fd_1...\n", 13read fd_1...
)          = 13
read(3, "22222222222222222222222222222222"..., 100) = 100
write(1, "22222222222222222222222222222222"..., 1002222222222222222222222222222222222222233333333333333333333333333333333333333333333333333344444444444) = 100
select(4, [3], NULL, NULL, NULL)        = 1 (in [3])
write(1, "read fd_1...\n", 13read fd_1...
)          = 13
read(3, "44444444444444444444444444444444"..., 100) = 100
write(1, "44444444444444444444444444444444"..., 1004444444444444444444444444444444444444444445555555555555555555555555555555555555555556666666666666666) = 100
select(4, [3], NULL, NULL, NULL)        = 1 (in [3])
write(1, "read fd_1...\n", 13read fd_1...
)          = 13
read(3, "66666666666666666666666677777777"..., 100) = 65
write(1, "66666666666666666666666677777777"..., 656666666666666666666666667777777777777777777777777777777777777777
) = 65

select(4, [3], NULL, NULL, NULL

改用pthread方式去实现

/* Descriptors shared with the worker thread; main() assigns them before creating it. */
int fd_1_no;
int fd_2_no;

/* Thread entry point handed to pthread_create() by main(). */
void *io_driver(void *arg) {
    // body omitted in the article: "content unchanged"
}


/*
 * Open the terminal twice, run io_driver() in a separate thread and wait for
 * that thread to finish before closing the descriptors.
 */
int main(int argc, char *argv[]) {

    fd_1_no = open("/dev/tty", O_RDONLY|O_NONBLOCK);
    fd_2_no = open("/dev/tty", O_RDONLY|O_NONBLOCK);
    if (fd_1_no < 0 || fd_2_no < 0) {
        printf("open error\n");
        exit(1);
    }

    pthread_t p_1;
    void *ret_1;
    /* pthread_create() returns an error number instead of setting errno;
     * joining a thread that was never created is undefined. */
    int err = pthread_create(&p_1,NULL,io_driver,(void*)0);
    if (err != 0) {
        printf("pthread_create error ->%d\n", err);
        close(fd_1_no);
        close(fd_2_no);
        exit(1);
    }
    pthread_join(p_1,&ret_1);

    close(fd_1_no);
    close(fd_2_no);

    return 0;
}

//strace -ff 结果

[pid 10141] select(4, [3], NULL, NULL, NULL111111111111111
) = 1 (in [3])
[pid 10141] write(1, "read fd_1...\n", 13read fd_1...
) = 13
[pid 10141] read(3, "111111111111111\n", 100) = 16
[pid 10141] write(1, "111111111111111\n", 16111111111111111
) = 16


[pid 10141] select(4, [3], NULL, NULL, NULL222222222222222222222222222222222222222222222222222222222333333333333333333333333333333333333333333333333333333333334444444444444444444444444444444444444444444444444445555555555555555555555555555555555555566666666666666666666666666666666666666666677777777777777777777777777777777778888888888888888888888888888888888888888888888888899999999999999999
) = 1 (in [3])

[pid 10141] write(1, "read fd_1...\n", 13read fd_1...
) = 13
[pid 10141] read(3, "22222222222222222222222222222222"..., 100) = 100
[pid 10141] write(1, "22222222222222222222222222222222"..., 1002222222222222222222222222222222222222222222222222222222223333333333333333333333333333333333333333333) = 100
[pid 10141] select(4, [3], NULL, NULL, NULL) = 1 (in [3])
[pid 10141] write(1, "read fd_1...\n", 13read fd_1...
) = 13
[pid 10141] read(3, "33333333333333334444444444444444"..., 100) = 100
[pid 10141] write(1, "33333333333333334444444444444444"..., 1003333333333333333444444444444444444444444444444444444444444444444444555555555555555555555555555555555) = 100
[pid 10141] select(4, [3], NULL, NULL, NULL) = 1 (in [3])
[pid 10141] write(1, "read fd_1...\n", 13read fd_1...
) = 13
[pid 10141] read(3, "55555666666666666666666666666666"..., 100) = 100
[pid 10141] write(1, "55555666666666666666666666666666"..., 1005555566666666666666666666666666666666666666666677777777777777777777777777777777778888888888888888888) = 100
[pid 10141] select(4, [3], NULL, NULL, NULL) = 1 (in [3])
[pid 10141] write(1, "read fd_1...\n", 13read fd_1...
) = 13
[pid 10141] read(3, "88888888888888888888888888888889"..., 100) = 49
[pid 10141] write(1, "88888888888888888888888888888889"..., 49888888888888888888888888888888899999999999999999
) = 49
[pid 10141] select(4, [3], NULL, NULL, NULL

 

 

poll

//poll - wait for some event on a file descriptor 
#include 

//fds: address of the first element of an array. poll can watch several descriptors, each one described by a struct pollfd, so an array is needed to hold them.
//nfds: number of elements in the fds array. Passing "start address + length" is a common way to hand an array to a function.
//timeout: how long to block while waiting. Pass -1 to block indefinitely with no timeout.
int poll(struct pollfd *fds, nfds_t nfds, int timeout);
 
/* One entry of the array passed to poll(). */
struct pollfd {
    int   fd;         /* file descriptor to watch */
    short events;     /* events to watch for */
    short revents;    /* events that actually occurred on this descriptor */
};

//结构体中的事件可以指定下面七种事件,同时监视多个事件可以使用按位或(|)添加:
POLLIN	    文件描述符可读
POLLPRI	    可以非阻塞的读高优先级的数据
POLLOUT	    文件描述符可写
POLLRDHUP	流式套接字连接点关闭,或者关闭写半连接。
POLLERR	    已出错
POLLHUP	    已挂断(一般指设备)
POLLNVAL	参数非法

一个例子

#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 

/* Return the larger of the two integers. */
int max(int a, int b) {
    return (a < b) ? b : a;
}


/*
 * Wait with poll() for fd_1 or fd_2 to become readable and echo whatever is
 * read to stdout. The loop ends when either buffer starts with "end", when a
 * descriptor reports end of file, or when poll() flags an error on one of
 * them. Both descriptors are non-blocking while the loop runs and get their
 * original flags back afterwards.
 *
 * Returns 0 once the loop has ended; exits the process if poll() fails with
 * anything other than EINTR.
 */
int driver(int fd_1, int fd_2) {
    int fd_1_save = fcntl(fd_1,F_GETFL);
    int fd_2_save = fcntl(fd_2,F_GETFL);
    fcntl(fd_1, F_SETFL, fd_1_save|O_NONBLOCK);
    fcntl(fd_2, F_SETFL, fd_2_save|O_NONBLOCK);

    /* Assign (not OR into) events: the array starts out uninitialised. */
    struct pollfd pfd[2];
    pfd[0].fd = fd_1;
    pfd[0].events = POLLIN;
    pfd[0].revents = 0;
    pfd[1].fd = fd_2;
    pfd[1].events = POLLIN;
    pfd[1].revents = 0;

    /* Zero-filled so the "end" comparisons never inspect uninitialised bytes. */
    char buf_1[100] = {0};
    char buf_2[100] = {0};
    int done = 0;

    while (!done) {
        if( poll(pfd,2,-1)<0 ) {
            if(errno == EINTR) {
                continue;
            }
            printf("error\n");
            exit(1);
        }

        /* POLLHUP is included so a closed peer is read once and seen as EOF. */
        if(pfd[0].revents & (POLLIN|POLLHUP)) {
            printf("read fd_1\n");
            int read_count_1 = read(fd_1,buf_1,sizeof(buf_1));
            if (read_count_1 > 0) {
                /* Only echo real data; a failed read returns -1. */
                write(STDOUT_FILENO,buf_1,read_count_1);
            } else if (read_count_1 == 0) {
                done = 1;
            }
        }

        if(pfd[1].revents & (POLLIN|POLLHUP)) {
            printf("read fd_2\n");
            int read_count_2 = read(fd_2,buf_2,sizeof(buf_2));
            if (read_count_2 > 0) {
                write(STDOUT_FILENO,buf_2,read_count_2);
            } else if (read_count_2 == 0) {
                done = 1;
            }
        }

        if ((pfd[0].revents | pfd[1].revents) & (POLLERR|POLLNVAL)) {
            done = 1;
        }
        if( strncmp("end",buf_1,3)==0 || strncmp("end",buf_2,3)==0 ) {
            done = 1;
        }
    }

    fcntl(fd_1,F_SETFL,fd_1_save);
    fcntl(fd_2,F_SETFL,fd_2_save);
    return 0;
}


/* Open the terminal twice (once blocking, once non-blocking) and run driver() on the pair. */
int main(int argc, char *argv[]) {
    int fd_1 = open("/dev/tty",O_RDONLY);
    if (fd_1 < 0) {
        printf("error\n");
        exit(1);
    }
    int fd_2 = open("/dev/tty",O_RDONLY|O_NONBLOCK);
    if (fd_2 < 0) {
        printf("error\n");
        close(fd_1);
        exit(1);
    }

    driver(fd_1,fd_2);
    close(fd_1);
    close(fd_2);
    return 0;
}

//用strace分析程序
open("/dev/tty", O_RDONLY)              = 3
open("/dev/tty", O_RDONLY|O_NONBLOCK)   = 4
fcntl(3, F_GETFL)                       = 0x8000 (flags O_RDONLY|O_LARGEFILE)
fcntl(4, F_GETFL)                       = 0x8800 (flags O_RDONLY|O_NONBLOCK|O_LARGEFILE)
fcntl(3, F_SETFL, O_RDONLY|O_NONBLOCK|O_LARGEFILE) = 0
fcntl(4, F_SETFL, O_RDONLY|O_NONBLOCK|O_LARGEFILE) = 0
poll([{fd=3, events=POLLIN}, {fd=4, events=POLLIN}], 2, -1
aaaaaaaaaaaaaaaaaaaaaaaaa

) = 2 ([{fd=3, revents=POLLIN}, {fd=4, revents=POLLIN}])
fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 7), ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f515010f000
write(1, "read fd_1\n", 10read fd_1
)             = 10
read(3, "aaaaaaaaaaaaaaaaaaaaaaaaa\n", 100) = 26
write(1, "aaaaaaaaaaaaaaaaaaaaaaaaa\n", 26aaaaaaaaaaaaaaaaaaaaaaaaa
) = 26
write(1, "read fd_2\n", 10read fd_2
)             = 10
read(4, 0x7ffdada914f0, 100)            = -1 EAGAIN (Resource temporarily unavailable)
write(1, "", 18446744073709551615)      = -1 EFAULT (Bad address)
poll([{fd=3, events=POLLIN}, {fd=4, events=POLLIN}], 2, -1

end
) = 2 ([{fd=3, revents=POLLIN}, {fd=4, revents=POLLIN}])
write(1, "read fd_1\n", 10read fd_1
)             = 10
read(3, "end\n", 100)                   = 4
write(1, "end\n", 4end
)                    = 4
write(1, "read fd_2\n", 10read fd_2
)             = 10
read(4, 0x7ffdada914f0, 100)            = -1 EAGAIN (Resource temporarily unavailable)
write(1, "", 18446744073709551615)      = -1 EFAULT (Bad address)

fcntl(3, F_SETFL, O_RDONLY|O_LARGEFILE) = 0
fcntl(4, F_SETFL, O_RDONLY|O_NONBLOCK|O_LARGEFILE) = 0
close(3)                                = 0
close(4)                                = 0
exit_group(0)                           = ?

 

epoll

#include 

//Create an epoll instance and return its file descriptor
int epoll_create(int size);

//Add, change or remove the events being monitored
int epoll_ctl(int epfd, int op, int fd, struct epoll_event *event);

/* Describes one monitored descriptor for epoll_ctl() and one result for epoll_wait(). */
struct epoll_event {
    uint32_t events; /* events monitored by epoll, much the same as those poll(2) can monitor */
    epoll_data_t data; /* user data: can hold the file descriptor or other related data */
};
//epoll_ctl第二个参数op的操作定义
EPOLL_CTL_ADD	增加要监视的文件描述符
EPOLL_CTL_MOD	更改目标文件描述符的事件
EPOLL_CTL_DEL	删除要监视的文件描述符,event 参数会被忽略,可以传入 NULL

//Block while monitoring and return the results
//epfd: the epoll instance to operate on
//events + maxevents: together describe an array of structures (its start address and its length)
//timeout: how long to wait; -1 blocks indefinitely with no timeout
int epoll_wait(int epfd, struct epoll_event *events, int maxevents, int timeout);

一个例子

#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 

/*
 * Register fd_1 and fd_2 with an epoll instance and echo whatever becomes
 * readable on either of them. The loop ends when the data read starts with
 * "end" (as in the select/poll examples), when a descriptor reports end of
 * file, or when epoll flags an error on it. Both descriptors are
 * non-blocking while the loop runs and get their original flags back
 * afterwards.
 *
 * Returns 0 once the loop has ended; exits the process if an epoll call fails.
 */
int driver(int fd_1, int fd_2) {

    int fd_1_save = fcntl(fd_1, F_GETFL);
    int fd_2_save = fcntl(fd_2, F_GETFL);

    fcntl(fd_1, F_SETFL, fd_1_save|O_NONBLOCK);
    fcntl(fd_2, F_SETFL, fd_2_save|O_NONBLOCK);

    int epoll_fd = epoll_create(10);
    if (epoll_fd < 0) {
        printf("epoll_create error\n");
        exit(1);
    }

    struct epoll_event event;

    event.events = EPOLLIN;
    event.data.fd = fd_1;
    if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, fd_1, &event) < 0) {
        printf("epoll_ctl error\n");
        exit(1);
    }

    event.events = EPOLLIN;
    event.data.fd = fd_2;
    if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, fd_2, &event) < 0) {
        printf("epoll_ctl error\n");
        exit(1);
    }

    char buf_1[100], buf_2[100];
    int done = 0;

    while (!done) {
        int ready = epoll_wait(epoll_fd, &event, 1, -1);
        if (ready < 0) {
            printf("epoll_wait error\n");
            exit(1);
        }
        if (ready == 0) {
            continue;
        }

        /* The same event structure is reused to receive the single result. */
        int is_fd_1 = (event.data.fd == fd_1);
        int is_fd_2 = (event.data.fd == fd_2);
        char *buf = is_fd_1 ? buf_1 : buf_2;

        if ((is_fd_1 || is_fd_2) && (event.events & (EPOLLIN|EPOLLHUP))) {
            printf(is_fd_1 ? "fd_1 read...\n" : "fd_2 reaad.....\n");
            int count = read(event.data.fd, buf, sizeof(buf_1));
            if (count > 0) {
                write(STDOUT_FILENO, buf, count);
                /* The buffer is not NUL-terminated: bound the print by count. */
                printf("str->%.*s\n", count, buf);
                if (count >= 3 && buf[0] == 'e' && buf[1] == 'n' && buf[2] == 'd') {
                    done = 1;
                }
            } else if (count == 0) {
                done = 1;   /* end of file */
            }
        }
        if (event.events & EPOLLERR) {
            done = 1;
        }
    } /* end while */

    close(epoll_fd);
    fcntl(fd_1, F_SETFL, fd_1_save);
    fcntl(fd_2, F_SETFL, fd_2_save);
    return 0;
}


/* Open the terminal twice read-only and run driver() on the pair. */
int main(int argc, char *argv[]) {

    int fd_1 = open("/dev/tty",O_RDONLY);
    if (fd_1 < 0) {
        printf("open error\n");
        exit(1);
    }
    int fd_2 = open("/dev/tty",O_RDONLY);
    if (fd_2 < 0) {
        printf("open error\n");
        close(fd_1);
        exit(1);
    }

    driver(fd_1,fd_2);
    close(fd_1);
    close(fd_2);

    return 0;
}


//用strace分析程序
open("/dev/tty", O_RDONLY)              = 3
open("/dev/tty", O_RDONLY)              = 4
fcntl(3, F_GETFL)                       = 0x8000 (flags O_RDONLY|O_LARGEFILE)
fcntl(4, F_GETFL)                       = 0x8000 (flags O_RDONLY|O_LARGEFILE)
fcntl(3, F_SETFL, O_RDONLY|O_NONBLOCK|O_LARGEFILE) = 0
fcntl(4, F_SETFL, O_RDONLY|O_NONBLOCK|O_LARGEFILE) = 0
epoll_create(10)                        = 5
epoll_ctl(5, EPOLL_CTL_ADD, 3, {EPOLLIN, {u32=3, u64=3}}) = 0
epoll_ctl(5, EPOLL_CTL_ADD, 4, {EPOLLIN, {u32=4, u64=4}}) = 0
epoll_wait(5, 

aaaaaa
[{EPOLLIN, {u32=4, u64=4}}], 1, -1) = 1
fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 1), ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f11f3bd6000
write(1, "fd_2 reaad.....\n", 16fd_2 reaad.....)       = 16
read(4, "aaaaaa\n", 100)                = 7
write(1, "aaaaaa\n", 7aaaaaa)                 = 7
write(1, "str->aaaaaa\n", 12str->aaaaaa)           = 12
write(1, "\n", 1)                       = 1

epoll_wait(5

 

java的nio例子

import java.io.IOException;
import java.net.InetSocketAddress;
import java.nio.ByteBuffer;
import java.nio.channels.SelectionKey;
import java.nio.channels.Selector;
import java.nio.channels.SocketChannel;
import java.util.Iterator;
import java.util.Set;

/**
 *  * Created by yang.wang09 on 2018-12-04 14:00.
 *   */
/**
 * Minimal NIO client: connects to a fixed address, sends one HTTP GET request
 * and waits with a {@link Selector} until the channel becomes readable.
 */
public class X {

    public static void main(String[] args) throws IOException {
        go();
    }

    /**
     * Connects, sends the request and blocks in {@code select()} until the
     * registered channel reports an event that ends the loop.
     *
     * @throws IOException if connecting, writing or selecting fails
     */
    public static void go() throws IOException {
        // IP address used instead of the host name "www.baidu.com".
        String host = "220.181.111.37";
        int port = 80;
        InetSocketAddress ias = new InetSocketAddress(host, port);

        // try-with-resources closes the channel and the selector on every path.
        try (SocketChannel channel = SocketChannel.open(ias);
             Selector selector = Selector.open()) {
            channel.configureBlocking(false);
            channel.register(selector, SelectionKey.OP_READ);

            ByteBuffer buf = ByteBuffer.allocate(100);
            String message = "GET / HTTP/1.1\r\nHost:220.181.111.37\r\n\r\n\r\n";
            buf.put(message.getBytes());
            // flip() switches the buffer from filling to draining; without it
            // write() sends the unused tail of the buffer instead of the request.
            buf.flip();
            while (buf.hasRemaining()) {
                channel.write(buf);
            }

            boolean isLoop = true;
            while (isLoop) {
                int readyChannels = selector.select();
                if (readyChannels == 0) continue;
                Set<SelectionKey> selectedKeys = selector.selectedKeys();
                Iterator<SelectionKey> keyIterator = selectedKeys.iterator();
                while (keyIterator.hasNext()) {
                    // Inspect the key that was actually selected, and remove it
                    // from the selected set before any early exit.
                    SelectionKey key = keyIterator.next();
                    keyIterator.remove();
                    if (key.isAcceptable()) {
                        /* a connection was accepted by a ServerSocketChannel. */
                    } else if (key.isConnectable()) {
                        /* a connection was established with a remote server. */
                    } else if (key.isReadable()) {
                        /* a channel is ready for reading */
                        System.out.println("read ok");
                        isLoop = false;
                        break;
                    } else if (key.isWritable()) {
                        /* a channel is ready for writing */
                        channel.write(buf);
                        System.out.println("write ok");
                        isLoop = false;
                        break;
                    }
                }
            }
        }
    }
}

strace -o xx.log -ff java X

会生成很多log文件,将这些log文件都放到 log目录下,然后grep "select" *,grep "poll" *,发现只有epoll,其他的都没有

java version "1.8.0_131"
Java(TM) SE Runtime Environment (build 1.8.0_131-b11)
Java HotSpot(TM) 64-Bit Server VM (build 25.131-b11, mixed mode)

可见java 8 的多路复用底层是 epoll实现的

xx.log.16795:epoll_create(256)                       = 7
xx.log.16795:epoll_ctl(7, EPOLL_CTL_ADD, 5, {EPOLLIN, {u32=5, u64=15903069758740758533}}) = 0
xx.log.16795:epoll_ctl(7, EPOLL_CTL_ADD, 4, {EPOLLIN, {u32=4, u64=16044752212915650564}}) = 0
xx.log.16795:epoll_wait(7, [{EPOLLIN, {u32=4, u64=16044752212915650564}}], 8192, -1) = 1

 

 

 

readv和writev

#include
ssize_t readv(int filedes, const struct iovec *iov, int iovcnt);
ssize_t writev(int filedes, const struct iovec *iov, int iovcnt);
/* Return the number of bytes read or written on success, -1 on error. */

//这两个函数的第二个参数是指向iovec结构数组的一个指针:
/* One buffer of the array passed to readv()/writev(). */
struct iovec{
    void *iov_base; //starting address of buffer
    size_t iov_len; //size of buffer
};
//iov数组中的元素数由iovcnt说明。下图说明了readv和writev的参数和iovec结构。

UNIX环境高级编程-高级IO_第2张图片

writev以顺序iov[0],iov[1]至iov[iovcnt-1]从缓冲区中聚集输出数据。writev返回输出的字节总数。
readv则将读入的数据按照上述同样顺序散布到缓冲区中,readv总是先填满一个缓冲区,然后再填写下一个。readv返回读到
的总字节数。如果遇到文件结尾,已无数据可读,则返回0。

例子

#include 
#include 
#include  
/*
 * Scatter-read up to 15 bytes of a.txt into two buffers (5 + 10 bytes) with
 * readv(), then gather-write exactly the bytes that were read to b.txt with
 * writev(). Returns 0 on success and -1 if a file cannot be opened or read.
 */
int main(){
	char buf1[5],buf2[10];
	struct iovec iov[2];
	iov[0].iov_base = buf1;
	iov[0].iov_len = 5;
	iov[1].iov_base = buf2;
	iov[1].iov_len = 10;
	int fd = open("a.txt",O_RDWR);
	if(fd < 0){
		perror("open");
		return -1;
	}
	int rsize = readv(fd, iov, 2);
	if(rsize < 0){
		perror("readv");
		close(fd);
		return -1;
	}
	printf("rsize = %d\n",rsize);
	close(fd);

	/* Shrink the buffer lengths to what was actually read, so a short read
	 * does not cause uninitialised stack bytes to be written out. */
	size_t remaining = (size_t)rsize;
	for(int i = 0; i < 2; i++){
		if(iov[i].iov_len > remaining){
			iov[i].iov_len = remaining;
		}
		remaining -= iov[i].iov_len;
	}

	fd = open("b.txt", O_RDWR|O_CREAT, S_IRUSR|S_IWUSR);
	if(fd < 0){
		perror("open");
		return -1;
	}
	int wsize = writev(fd,iov,2);
	printf("wsize = %d\n",wsize);
	close(fd);
	return 0;
}

 

 

 

mmap

mmap将一个文件或者其它对象映射进内存。文件被映射到若干个页上,如果文件大小不是页大小的整数倍,最后一页中未被使用的空间将被清零。mmap在用户空间程序访问文件、设备内存时用途很广。

具体函数

#include
//addr: start address
//len: length to map
//prot: PROT_READ, the mapping can be read;
//      PROT_WRITE, the mapping can be written; PROT_EXEC, the mapping can be executed; PROT_NONE, the mapping cannot be accessed
//flag: mapping flags (the MAP_* values)
//fd, off: descriptor to map and the starting position within it
void* mmap(void* addr, size_t len, int prot, int flag, int fd, off_t off);

//addr: start address
//len: length
//prot: same values as for mmap
int mprotect(void* addr, size_t len, int prot);

//addr and len: same as for mmap
//flags:
//MS_ASYNC: does not actually require the kernel to do anything; the kernel synchronizes on its own schedule
//MS_SYNC: asks the kernel to complete the writes before returning
//MS_INVALIDATE: an optional flag that tells the kernel to discard the parts that have not been synchronized
int msync(void* addr, size_t len, int flags);

int munmap(void* addr, size_t len);
mmap的flag

MAP_FIXED //使用指定的映射起始地址,如果由addr和len参数指定的内存区重叠于现存的映射空间,重叠部分将会被丢弃。如果指定的起始地址不可用,操作将会失败。并且起始地址必须落在页的边界上。
MAP_SHARED //与其它所有映射这个对象的进程共享映射空间。对共享区的写入,相当于输出到文件。但在msync()或者munmap()被调用之前,文件可能不会被实际更新。
MAP_PRIVATE //建立一个写入时拷贝的私有映射。内存区域的写入不会影响到原文件。这个标志和以上标志是互斥的,只能使用其中一个。
MAP_DENYWRITE //这个标志被忽略。
MAP_EXECUTABLE //同上
MAP_NORESERVE //不要为这个映射保留交换空间。当交换空间被保留,对映射区修改的可能会得到保证。当交换空间不被保留,同时内存不足,对映射区的修改会引起段违例信号。
MAP_LOCKED //锁定映射区的页面,从而防止页面被交换出内存。
MAP_GROWSDOWN //用于堆栈,告诉内核VM系统,映射区可以向下扩展。
MAP_ANONYMOUS //匿名映射,映射区不与任何文件关联。
MAP_ANON //MAP_ANONYMOUS的别称,不再被使用。
MAP_FILE //兼容标志,被忽略。
MAP_32BIT //将映射区放在进程地址空间的低2GB,MAP_FIXED指定时会被忽略。当前这个标志只在x86-64平台上得到支持。
MAP_POPULATE //为文件映射通过预读的方式准备好页表。随后对映射区的访问不会被页违例阻塞。
MAP_NONBLOCK //仅和MAP_POPULATE一起使用时才有意义。不执行预读,只为已存在于内存中的页面建立页表入口。

例子

#include 
#include 
#include 
#include 
#include 
#define COPYING (1024*1024*1024)  /* 1 GB */

/*
 * Copy argv[1] to argv[2] through memory mappings, at most COPYING bytes per
 * pass. Returns 0 on success and 1 on a usage error or a failed system call.
 */
int main(int argc, char *argv[]) {
    if(argc != 3) {
        printf("usage %s <fromfile> <tofile>\n",argv[0]);
        return 1;
    }

    int fd_in = open(argv[1], O_RDONLY);
    if (fd_in < 0) {
        perror("open input");
        return 1;
    }
    int fd_out = open(argv[2], O_RDWR|O_CREAT|O_TRUNC,0777);
    if (fd_out < 0) {
        perror("open output");
        close(fd_in);
        return 1;
    }

    int rc = 1;
    struct stat stat_buf;
    if (fstat(fd_in,&stat_buf) < 0) {
        perror("fstat");
        goto out;
    }
    /* The output file must already be as long as the region about to be
     * mapped; writing to a mapping beyond the end of the file raises SIGBUS. */
    if (ftruncate(fd_out,stat_buf.st_size) < 0) {
        perror("ftruncate");
        goto out;
    }

    off_t off_size = 0;
    while(off_size < stat_buf.st_size) {
        size_t copy_size;
        if((stat_buf.st_size - off_size) > COPYING) {
            copy_size = COPYING;
        }
        else {
            copy_size = stat_buf.st_size - off_size;
        }

        void *src = mmap(0,copy_size,PROT_READ, MAP_SHARED,fd_in,off_size);
        if (src == MAP_FAILED) {
            perror("mmap input");
            goto out;
        }
        void *dest = mmap(0,copy_size,PROT_READ|PROT_WRITE, MAP_SHARED, fd_out, off_size);
        if (dest == MAP_FAILED) {
            perror("mmap output");
            munmap(src,copy_size);
            goto out;
        }

        memcpy(dest, src, copy_size);
        munmap(src,copy_size);
        munmap(dest,copy_size);
        off_size += copy_size;
    }
    rc = 0;

out:
    close(fd_in);
    close(fd_out);
    return rc;
}

 

 

 

参考

文件映射IO函数

mmap函数和mprotect

高级IO--存储映射

linux驱动mmap内存映射

一起学 Unix 环境高级编程 (APUE) 之 高级 IO

select、poll、epoll之间的区别总结[整理]

使用SocketChannel的NIO客户机服务器通信示例

Linux 系统 文件锁 fcntl函数详解

文件锁 flock及fcntl flock

你可能感兴趣的:(Linux,c语言)