在调用socket成功返回后,我们得到与socket关联的文件描述符。然后我们以该描述符和sockaddr地址结构对象为参数调用bind,就实现了socket对象地址的绑定。那这个绑定到底是什么意思?这个绑定操作是必须的吗?绑定操作之后,socket对象又发生了什么?也许还有更多的疑问,我们到协议栈的源码中寻找答案。先贴上bind系统调用的源码:
/*
 * bind system call entry point.
 *
 * fd:      file descriptor referring to the socket to bind
 * umyaddr: user-space pointer to the address to bind to
 * addrlen: length in bytes of the address at umyaddr
 *
 * Returns the result of the socket's bind operation, or the error
 * produced by an earlier step (socket lookup, address copy, security
 * hook).
 */
SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
{
	struct socket *sock;
	struct sockaddr_storage address;
	int err, fput_needed;

	/*
	 * Resolve fd to its socket. Per the original annotation: fd indexes
	 * the current process's file descriptor table (files_struct) to find
	 * the file instance, and the socket is then taken from that file's
	 * private_data member.
	 */
	sock = sockfd_lookup_light(fd, &err, &fput_needed);
	if (sock) {
		/* Copy the address from user space into the kernel buffer. */
		err = move_addr_to_kernel(umyaddr, addrlen, (struct sockaddr *)&address);
		if (err >= 0) {
			/* Security hook; not discussed in this article. */
			err = security_socket_bind(sock,
						   (struct sockaddr *)&address, addrlen);
			/* Dispatch to the bind handler in the socket's ops table. */
			if (!err)
				err = sock->ops->bind(sock,
						      (struct sockaddr *)
						      &address, addrlen);
		}
		/* Pairs with sockfd_lookup_light() above. */
		fput_light(sock->file, fput_needed);
	}
	return err;
}
通过传入的文件描述符,我们找到对应的socket对象,如何找到的,可以参看系列文章“附2”,这里不详解。接着将包含地址信息的sockaddr对象从用户空间拷贝到内核空间,接着是安全方面的东西,接着就是我们关注的重点:
/* Excerpt: when no earlier step failed, dispatch to the bind handler in the socket's ops table. */
if (!err) err = sock->ops->bind(sock, (struct sockaddr *) &address, addrlen);
通过系列文章《Linux内核协议栈(3) 刨根问底socket调用 》我们知道实际调用的是协议对应的bind函数。我们以UDP协议为例:
static struct inet_protosw inetsw_array[] = { { .type = SOCK_STREAM, .protocol = IPPROTO_TCP, .prot = &tcp_prot, .ops = &inet_stream_ops, .capability = -1, .no_check = 0, .flags = INET_PROTOSW_PERMANENT | INET_PROTOSW_ICSK, }, { .type = SOCK_DGRAM, .protocol = IPPROTO_UDP, .prot = &udp_prot, //协议描述块 .ops = &inet_dgram_ops,//协议相关额socket操作函数级 .capability = -1, .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_PERMANENT, }, { .type = SOCK_RAW, .protocol = IPPROTO_IP, /* wild card */ .prot = &raw_prot, .ops = &inet_sockraw_ops, .capability = CAP_NET_RAW, .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_REUSE, } };
而inet_dgram_ops的详细定义为:
const struct proto_ops inet_dgram_ops = { .family = PF_INET, .owner = THIS_MODULE, .release = inet_release, .bind = inet_bind, .connect = inet_dgram_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = inet_getname, .poll = udp_poll, .ioctl = inet_ioctl, .listen = sock_no_listen, .shutdown = inet_shutdown, .setsockopt = sock_common_setsockopt, .getsockopt = sock_common_getsockopt, .sendmsg = inet_sendmsg, .recvmsg = sock_common_recvmsg, .mmap = sock_no_mmap, .sendpage = inet_sendpage, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, #endif };
可知sock->ops->bind实际调用的是inet_bind,我们分析下该函数:
/* * *所谓绑定就是设置inet_sock结构 * * */ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in *addr = (struct sockaddr_in *)uaddr; struct sock *sk = sock->sk; /* 传输层实例 */ struct inet_sock *inet = inet_sk(sk); /* INET实例 */ unsigned short snum; /* 要绑定的端口 */ int chk_addr_ret;/* IP地址类型 */ int err; /* If the socket has its own bind function then use it. (RAW) */ if (sk->sk_prot->bind) { err = sk->sk_prot->bind(sk, uaddr, addr_len);//网络层协议本身的bind函数,tcp/udp都没有提供该函数,在op中赋值为inet_bind goto out; } err = -EINVAL; if (addr_len < sizeof(struct sockaddr_in))//检查长度合法性 goto out; /* 在路由中检查IP地址类型,单播、多播还是广播 */ chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);//??? /* Not specified by any standard per-se, however it breaks too * many applications when removed. It is unfortunate since * allowing applications to make a non-local bind solves * several problems with systems using dynamic addressing. * (ie. your servers still start up even if your ISDN link * is temporarily down) */ /* * sysctl_ip_nonlocal_bind表示是否允许绑定非本地的IP地址。 * inet->freebind表示是否允许绑定非主机地址。 * 这里需要允许绑定非本地地址,除非是发送给自己、多播或广播。 */ err = -EADDRNOTAVAIL; if (!sysctl_ip_nonlocal_bind && !(inet->freebind || inet->transparent) && addr->sin_addr.s_addr != htonl(INADDR_ANY) && chk_addr_ret != RTN_LOCAL && chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) goto out; snum = ntohs(addr->sin_port);//端口号 err = -EACCES; if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))//超级用户才可绑定1-1023端口 goto out; /* We keep a pair of addresses. rcv_saddr is the one * used by hash lookups, and saddr is used for transmit. * * In the BSD API these are the same except where it * would be illegal to use them (multicast/broadcast) in * which case the sending device address is used. */ lock_sock(sk); /* Check these errors (active socket, double bind). */ err = -EINVAL; /* Check these errors (active socket, double bind). 
* 如果套接字不在初始状态TCP_CLOSE,或者已经绑定端口了,则出错。 * 一个socket最多可以绑定一个端口,而一个端口则可能被多个socket共用。 */ if (sk->sk_state != TCP_CLOSE || inet->num) goto out_release_sock; inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr; if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) inet->saddr = 0; /* Use device */ /* Make sure we are allowed to bind here. * 如果使用的是TCP,则sk_prot为tcp_prot,get_port为inet_csk_get_port() * 端口可用的话返回0。 */ if (sk->sk_prot->get_port(sk, snum)) {/*-------->将sk加入哈希表*/ udp_v4_get_port --> udp_lib_get_port inetsw_array inet->saddr = inet->rcv_saddr = 0; err = -EADDRINUSE; goto out_release_sock; } /* inet_rcv_saddr表示绑定的地址,接收数据时用于查找socket */ if (inet->rcv_saddr) sk->sk_userlocks |= SOCK_BINDADDR_LOCK;/* 表示绑定了本地地址 */ if (snum) sk->sk_userlocks |= SOCK_BINDPORT_LOCK;/* 绑定端口 */ inet->sport = htons(inet->num); inet->daddr = 0; inet->dport = 0; sk_dst_reset(sk); err = 0; out_release_sock: release_sock(sk); out: return err; }
分析该函数,我们发现如果协议本身有bind函数,会执行协议的bind函数,然后跳到函数结尾返回;如果协议本身没有定义bind函数,则继续往下执行。查看udp的协议描述块,我们没有发现bind函数,所以if内的代码是不会执行的,而是继续往下执行。
udp的协议描述块如下(注意字段udp_table):
struct proto udp_prot = { .name = "UDP", .owner = THIS_MODULE, .close = udp_lib_close, .connect = ip4_datagram_connect, .disconnect = udp_disconnect, .ioctl = udp_ioctl, .destroy = udp_destroy_sock, .setsockopt = udp_setsockopt, .getsockopt = udp_getsockopt, .sendmsg = udp_sendmsg, .recvmsg = udp_recvmsg, .sendpage = udp_sendpage, .backlog_rcv = __udp_queue_rcv_skb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, .get_port = udp_v4_get_port, .memory_allocated = &udp_memory_allocated, .sysctl_mem = sysctl_udp_mem, .sysctl_wmem = &sysctl_udp_wmem_min, .sysctl_rmem = &sysctl_udp_rmem_min, .obj_size = sizeof(struct udp_sock), .slab_flags = SLAB_DESTROY_BY_RCU, .h.udp_table = &udp_table,//全局链表头,注意该字段 #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udp_setsockopt, .compat_getsockopt = compat_udp_getsockopt, #endif };
/* Excerpt: if the protocol supplies its own bind, call it and jump straight to the out label. */
if (sk->sk_prot->bind) { err = sk->sk_prot->bind(sk, uaddr, addr_len); goto out; }
往下的代码主要是地址信息合法性检查,还有inet_sock对象的设置(实际上是udp_sock对象中的inet_sock对象——查看udp_sock定义,可知udp_sock中包含一个inet_sock对象)、申请端口等。留意端口申请代码:
/*
 * Excerpt: reserve the port through the protocol's get_port hook.
 * For UDP (see inetsw_array) the call chain is
 * udp_v4_get_port --> udp_lib_get_port, which also adds the sock object
 * to the udp_table hash table. A non-zero return means failure.
 */
if (sk->sk_prot->get_port(sk, snum)) {
	inet->saddr = inet->rcv_saddr = 0;
	err = -EADDRINUSE;
	goto out_release_sock;
}
查看协议描述块可知,get_port指向的是udp_v4_get_port,而它最终调用的是udp_lib_get_port,
详细代码注释可见《Linux内核协议栈(附3)udp_lib_get_port函数 》
该函数除了得到可用的端口外,主要作用是将sock对象加入udp_table哈希表。我们知道哈希表可以加快查找的速度,它在数据接收过程中发挥作用。在数据接收过程中,我们接收发往本机的数据报,依据的是目的IP。但是系统中有很多进程、很多socket连接,仅凭目的IP并不知道数据是要给哪个进程的。通过端口号和哈希表可以快速定位接收数据的sock,然后将数据放到该sock的接收队列中,等待用户线程取数据,这样就完成了一次网络通信。
未完待续...