Linux内核协议栈(5) 跟踪bind系统调用

在调用socket成功返回后,我们得到与socket关联的文件描述符。然后我们以该描述符和sockaddr地址结构对象为参数调用bind,就实现了socket对象地址的绑定。那这个绑定到底是个什么意思?这个绑定操作是必须的吗?绑定操作之后,socket对象又发生了什么?也许还有更多的疑问,我们到协议栈的源码中寻找答案。先贴上bind系统调用的源码:

/*
 * bind(2) entry point: resolve fd to its socket, copy the caller's address
 * into kernel memory, run the security hook, then dispatch to the bind
 * handler in the socket's ops table.
 *
 * Returns whatever the last step executed left in err: the lookup error if
 * fd did not resolve to a socket, a negative value from the address copy,
 * a non-zero value from the security hook, or the result of ops->bind.
 */
SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
{
	struct socket *sock;
	struct sockaddr_storage address;
	int err, fput_needed;

	/*
	 * Resolve fd to its struct socket. The helper is not shown here; per
	 * the original author's note it finds the struct file in the current
	 * process's descriptor table (files_struct) and takes the socket from
	 * file->private_data. err is passed by pointer, so presumably the
	 * helper fills it in when the lookup fails.
	 */
	sock = sockfd_lookup_light(fd, &err, &fput_needed);// fd -> file -> socket
	if (sock) {
		err = move_addr_to_kernel(umyaddr, addrlen, (struct sockaddr *)&address);// copy the address from user space into kernel space
		if (err >= 0) {
			err = security_socket_bind(sock,// security hook; not covered in this walkthrough
						   (struct sockaddr *)&address,
						   addrlen);
			if (!err)
				err = sock->ops->bind(sock,
						      (struct sockaddr *)
						      &address, addrlen);
		}
		/* Pairs with the lookup above; fput_needed presumably records whether a file reference was taken. */
		fput_light(sock->file, fput_needed);
	}
	return err;
}

通过传入的文件描述符,我们找到对应的socket对象,如何找到的,可以参看系列文章“附2”,这里不详解。接着将包含地址信息的sockaddr对象从用户空间拷贝到内核空间,接着是安全方面的东西,接着就是我们关注的重点:

/* Reached only when security_socket_bind() returned 0: hand off to the bind handler in the socket's ops table. */
if (!err)
	err = sock->ops->bind(sock,
				(struct sockaddr *)
				 &address, addrlen);

通过系列文章《Linux内核协议栈(3) 刨根问底socket调用》我们知道,sock->ops->bind实际调用的是协议对应的bind函数。下面以udp协议为例,先看协议注册表inetsw_array中UDP对应的条目:

/*
 * Table of inet_protosw entries. Each entry ties a (socket type, protocol)
 * pair to its protocol descriptor (.prot) and to the socket-level operations
 * table (.ops) that sock->ops will point at.
 */
static struct inet_protosw inetsw_array[] =
{
	{
		.type =       SOCK_STREAM,
		.protocol =   IPPROTO_TCP,
		.prot =       &tcp_prot,
		.ops =        &inet_stream_ops,
		.capability = -1,
		.no_check =   0,
		.flags =      INET_PROTOSW_PERMANENT |
			      INET_PROTOSW_ICSK,
	},

	{
		.type =       SOCK_DGRAM,
		.protocol =   IPPROTO_UDP,
		.prot =       &udp_prot,  /* protocol descriptor block */
		.ops =        &inet_dgram_ops,/* protocol-specific socket operations table */
		.capability = -1,
		.no_check =   UDP_CSUM_DEFAULT,
		.flags =      INET_PROTOSW_PERMANENT,
       },


       {
	       .type =       SOCK_RAW,
	       .protocol =   IPPROTO_IP,	/* wild card */
	       .prot =       &raw_prot,
	       .ops =        &inet_sockraw_ops,
	       .capability = CAP_NET_RAW,
	       .no_check =   UDP_CSUM_DEFAULT,
	       .flags =      INET_PROTOSW_REUSE,
       }
};

而inet_dgram_ops的详细定义为:

/*
 * Socket-level operations table referenced by the SOCK_DGRAM/IPPROTO_UDP
 * entry of inetsw_array. Operations that make no sense for this socket kind
 * are wired to sock_no_* stubs.
 */
const struct proto_ops inet_dgram_ops = {
	.family		   = PF_INET,
	.owner		   = THIS_MODULE,
	.release	   = inet_release,
	.bind		   = inet_bind,	/* what sock->ops->bind resolves to for UDP sockets */
	.connect	   = inet_dgram_connect,
	.socketpair	   = sock_no_socketpair,
	.accept		   = sock_no_accept,
	.getname	   = inet_getname,
	.poll		   = udp_poll,
	.ioctl		   = inet_ioctl,
	.listen		   = sock_no_listen,
	.shutdown	   = inet_shutdown,
	.setsockopt	   = sock_common_setsockopt,
	.getsockopt	   = sock_common_getsockopt,
	.sendmsg	   = inet_sendmsg,
	.recvmsg	   = sock_common_recvmsg,
	.mmap		   = sock_no_mmap,
	.sendpage	   = inet_sendpage,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_sock_common_setsockopt,
	.compat_getsockopt = compat_sock_common_getsockopt,
#endif
};

可知sock->ops->bind实际调用的是inet_bind,我们分析下该函数:

/*
*
*所谓绑定就是设置inet_sock结构
*
*
*/
int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
	struct sock *sk = sock->sk;		/* 传输层实例 */  
	struct inet_sock *inet = inet_sk(sk);		/* INET实例 */  
	unsigned short snum; 		/* 要绑定的端口 */  
	int chk_addr_ret;/* IP地址类型 */ 
	int err;

	/* If the socket has its own bind function then use it. (RAW) */
	if (sk->sk_prot->bind) {
		err = sk->sk_prot->bind(sk, uaddr, addr_len);//网络层协议本身的bind函数,tcp/udp都没有提供该函数,在op中赋值为inet_bind
		goto out;
	}
	err = -EINVAL;
	if (addr_len < sizeof(struct sockaddr_in))//检查长度合法性
		goto out;
	
	 /* 在路由中检查IP地址类型,单播、多播还是广播 */  
	chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);//???

	/* Not specified by any standard per-se, however it breaks too
	 * many applications when removed.  It is unfortunate since
	 * allowing applications to make a non-local bind solves
	 * several problems with systems using dynamic addressing.
	 * (ie. your servers still start up even if your ISDN link
	 *  is temporarily down)
	 */

	/*
	* sysctl_ip_nonlocal_bind表示是否允许绑定非本地的IP地址。 
	* inet->freebind表示是否允许绑定非主机地址。
	* 这里需要允许绑定非本地地址,除非是发送给自己、多播或广播。 
	*/
	err = -EADDRNOTAVAIL;
	if (!sysctl_ip_nonlocal_bind &&
	    !(inet->freebind || inet->transparent) &&
	    addr->sin_addr.s_addr != htonl(INADDR_ANY) &&
	    chk_addr_ret != RTN_LOCAL &&
	    chk_addr_ret != RTN_MULTICAST &&
	    chk_addr_ret != RTN_BROADCAST)
		goto out;


	snum = ntohs(addr->sin_port);//端口号
	err = -EACCES;
	if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))//超级用户才可绑定1-1023端口
		goto out;

	/*      We keep a pair of addresses. rcv_saddr is the one
	 *      used by hash lookups, and saddr is used for transmit.
	 *
	 *      In the BSD API these are the same except where it
	 *      would be illegal to use them (multicast/broadcast) in
	 *      which case the sending device address is used.
	 */
	lock_sock(sk);

	/* Check these errors (active socket, double bind). */
	err = -EINVAL;

	/* Check these errors (active socket, double bind).  
     * 如果套接字不在初始状态TCP_CLOSE,或者已经绑定端口了,则出错。 
     * 一个socket最多可以绑定一个端口,而一个端口则可能被多个socket共用。 
     */  
	if (sk->sk_state != TCP_CLOSE || inet->num)
		goto out_release_sock;

	inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr;
	if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
		inet->saddr = 0;  /* Use device */

    /* Make sure we are allowed to bind here. 
     * 如果使用的是TCP,则sk_prot为tcp_prot,get_port为inet_csk_get_port() 
     * 端口可用的话返回0。 
     */ 

	if (sk->sk_prot->get_port(sk, snum)) {/*-------->将sk加入哈希表*/ udp_v4_get_port --> udp_lib_get_port inetsw_array
		inet->saddr = inet->rcv_saddr = 0;
		err = -EADDRINUSE;
		goto out_release_sock;
	}

	 /* inet_rcv_saddr表示绑定的地址,接收数据时用于查找socket */  
	if (inet->rcv_saddr)
		sk->sk_userlocks |= SOCK_BINDADDR_LOCK;/* 表示绑定了本地地址 */  
	if (snum)
		sk->sk_userlocks |= SOCK_BINDPORT_LOCK;/* 绑定端口 */  
	inet->sport = htons(inet->num);
	inet->daddr = 0;
	inet->dport = 0;
	sk_dst_reset(sk);
	err = 0;
out_release_sock:
	release_sock(sk);
out:
	return err;
}

分析该函数,我们发现如果协议本身有bind函数,会执行协议的bind函数,然后跳到函数结尾返回;如果协议本身没有定义bind函数则往下执行。查看udp的协议描述块,我们没有发现bind函数,所以下面这段if内的代码是没有执行的,而是继续往下执行。

udp的协议描述块如下(注意字段udp_table):

/*
 * Protocol descriptor for UDP, referenced as .prot by the SOCK_DGRAM entry
 * of inetsw_array. No .bind member is set here, so inet_bind() skips its
 * sk_prot->bind branch for UDP sockets and reserves the port via .get_port.
 */
struct proto udp_prot = {
	.name		   = "UDP",
	.owner		   = THIS_MODULE,
	.close		   = udp_lib_close,
	.connect	   = ip4_datagram_connect,
	.disconnect	   = udp_disconnect,
	.ioctl		   = udp_ioctl,
	.destroy	   = udp_destroy_sock,
	.setsockopt	   = udp_setsockopt,
	.getsockopt	   = udp_getsockopt,
	.sendmsg	   = udp_sendmsg,
	.recvmsg	   = udp_recvmsg,
	.sendpage	   = udp_sendpage,
	.backlog_rcv	   = __udp_queue_rcv_skb,
	.hash		   = udp_lib_hash,
	.unhash		   = udp_lib_unhash,
	.get_port	   = udp_v4_get_port,	/* called from inet_bind() as sk->sk_prot->get_port */
	.memory_allocated  = &udp_memory_allocated,
	.sysctl_mem	   = sysctl_udp_mem,
	.sysctl_wmem	   = &sysctl_udp_wmem_min,
	.sysctl_rmem	   = &sysctl_udp_rmem_min,
	.obj_size	   = sizeof(struct udp_sock),
	.slab_flags	   = SLAB_DESTROY_BY_RCU,
	.h.udp_table	   = &udp_table,/* global UDP socket table (the original note calls it a global list head); note this field */
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_udp_setsockopt,
	.compat_getsockopt = compat_udp_getsockopt,
#endif
};

	/* Taken only when the protocol descriptor supplies its own bind hook; udp_prot does not. */
	if (sk->sk_prot->bind) {
		err = sk->sk_prot->bind(sk, uaddr, addr_len);
		goto out;
	}

往下的代码主要是地址信息合法性检查,还有inet_sock对象的设置(实际上是udp_sock对象中内嵌的inet_sock对象——查看udp_sock定义,可知udp_sock中包含一个inet_sock对象),以及申请端口等。留意端口申请代码:

	/*
	 * Reserve the port. For UDP (sk_prot taken from inetsw_array) the chain
	 * is udp_v4_get_port -> udp_lib_get_port, which also adds the sock to
	 * the udp_table hash table.
	 */
	if (sk->sk_prot->get_port(sk, snum)) {
		inet->saddr = inet->rcv_saddr = 0;
		err = -EADDRINUSE;
		goto out_release_sock;
	}

查看协议描述块可知,get_port指向udp_v4_get_port,它进而调用udp_lib_get_port,

详细代码注释可见《Linux内核协议栈(附3)udp_lib_get_port函数》。

该函数除了得到可用的端口外,主要作用是将sock对象加入udp_table哈希表。我们知道哈希表可以加快查找的速度,它用在数据接收过程中:本机接收发往自己的数据报时,依据的是目的IP;但是系统中有很多进程、很多socket,仅凭IP并不知道数据是要交给哪个进程的。这时通过端口号和哈希表就能快速定位到接收数据的sock,然后将数据放到该sock的接收队列中,等待用户线程取走数据,这样就完成了一次网络通信。





未完待续...

你可能感兴趣的:(Linux内核协议栈(5) 跟踪bind系统调用)