TCP之系统调用accept()

这篇笔记来看看TCP对accept()系统调用的实现。

1. 内核入口

// As the kernel comment below also explains, the accept() system call must:
// 1. create a new socket for communication between server and client;
// 2. create a new fd through which the application reads/writes that socket.
/*
 *	For accept, we attempt to create a new socket, set up the link
 *	with the client, wake up the client, then return the new
 *	connected fd. We collect the address of the connector in kernel
 *	space and move it to user at the very end. This is unclean because
 *	we open the socket then return an error.
 *
 *	1003.1g adds the ability to recvmsg() to query connection pending
 *	status to recvmsg. We need to add that support in a way that's
 *	clean when we restructure accept also.
 *
 *	fd:             descriptor of the listening socket
 *	upeer_sockaddr: optional user buffer for the peer address; when NULL
 *	                the address is not looked up at all
 *	upeer_addrlen:  user pointer handed to move_addr_to_user() together
 *	                with upeer_sockaddr
 *
 *	Returns the new descriptor on success, or a negative error code.
 */
asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
			   int __user *upeer_addrlen)
{
	struct socket *sock, *newsock;
	struct file *newfile;
	int err, len, newfd, fput_needed;
	char address[MAX_SOCK_ADDR];

	// Map the listening fd to its struct socket. On failure err is returned
	// as-is (presumably filled in by sockfd_lookup_light through &err).
	sock = sockfd_lookup_light(fd, &err, &fput_needed);
	if (!sock)
		goto out;

	// Allocate a struct socket for the new communication socket.
	err = -ENFILE;
	if (!(newsock = sock_alloc()))
		goto out_put;
	// The new socket inherits type and operation table from the listener.
	newsock->type = sock->type;
	newsock->ops = sock->ops;

	/*
	 * We don't need try_module_get here, as the listening socket (sock)
	 * has the protocol module (sock->ops->owner) held.
	 */
	__module_get(newsock->ops->owner);
	// Reserve a file descriptor (and struct file) for the new socket.
	newfd = sock_alloc_fd(&newfile);
	if (unlikely(newfd < 0)) {
		err = newfd;
		sock_release(newsock);
		goto out_put;
	}
	// Bind the struct file to the new socket.
	err = sock_attach_fd(newsock, newfile);
	if (err < 0)
		goto out_fd_simple;

	// Security hook (the original note attributes this to SELinux).
	err = security_socket_accept(sock, newsock);
	if (err)
		goto out_fd;

	// Delegate to the protocol family's accept(); for IPv4 this is
	// inet_accept(), shown later in this note.
	err = sock->ops->accept(sock, newsock, sock->file->f_flags);
	if (err < 0)
		goto out_fd;

	// If the caller asked for the client's address, fetch it with getname()
	// and copy it out to user space.
	// NOTE(review): the literal 2 presumably selects the peer address —
	// confirm against the getname() implementation.
	if (upeer_sockaddr) {
		if (newsock->ops->getname(newsock, (struct sockaddr *)address,
					  &len, 2) < 0) {
			err = -ECONNABORTED;
			goto out_fd;
		}
		err = move_addr_to_user(address, len, upeer_sockaddr,
					upeer_addrlen);
		if (err < 0)
			goto out_fd;
	}

	// Publish the file under newfd; from here on the fd is the return value.
	/* File flags are not inherited via accept(), unlike on other OSes. */
	fd_install(newfd, newfile);
	err = newfd;

	security_socket_post_accept(sock, newsock);

out_put:
	// Drop the reference on the listening socket's file taken by the lookup.
	fput_light(sock->file, fput_needed);
out:
	return err;
out_fd_simple:
	// sock_attach_fd() failed: socket and file are released separately.
	sock_release(newsock);
	put_filp(newfile);
	put_unused_fd(newfd);
	goto out_put;
out_fd:
	// File is already attached: only fput() the file here (presumably this
	// also tears down newsock — TODO confirm), then return the unused fd.
	fput(newfile);
	put_unused_fd(newfd);
	goto out_put;
}

2. IPv4协议族的accept

/*
 *	Accept a pending connection. The TCP layer now gives BSD semantics.
 */
int inet_accept(struct socket *sock, struct socket *newsock, int flags)
{
	struct sock *sk1 = sock->sk;
	int err = -EINVAL;

	//直接调用传输层的accept()回调,TCP为inet_csk_accept(),该回调需要返回
	//新的通信套接字对应的TCB
	struct sock *sk2 = sk1->sk_prot->accept(sk1, flags, &err);
	if (!sk2)
		goto do_err;

	lock_sock(sk2);
	BUG_TRAP((1 << sk2->sk_state) &
		 (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE));
	//将新的TCB和新的套接字结构sock关联起来
	sock_graft(sk2, newsock);
	//设置套接字结构中的状态为”已连接“
	newsock->state = SS_CONNECTED;
	err = 0;
	release_sock(sk2);
do_err:
	return err;
}

/*
 * Attach the sock `sk` to the socket `parent`, linking the two in both
 * directions. All updates happen with sk->sk_callback_lock write-held.
 */
static inline void sock_graft(struct sock *sk, struct socket *parent)
{
	write_lock_bh(&sk->sk_callback_lock);
	/* The sock uses the socket's `wait` member as its sleep queue. */
	sk->sk_sleep = &parent->wait;
	/* socket -> sock link */
	parent->sk = sk;
	/* sock -> socket back-link */
	sk->sk_socket = parent;
	/* Security hook, invoked once both links are in place. */
	security_sock_graft(sk, parent);
	write_unlock_bh(&sk->sk_callback_lock);
}

3. TCP的accept实现

/*
 * This will accept the next outstanding connection.
 *
 * sk:    transport control block of the listening socket
 * flags: only O_NONBLOCK is examined here
 * err:   receives a negative error code on failure; not written on success
 *
 * Returns the sock taken from the accept queue, or NULL on failure.
 * The listening sock is locked for the duration of the call.
 */
struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct sock *newsk;
	int rc = -EINVAL;

	lock_sock(sk);

	/*
	 * We need to make sure that this socket is listening,
	 * and that it has something pending.
	 */
	if (sk->sk_state != TCP_LISTEN)
		goto fail;

	/* Nothing established yet: either bail out or sleep, per O_NONBLOCK. */
	if (reqsk_queue_empty(&icsk->icsk_accept_queue)) {
		/* Per the original note, timeo is 0 in non-blocking mode. */
		long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);

		/* If this is a non blocking socket don't sleep */
		if (timeo == 0) {
			rc = -EAGAIN;
			goto fail;
		}

		/* Sleep until the accept queue is non-empty (or an error). */
		rc = inet_csk_wait_for_connect(sk, timeo);
		if (rc != 0)
			goto fail;
	}

	/*
	 * A connection is available, possibly after having slept above:
	 * take it off the accept queue.
	 */
	newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk);
	BUG_TRAP(newsk->sk_state != TCP_SYN_RECV);
	release_sock(sk);
	return newsk;

fail:
	*err = rc;
	release_sock(sk);
	return NULL;
}

3.1 获取通信套接字的TCB

在之前介绍服务器端三次握手过程的笔记中已经提到:三次握手完成后,这些等待accept()的连接请求块(每个请求块都指向一个已创建好的传输控制块)已经被放到了监听套接字的accept接收队列中,这里只需要把它们从队列中取出来。

/*
 * Take the first request off the accept queue and return the sock it
 * refers to. The parent's accept-backlog counter is decremented and the
 * request block itself is freed before returning.
 */
static inline struct sock *reqsk_queue_get_child(struct request_sock_queue *queue,
						 struct sock *parent)
{
	struct request_sock *head;
	struct sock *child;

	/* Unlink the first completed request from the accept queue. */
	head = reqsk_queue_remove(queue);

	/*
	 * Remember the sock attached to the request; per the original note
	 * it was created when the three-way handshake completed.
	 */
	child = head->sk;
	BUG_TRAP(child != NULL);

	/* One fewer completed connection waiting on the listener. */
	sk_acceptq_removed(parent);

	/* The request block has served its purpose. */
	__reqsk_free(head);

	return child;
}

/*
 * Pop the first node of the accept list (singly linked through dl_next)
 * and return it. The tail pointer is cleared when the list becomes empty.
 */
static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue *queue)
{
	struct request_sock *req = queue->rskq_accept_head;
	struct request_sock *next;

	BUG_TRAP(req != NULL);

	/* Advance the head past the node being removed. */
	next = req->dl_next;
	queue->rskq_accept_head = next;

	/* That was the last node: no tail any more either. */
	if (!next)
		queue->rskq_accept_tail = NULL;

	return req;
}

/*
 * Account for one entry leaving the accept queue of `sk` by lowering
 * its sk_ack_backlog counter by one.
 */
static inline void sk_acceptq_removed(struct sock *sk)
{
	sk->sk_ack_backlog -= 1;
}

你可能感兴趣的:(linux网络,TCP,accept,inet_accept,inet_csk_accept)