Socket与系统调用深度分析

首先来看看sys_socket中的函数调用关系:

sys_socket

       |

       +--------- sock_create

       |                    |

       |                    +------- __sock_create

       |                                         |

       |                                         +------- security_socket_create

       |                                         +-------- sock_alloc()

       |                                         +--------- rcu_dereference(net_families[family])

       |                                         +--------- pf->create(net, sock, protocol, kern)

       |                                         +--------- module_put(pf->owner)

       |                                         +--------- security_socket_post_create

       +---------- sock_map_fd

sys_socket调用sock_create函数,后者再调用__sock_create函数;__sock_create通过rcu_dereference(net_families[family])取得该协议族对应的net_proto_family结构(对IPv4而言是inet_family_ops),然后调用该结构中的create函数(这里是inet_create函数)来创建socket。最后由sock_map_fd为这个socket分配一个文件描述符(fd)。

当应用程序使用socket(int,int,int)创建一个socket时,会通过系统调用进入内核,由sys_socket来完成socket的创建。

本次我们来看看socket()函数系统调用过程。

sys_socket()函数声明如下:

/* Prototype of the socket system call; the three int arguments are family, type and protocol, in that order (matching the SYSCALL_DEFINE3 definition of socket). */
asmlinkage long sys_socket(int, int, int);
同样地,sys_socket()函数实现为:

1. sys_socket()
/*
 * socket() system call.
 *
 * @family:   protocol family, forwarded unchanged to sock_create()
 * @type:     socket type, optionally OR-ed with SOCK_CLOEXEC / SOCK_NONBLOCK
 * @protocol: protocol number, forwarded unchanged to sock_create()
 *
 * Returns the value produced by sock_map_fd() on success, -EINVAL when
 * @type carries flag bits other than SOCK_CLOEXEC / SOCK_NONBLOCK, or the
 * negative error reported by sock_create() / sock_map_fd().
 */
SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
{
	struct socket *sock;
	int flags;
	int retval;

	/* Compile-time consistency checks on the SOCK_* constants. */
	BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
	BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
	BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
	BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);

	/* Bits outside SOCK_TYPE_MASK are flags; only two are accepted. */
	flags = type & ~SOCK_TYPE_MASK;
	if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	type &= SOCK_TYPE_MASK;

	/* Where the two constants differ, rewrite SOCK_NONBLOCK as O_NONBLOCK. */
	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) {
		flags &= ~SOCK_NONBLOCK;
		flags |= O_NONBLOCK;
	}

	/* Create the socket (and, per the original notes, its inode). */
	retval = sock_create(family, type, protocol, &sock);
	if (retval < 0)
		return retval;

	/*
	 * Map the socket to a descriptor. The original notes describe this
	 * step as creating the file and binding fd <-> file <-> socket.
	 * On failure the freshly created socket is released again.
	 */
	retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
	if (retval < 0)
		sock_release(sock);

	/* The fd may already be in use by another thread; not our problem. */
	return retval;
}
参数kern(__sock_create()的最后一个参数):表示该套接口是由应用程序还是由内核创建,0表示应用程序,1表示内核。sock_create()调用__sock_create()时传入的是0。

2. sock_create()函数:
这个函数是对__sock_create()函数的封装,直接调用__sock_create()函数。

/*
 * Thin wrapper around __sock_create().
 *
 * Uses the network namespace of the current task and passes 0 for the
 * trailing "kern" argument. @family, @type, @protocol and @res are
 * forwarded unchanged, and the result of __sock_create() is returned as is.
 */
int sock_create(int family, int type, int protocol, struct socket **res)
{
	struct net *net = current->nsproxy->net_ns;

	return __sock_create(net, family, type, protocol, res, 0);
}
3. __sock_create()函数

创建socket及inode

/*
 * Create a socket for the given family/type/protocol.
 *
 * @net:      network namespace, handed to the family's ->create() callback
 * @family:   protocol family; must satisfy 0 <= family < NPROTO
 * @type:     socket type; must satisfy 0 <= type < SOCK_MAX
 * @protocol: protocol number, passed through to ->create()
 * @res:      on success, receives the newly created socket
 * @kern:     passed to the security hooks and to ->create(); sock_create()
 *            passes 0 here
 *
 * Returns 0 on success. Failure codes visible in this function:
 *   -EAFNOSUPPORT  family out of range, no registered net_proto_family,
 *                  or a module reference could not be taken
 *   -EINVAL        type out of range
 *   -ENFILE        sock_alloc() returned NULL
 *   otherwise the error returned by the security hooks or by ->create().
 */
int __sock_create(struct net *net, int family, int type, int protocol,
struct socket **res, int kern)
{
int err;
struct socket *sock;
const struct net_proto_family *pf;

/*
* Check protocol is in range
*/
/* Range-check the family and type arguments. */
if (family < 0 || family >= NPROTO)
return -EAFNOSUPPORT;
if (type < 0 || type >= SOCK_MAX)
return -EINVAL;

/* Compatibility.
This uglymoron is moved from INET layer to here to avoid
deadlock in module load.
*/
/* (PF_INET, SOCK_PACKET) is obsolete: warn once (guarded by the static
flag) and force the family to PF_PACKET. */
if (family == PF_INET && type == SOCK_PACKET) {
static int warned;
if (!warned) {
warned = 1;
pr_info("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
current->comm);
}
family = PF_PACKET;
}

/* Let the security module vet the creation request; a non-zero result
aborts before anything has been allocated. The hook is given the family,
type, protocol and the kern flag. */
// Background reading: https://www.ibm.com/developerworks/cn/linux/l-selinux/
err = security_socket_create(family, type, protocol, kern);
if (err)
return err;

/*
* Allocate the socket and allow the family to set things up. if
* the protocol is 0, the family is instructed to select an appropriate
* default.
*/
/* Per the original notes, sock_alloc() allocates the socket together with
its inode from the sock_inode_cache cache and initialises both (not
verifiable from this excerpt). A NULL result is reported as -ENFILE. */
sock = sock_alloc();
if (!sock) {
net_warn_ratelimited("socket: no more sockets\n");
return -ENFILE; /* Not exactly a match, but it's the
closest posix thing */
}

sock->type = type;

/* If no handler is registered for this family yet, try to load one as a
module named "net-pf-<family>" (only when module support is built in). */
#ifdef CONFIG_MODULES
/* Attempt to load a protocol module if the find failed.
*
* 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
* requested real, full-featured networking support upon configuration.
* Otherwise module support will break!
*/
if (rcu_access_pointer(net_families[family]) == NULL)
request_module("net-pf-%d", family);
#endif

rcu_read_lock();

/* Look up the net_proto_family registered for this family; err is preset
to -EAFNOSUPPORT for both failure exits below. */
pf = rcu_dereference(net_families[family]);
err = -EAFNOSUPPORT;
if (!pf)
goto out_release;

/*
* We will call the ->create function, that possibly is in a loadable
* module, so we have to bump that loadable module refcnt first.
*/
/* Holding a reference on pf->owner keeps the module from being unloaded
while ->create() is running. */
if (!try_module_get(pf->owner))
goto out_release;

/* Now protected by module ref count */
rcu_read_unlock();

/* Family-specific setup. Per the original notes, for IPv4 this is
inet_create(), which finishes initialising the socket and creates the
network-layer sock. */
err = pf->create(net, sock, protocol, kern);
if (err < 0)
goto out_module_put;

/*
* Now to bump the refcnt of the [loadable] module that owns this
* socket at sock_release time we decrement its refcnt.
*/
/* Take a reference on the module owning sock->ops; according to the
comment above it is dropped again at sock_release time. */
if (!try_module_get(sock->ops->owner))
goto out_module_busy;

/*
* Now that we're done with the ->create function, the [loadable]
* module can have its refcnt decremented
*/
/* ->create() has returned, so drop the reference taken on pf->owner. */
module_put(pf->owner);

/* Post-creation security check on the finished socket; on failure the
socket is released and the hook's error is returned. */
err = security_socket_post_create(sock, family, type, protocol, kern);
if (err)
goto out_sock_release;
*res = sock;

return 0;

/* Error unwinding: each label falls through into the next one. */
out_module_busy:
err = -EAFNOSUPPORT;
out_module_put:
/* NOTE(review): ops is cleared before sock_release(), presumably so the
release path does not touch the protocol's ops - confirm against
sock_release(). */
sock->ops = NULL;
module_put(pf->owner);
out_sock_release:
sock_release(sock);
return err;

/* Failure exits taken while the RCU read lock is still held. */
out_release:
rcu_read_unlock();
goto out_sock_release;
}
 

你可能感兴趣的:(Socket与系统调用深度分析)