->err = pf->create(net, sock, protocol);
调用协议族操作创建函数
static struct socket *
sock_alloc(void)
{
struct inode *inode;
struct socket *sock;
inode = new_inode(sock_mnt->mnt_sb);
在网络文件系统中创建inode结构
if (!inode)
return NULL;
sock = SOCKET_I(inode);
从inode结构得到socket结构
kmemcheck_annotate_bitfield(sock, type);
进行初始化工作
inode->i_mode = S_IFSOCK | S_IRWXUGO;
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
percpu_add(sockets_in_use, 1);
return sock;
}
->pf = rcu_dereference(
net_families[family]);
取得协议族操作表
net_families数组内容是通过sock_register注册的
/**
 * sock_register - register a protocol family's operations table
 * @ops: operations to install; @ops->family selects the net_families[] slot
 *
 * Stores @ops in net_families[@ops->family] while holding net_family_lock,
 * unless that slot is already occupied.  The informational "Registered"
 * message is emitted only when the table was actually installed, so a
 * failed (duplicate) registration no longer logs a misleading success line.
 *
 * Return: 0 on success, -ENOBUFS if @ops->family is not below NPROTO,
 * -EEXIST if a table is already registered for that family.
 */
int sock_register(const struct net_proto_family *ops)
{
	int err = 0;

	if (ops->family >= NPROTO) {
		printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
		       NPROTO);
		return -ENOBUFS;
	}

	spin_lock(&net_family_lock);
	if (net_families[ops->family])
		err = -EEXIST;
	else
		net_families[ops->family] = ops;
	spin_unlock(&net_family_lock);

	/* Announce the family only if the slot was really taken by @ops. */
	if (!err)
		printk(KERN_INFO "NET: Registered protocol family %d\n",
		       ops->family);
	return err;
}
以调用 socket(PF_INET, SOCK_STREAM, 0); 为例:
在 inet_init() 中,通过
(void)sock_register(&inet_family_ops);
将 PF_INET 协议族的操作函数表注册到 net_families 数组中
/*
 * Operations table for the PF_INET protocol family; sockets of this
 * family are created through inet_create().
 */
static struct net_proto_family inet_family_ops = {
	.family	= PF_INET,
	.create	= inet_create,
	.owner	= THIS_MODULE,
};
我们看下inet_create函数
static int
inet_create(struct net *net, struct socket *sock, int protocol)
{
struct sock *sk;
struct inet_protosw *answer;
struct inet_sock *inet;
struct proto *answer_prot;
unsigned char answer_flags;
char answer_no_check;
int try_loading_module = 0;
int err;
if (unlikely(!inet_ehash_secret))
若 inet_ehash_secret 尚未生成,且套接字类型既不是 SOCK_RAW 也不是 SOCK_DGRAM,则调用 build_ehash_secret() 生成它
if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
build_ehash_secret();
sock->state = SS_UNCONNECTED;
将socket的状态设为未连接状态
/* Look for the requested type/protocol pair. */
lookup_protocol:
err = -ESOCKTNOSUPPORT;
rcu_read_lock();
list_for_each_entry_rcu(answer, &
inetsw[sock->type], list) {
sock->ops = answer->ops; 即为
.ops = &inet_stream_ops,
。。。
err = -ENOBUFS;
sk = sk_alloc(net, PF_INET, GFP_KERNEL,
answer_prot);
创建sock结构
以TCP为例,answer_prot 为 tcp_prot(inet_stream_ops 是 struct proto_ops,对应的是上面的 sock->ops,而不是 answer_prot)
sk_alloc 中对sock的sk_prot的设置:sk->sk_prot = sk->sk_prot_creator = prot;
if (sk == NULL)
goto out;
。。。
sock_init_data(sock, sk); 初始化sock与socket结构,并将两者进行关联
sk->sk_destruct = inet_sock_destruct;
sk->sk_protocol = protocol;
sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
inet->uc_ttl = -1;
inet->mc_loop = 1;
inet->mc_ttl = 1;
inet->mc_all = 1;
inet->mc_index = 0;
inet->mc_list = NULL;
sk_refcnt_debug_inc(sk);
if (inet->num) {
/* It assumes that any protocol which allows
* the user to assign a number at socket
* creation time automatically
* shares.
*/
inet->sport = htons(inet->num);
/* Add to protocol hash chains. */
sk->sk_prot->hash(sk);
}
if (sk->sk_prot->init) {
err =
sk->sk_prot->init(sk);
if (err)
sk_common_release(sk);
}
out:
return err;
out_rcu_unlock:
rcu_read_unlock();
goto out;
}
const struct proto_ops inet_stream_ops = { 该结构为TCP类型的sock 操作函数列表 包括listen bind accept poll 等函数
.family = PF_INET,
.owner = THIS_MODULE,
.release = inet_release,
.bind = inet_bind,
.connect = inet_stream_connect,
.socketpair = sock_no_socketpair,
.accept = inet_accept,
.getname = inet_getname,
.poll = tcp_poll,
.ioctl = inet_ioctl,
.listen = inet_listen,
.shutdown = inet_shutdown,
.setsockopt = sock_common_setsockopt,
.getsockopt = sock_common_getsockopt,
.sendmsg = tcp_sendmsg,
.recvmsg = sock_common_recvmsg,
.mmap = sock_no_mmap,
.sendpage = tcp_sendpage,
.splice_read = tcp_splice_read,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_sock_common_setsockopt,
.compat_getsockopt = compat_sock_common_getsockopt,
#endif
};
struct proto tcp_prot = { TCP协议注册的协议层处理函数
.name = "TCP",
.owner = THIS_MODULE,
.close = tcp_close,
.connect = tcp_v4_connect,
.disconnect = tcp_disconnect,
.accept = inet_csk_accept,
.ioctl = tcp_ioctl,
.init = tcp_v4_init_sock,
.destroy = tcp_v4_destroy_sock,
.shutdown = tcp_shutdown,
.setsockopt = tcp_setsockopt,
.getsockopt = tcp_getsockopt,
.recvmsg = tcp_recvmsg,
.backlog_rcv = tcp_v4_do_rcv,
.hash = inet_hash,
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
.enter_memory_pressure = tcp_enter_memory_pressure,
.sockets_allocated = &tcp_sockets_allocated,
.orphan_count = &tcp_orphan_count,
.memory_allocated = &tcp_memory_allocated,
.memory_pressure = &tcp_memory_pressure,
.sysctl_mem = sysctl_tcp_mem,
.sysctl_wmem = sysctl_tcp_wmem,
.sysctl_rmem = sysctl_tcp_rmem,
.max_header = MAX_TCP_HEADER,
.obj_size = sizeof(struct tcp_sock),
.slab_flags = SLAB_DESTROY_BY_RCU,
.twsk_prot = &tcp_timewait_sock_ops,
.rsk_prot = &tcp_request_sock_ops,
.h.hashinfo = &tcp_hashinfo,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_tcp_setsockopt,
.compat_getsockopt = compat_tcp_getsockopt,
#endif
};
sock_create(family, type, protocol, &sock);
socket的创建过程基本就是这样:先从套接字文件系统(sock_mnt 所挂载的文件系统)中申请inode,并由它得到socket结构;再申请新的sock结构,并将两者进行关联。
然后根据socket(family, type, protocol)传入的参数(协议族类型、是流式套接字还是数据报套接字等)挂载相应的处理函数集。
我们再看下
sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK)); 的工作
/**
 * sock_map_fd - give a socket a file descriptor
 * @sock:  socket to expose through a descriptor
 * @flags: flags forwarded to sock_alloc_fd() and sock_attach_fd()
 *
 * Allocates a descriptor number and a file structure, associates the
 * socket with that file, and finally installs the file under the
 * descriptor.  If attaching fails, both the file and the descriptor are
 * released again.
 *
 * Return: the new descriptor (>= 0) on success, otherwise the negative
 * value returned by sock_alloc_fd() or sock_attach_fd().
 */
int sock_map_fd(struct socket *sock, int flags)
{
	struct file *newfile;
	int err;
	/* Allocate a descriptor number and a file structure for the socket. */
	int fd = sock_alloc_fd(&newfile, flags);

	if (unlikely(fd < 0))
		return fd;

	/* Associate the socket with the file structure. */
	err = sock_attach_fd(sock, newfile, flags);
	if (unlikely(err < 0)) {
		put_filp(newfile);
		put_unused_fd(fd);
		return err;
	}

	/* Hook the file up to the descriptor number. */
	fd_install(fd, newfile);
	return fd;
}
static int
sock_attach_fd(struct socket *sock, struct file *file, int flags)
{
struct dentry *dentry;
struct qstr name = { .name = "" };
dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); 分配目录项
if (unlikely(!dentry))
return -ENOMEM;
dentry->d_op = &sockfs_dentry_operations;
/*
* We dont want to push this dentry into global dentry hash table.
* We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
* This permits a working /proc/$pid/fd/XXX on sockets
*/
dentry->d_flags &= ~DCACHE_UNHASHED;
d_instantiate(dentry, SOCK_INODE(sock));
将目录项与inode关联
sock->file = file;
init_file(file, sock_mnt, dentry, FMODE_READ | FMODE_WRITE,
&
socket_file_ops
);
对file结构成员赋值,并将file->f_op 更新为
socket_file_ops socket类型文件的操作函数集
SOCK_INODE(sock)->i_fop = &socket_file_ops;
file->f_flags = O_RDWR | (flags & O_NONBLOCK);
file->f_pos = 0;
file->private_data = sock; file的私有数据被更新为socket变量
return 0;
}
摘一幅《Linux内核源代码情景分析》中的图,主要数据结构的组织关系大致如下