本文纯属个人见解,是对前面学习的总结,如有描述不正确的地方还请高手指正~
今天分析下socket的创建流程
关于用户态socket如何通过系统调用进入内核态到sys_socket,这里就不再分析,直接看内核态socket入口
SYSCALL_DEFINE3(
socket
, int, family, int, type, int, protocol)
{
int retval;
struct socket *sock;
int flags;
/* Check the SOCK_* constants for consistency. */
BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
flags = type & ~SOCK_TYPE_MASK;
if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
return -EINVAL;
type &= SOCK_TYPE_MASK;
if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
以上为一系列参数的检查和配置
retval = sock_create(family, type, protocol, &sock); 创建socket
if (retval < 0)
goto out;
retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK)); 与文件系统关联
if (retval < 0)
goto out_release;
out:
/* It may be already another descriptor 8) Not kernel problem. */
return retval;
out_release:
sock_release(sock);
return retval;
}
/*
 * sock_create - create a socket in the calling task's network namespace.
 *
 * Thin wrapper: family, type, protocol and the result pointer are passed
 * through unchanged to __sock_create(), together with
 * current->nsproxy->net_ns as the namespace and 0 for the trailing "kern"
 * argument. The return value is whatever __sock_create() returns.
 */
int sock_create(int family, int type, int protocol, struct socket **res)
{
	struct net *ns = current->nsproxy->net_ns;

	return __sock_create(ns, family, type, protocol, res, 0);
}
static int
__sock_create(struct net *net, int family, int type, int protocol,
struct socket **res, int kern)
->sock =
sock_alloc();
->pf = rcu_dereference(net_families[family]);
获得协议族操作表
->err = pf->create(net, sock, protocol);
调用协议族操作表中的创建函数
static struct socket *
sock_alloc(void)
{
struct inode *inode;
struct socket *sock;
inode = new_inode(sock_mnt->mnt_sb);
在网络文件系统中创建inode结构
if (!inode)
return NULL;
sock = SOCKET_I(inode);
从inode结构得到socket结构
kmemcheck_annotate_bitfield(sock, type);
进行初始化工作
inode->i_mode = S_IFSOCK | S_IRWXUGO;
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
percpu_add(sockets_in_use, 1);
return sock;
}
->pf = rcu_dereference(
net_families[family]);
获得协议族操作表
net_families数组内容是通过sock_register注册的
/*
 * sock_register - install a protocol family's operations in net_families[].
 * @ops: family descriptor; ops->family selects the table slot.
 *
 * Returns -ENOBUFS when ops->family is not below NPROTO, -EEXIST when the
 * slot is already occupied, and 0 once ops has been stored.
 */
int sock_register(const struct net_proto_family *ops)
{
	int err = -EEXIST;

	if (ops->family >= NPROTO) {
		printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
		       NPROTO);
		return -ENOBUFS;
	}

	/* The slot test and the store both happen under net_family_lock. */
	spin_lock(&net_family_lock);
	if (!net_families[ops->family]) {
		net_families[ops->family] = ops;
		err = 0;
	}
	spin_unlock(&net_family_lock);

	/* This message is emitted on the success path and the -EEXIST path. */
	printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
	return err;
}
我们如果调用socket(
PF_INET,SOCK_STREAM,0);
在inet_init()中
(void)sock_register(&
inet_family_ops); 将
PF_INET协议族的函数操作列表进行注册
/*
 * Address-family descriptor for PF_INET. The .family value is the slot
 * that sock_register() fills in net_families[], and .create is the hook
 * that __sock_create() invokes through pf->create().
 */
static struct net_proto_family inet_family_ops = {
	.family	= PF_INET,
	.create	= inet_create,
	.owner	= THIS_MODULE,
};
我们看下inet_create函数
static int
inet_create(struct net *net, struct socket *sock, int protocol)
{
struct sock *sk;
struct inet_protosw *answer;
struct inet_sock *inet;
struct proto *answer_prot;
unsigned char answer_flags;
char answer_no_check;
int try_loading_module = 0;
int err;
if (unlikely(!inet_ehash_secret))
加密项处理
if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
build_ehash_secret();
sock->state = SS_UNCONNECTED;
将socket的状态设为未连接状态
/* Look for the requested type/protocol pair. */
lookup_protocol:
err = -ESOCKTNOSUPPORT;
rcu_read_lock();
list_for_each_entry_rcu(answer, &
inetsw[sock->type], list) {
查看内核是否注册了协议处理
inetsw的结构由void inet_register_protosw(struct inet_protosw *p)函数进行了注册
而inet_init()中
for (q = inetsw_array; q < &
inetsw_array[INETSW_ARRAY_LEN]; ++q)
inet_register_protosw
(q);
追根溯源
static struct inet_protosw inetsw_array[] =
{
{
.type =
SOCK_STREAM, 流套接字
.protocol = IPPROTO_TCP,
.prot = &tcp_prot,
.ops = &inet_stream_ops,
.capability = -1,
.no_check = 0,
.flags = INET_PROTOSW_PERMANENT |
INET_PROTOSW_ICSK,
},
{
.type =
SOCK_DGRAM, 数据报套接字
.protocol = IPPROTO_UDP,
.prot = &udp_prot,
.ops = &inet_dgram_ops,
.capability = -1,
.no_check = UDP_CSUM_DEFAULT,
.flags = INET_PROTOSW_PERMANENT,
},
{
.type =
SOCK_RAW, 原始套接字
.protocol = IPPROTO_IP, /* wild card */
.prot = &raw_prot,
.ops = &inet_sockraw_ops,
.capability = CAP_NET_RAW,
.no_check = UDP_CSUM_DEFAULT,
.flags = INET_PROTOSW_REUSE,
}
}
每日一道理
翻开早已发黄的页张,试着寻找过去所留下的点点滴滴的足迹。多年前的好友似乎现在看来已变得陌生,匆忙之间,让这维持了多年的友谊变淡,找不出什么亲切感,只是偶尔遇上,淡淡地微笑,如今也只能在这发黄的页张中找寻过去的那些让人难忘的,至少我可以握住这仅剩下一段的“丝线头”……
sock->ops = answer->ops; 即为
.ops = &inet_stream_ops,
。。。
err = -ENOBUFS;
sk = sk_alloc(net, PF_INET, GFP_KERNEL,
answer_prot);
创建sock结构
以TCP为例
answer_prot 为
tcp_prot
对sock的sk_prot的设置:sk->sk_prot = sk->sk_prot_creator = prot;
if (sk == NULL)
goto out;
。。。
sock_init_data(sock, sk); 初始化sock与socket结构,并将两者进行关联
sk->sk_destruct = inet_sock_destruct;
sk->sk_protocol = protocol;
sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
inet->uc_ttl = -1;
inet->mc_loop = 1;
inet->mc_ttl = 1;
inet->mc_all = 1;
inet->mc_index = 0;
inet->mc_list = NULL;
sk_refcnt_debug_inc(sk);
if (inet->num) {
/* It assumes that any protocol which allows
* the user to assign a number at socket
* creation time automatically
* shares.
*/
inet->sport = htons(inet->num);
/* Add to protocol hash chains. */
sk->sk_prot->hash(sk);
}
if (sk->sk_prot->init) {
err =
sk->sk_prot->init(sk);
if (err)
sk_common_release(sk);
}
out:
return err;
out_rcu_unlock:
rcu_read_unlock();
goto out;
}
const struct proto_ops inet_stream_ops = { 该结构为TCP类型的sock操作函数列表,包括listen bind accept poll等函数
.family = PF_INET,
.owner = THIS_MODULE,
.release = inet_release,
.bind = inet_bind,
.connect = inet_stream_connect,
.socketpair = sock_no_socketpair,
.accept = inet_accept,
.getname = inet_getname,
.poll = tcp_poll,
.ioctl = inet_ioctl,
.listen = inet_listen,
.shutdown = inet_shutdown,
.setsockopt = sock_common_setsockopt,
.getsockopt = sock_common_getsockopt,
.sendmsg = tcp_sendmsg,
.recvmsg = sock_common_recvmsg,
.mmap = sock_no_mmap,
.sendpage = tcp_sendpage,
.splice_read = tcp_splice_read,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_sock_common_setsockopt,
.compat_getsockopt = compat_sock_common_getsockopt,
#endif
};
struct proto tcp_prot = { TCP协议注册的协议层处理函数
.name = "TCP",
.owner = THIS_MODULE,
.close = tcp_close,
.connect = tcp_v4_connect,
.disconnect = tcp_disconnect,
.accept = inet_csk_accept,
.ioctl = tcp_ioctl,
.init = tcp_v4_init_sock,
.destroy = tcp_v4_destroy_sock,
.shutdown = tcp_shutdown,
.setsockopt = tcp_setsockopt,
.getsockopt = tcp_getsockopt,
.recvmsg = tcp_recvmsg,
.backlog_rcv = tcp_v4_do_rcv,
.hash = inet_hash,
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
.enter_memory_pressure = tcp_enter_memory_pressure,
.sockets_allocated = &tcp_sockets_allocated,
.orphan_count = &tcp_orphan_count,
.memory_allocated = &tcp_memory_allocated,
.memory_pressure = &tcp_memory_pressure,
.sysctl_mem = sysctl_tcp_mem,
.sysctl_wmem = sysctl_tcp_wmem,
.sysctl_rmem = sysctl_tcp_rmem,
.max_header = MAX_TCP_HEADER,
.obj_size = sizeof(struct tcp_sock),
.slab_flags = SLAB_DESTROY_BY_RCU,
.twsk_prot = &tcp_timewait_sock_ops,
.rsk_prot = &tcp_request_sock_ops,
.h.hashinfo = &tcp_hashinfo,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_tcp_setsockopt,
.compat_getsockopt = compat_tcp_getsockopt,
#endif
};
sock_create(family, type, protocol, &sock);
socket的创建过程基本就是这样:从网络文件系统中申请了inode,并得到并建立socket结构,申请了新的sock结构,并将两者进行关联
根据socket(family, type, protocol)传入的参数(协议族类型、是流式套接字还是数据报套接字等)挂载相应的处理函数集。
我们再看下
sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK)); 的工作
int sock_map_fd(struct socket *sock, int flags)
{
struct file *newfile;
int fd =
sock_alloc_fd(&newfile, flags);
为socket分配文件号与文件结构
if (likely(fd >= 0)) {
int err =
sock_attach_fd(sock, newfile, flags);
将socket与file文件结构进行关联
if (unlikely(err < 0)) {
put_filp(newfile);
put_unused_fd(fd);
return err;
}
fd_install(fd, newfile); 将文件号与文件挂钩
}
return fd;
}
static int
sock_attach_fd(struct socket *sock, struct file *file, int flags)
{
struct dentry *dentry;
struct qstr name = { .name = "" };
dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); 分配目录项
if (unlikely(!dentry))
return -ENOMEM;
dentry->d_op = &sockfs_dentry_operations;
/*
* We dont want to push this dentry into global dentry hash table.
* We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
* This permits a working /proc/$pid/fd/XXX on sockets
*/
dentry->d_flags &= ~DCACHE_UNHASHED;
d_instantiate(dentry, SOCK_INODE(sock));
将目录项与inode关联
sock->file = file;
init_file(file, sock_mnt, dentry, FMODE_READ | FMODE_WRITE,
&
socket_file_ops
);
对file结构成员赋值,并将file->f_op 更新为
socket_file_ops socket类型文件的操作函数集
SOCK_INODE(sock)->i_fop = &socket_file_ops;
file->f_flags = O_RDWR | (flags & O_NONBLOCK);
file->f_pos = 0;
file->private_data = sock; file的私有数据被更新为socket变量
return 0;
}
摘一幅《Linux内核源代码情景分析》中的图,主要数据结构的组织关系大概就是这样
文章结束给大家分享下程序员的一些笑话语录: 看到有人回帖“不顶不是中国人”,他的本意是想让帖子沉了。