Linux的BSD socket层和INET socket层分别对应于ISO/OSI参考模型的表示层和会话层,其中有两个比较重要的数据结构:
/*
 * struct socket - general socket object.
 *
 * Links the file-system side of a socket (inode, file) with its
 * operations table (ops) and its struct sock (sk).
 */
struct socket {
	socket_state		state;		/* Current state of the socket */
	unsigned long		flags;

	struct proto_ops	*ops;		/* Operations table for this socket */
	struct inode		*inode;		/* Associated inode */
	struct fasync_struct	*fasync_list;	/* List used for asynchronous wake-up */
	struct file		*file;		/* Back pointer to the file, used for gc */
	struct sock		*sk;		/* Associated struct sock */
	wait_queue_head_t	wait;

	short			type;
	unsigned char		passcred;
};
其中state表示socket当前的状态,file和inode分别指向文件系统中与该socket关联的file结构和inode节点。每个文件描述符都要对应一个inode节点;在这一版本的内核中struct file没有f_inode成员,对应的inode节点要通过file->f_dentry->d_inode得到。
struct proto_ops *ops指向一组L6层的处理函数(函数指针表),pops数组将在sock_register中初始化,对应于不同的协议域(地址族)将被初始化为不同的值。fasync_list用于异步唤醒,对应的函数是sock_wake_async()。
而struct sock *sk就是我们要说的另外一个非常重要的数据结构。相对于socket只应用于L6层,sock贯穿于L2~L5层,并且充当连接各层的纽带。
struct sock { /* Socket demultiplex comparisons on incoming packets. */ __u32 daddr; /* Foreign IPv4 addr */ __u32 rcv_saddr; /* Bound local IPv4 addr */ __u16 dport; /* Destination port */ unsigned short num; /* Local port */ int bound_dev_if; /* Bound device index if != 0 */ /* Main hash linkage for various protocol lookup tables. */ struct sock *next; struct sock **pprev; struct sock *bind_next; struct sock **bind_pprev; volatile unsigned char state, /* Connection state */ zapped; /* In ax25 & ipx means not linked */ __u16 sport; /* Source port */ unsigned short family; /* Address family */ unsigned char reuse; /* SO_REUSEADDR setting */ unsigned char shutdown; atomic_t refcnt; /* Reference count */ socket_lock_t lock; /* Synchronizer... */ int rcvbuf; /* Size of receive buffer in bytes */ wait_queue_head_t *sleep; /* Sock wait queue */ struct dst_entry *dst_cache; /* Destination cache */ rwlock_t dst_lock; atomic_t rmem_alloc; /* Receive queue bytes committed */ struct sk_buff_head receive_queue; /* Incoming packets */ atomic_t wmem_alloc; /* Transmit queue bytes committed */ struct sk_buff_head write_queue; /* Packet sending queue */ atomic_t omem_alloc; /* "o" is "option" or "other" */ int wmem_queued; /* Persistent queue size */ int forward_alloc; /* Space allocated forward. */ __u32 saddr; /* Sending source */ unsigned int allocation; /* Allocation mode */ int sndbuf; /* Size of send buffer in bytes */ struct sock *prev; /* Not all are volatile, but some are, so we might as well say they all are. * XXX Make this a flag word -DaveM */ volatile char dead, done, urginline, keepopen, linger, destroy, no_check, broadcast, bsdism; unsigned char debug; unsigned char rcvtstamp; unsigned char use_write_queue; unsigned char userlocks; /* Hole of 3 bytes. Try to pack. 
*/ int route_caps; int proc; unsigned long lingertime; int hashent; struct sock *pair; /* The backlog queue is special, it is always used with * the per-socket spinlock held and requires low latency * access. Therefore we special case it's implementation. */ struct { struct sk_buff *head; struct sk_buff *tail; } backlog; rwlock_t callback_lock; /* Error queue, rarely used. */ struct sk_buff_head error_queue; struct proto *prot; #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) union { struct ipv6_pinfo af_inet6; } net_pinfo; #endif union { struct tcp_opt af_tcp; #if defined(CONFIG_INET) || defined (CONFIG_INET_MODULE) struct raw_opt tp_raw4; #endif #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) struct raw6_opt tp_raw; #endif /* CONFIG_IPV6 */ #if defined(CONFIG_SPX) || defined (CONFIG_SPX_MODULE) struct spx_opt af_spx; #endif /* CONFIG_SPX */ } tp_pinfo; int err, err_soft; /* Soft holds errors that don't cause failure but are the cause of a persistent failure not just 'timed out' */ unsigned short ack_backlog; unsigned short max_ack_backlog; __u32 priority; unsigned short type; unsigned char localroute; /* Route locally only */ unsigned char protocol; struct ucred peercred; int rcvlowat; long rcvtimeo; long sndtimeo; #ifdef CONFIG_FILTER /* Socket Filtering Instructions */ struct sk_filter *filter; #endif /* CONFIG_FILTER */ /* This is where all the private (optional) areas that don't * overlap will eventually live. 
*/ union { void *destruct_hook; struct unix_opt af_unix; #if defined(CONFIG_INET) || defined (CONFIG_INET_MODULE) struct inet_opt af_inet; #endif #if defined(CONFIG_ATALK) || defined(CONFIG_ATALK_MODULE) struct atalk_sock af_at; #endif #if defined(CONFIG_IPX) || defined(CONFIG_IPX_MODULE) struct ipx_opt af_ipx; #endif #if defined (CONFIG_DECNET) || defined(CONFIG_DECNET_MODULE) struct dn_scp dn; #endif #if defined (CONFIG_PACKET) || defined(CONFIG_PACKET_MODULE) struct packet_opt *af_packet; #endif #if defined(CONFIG_X25) || defined(CONFIG_X25_MODULE) x25_cb *x25; #endif #if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) ax25_cb *ax25; #endif #if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE) nr_cb *nr; #endif #if defined(CONFIG_ROSE) || defined(CONFIG_ROSE_MODULE) rose_cb *rose; #endif #if defined(CONFIG_PPPOE) || defined(CONFIG_PPPOE_MODULE) struct pppox_opt *pppox; #endif struct netlink_opt *af_netlink; #if defined(CONFIG_ECONET) || defined(CONFIG_ECONET_MODULE) struct econet_opt *af_econet; #endif #if defined(CONFIG_ATM) || defined(CONFIG_ATM_MODULE) struct atm_vcc *af_atm; #endif #if defined(CONFIG_IRDA) || defined(CONFIG_IRDA_MODULE) struct irda_sock *irda; #endif #if defined(CONFIG_WAN_ROUTER) || defined(CONFIG_WAN_ROUTER_MODULE) struct wanpipe_opt *af_wanpipe; #endif } protinfo; /* This part is used for the timeout functions. */ struct timer_list timer; /* This is the sock cleanup timer. */ struct timeval stamp; /* Identd and reporting IO signals */ struct socket *socket; /* RPC layer private data */ void *user_data; /* Callbacks */ void (*state_change)(struct sock *sk); void (*data_ready)(struct sock *sk,int bytes); void (*write_space)(struct sock *sk); void (*error_report)(struct sock *sk); int (*backlog_rcv) (struct sock *sk, struct sk_buff *skb); void (*destruct)(struct sock *sk); };
write_queue表示待发送的队列,send_head和send_tail所指示的队列中的数据包已经发送出去但尚未收到应答;receive_queue为读队列,它与back_log(即上面结构中的backlog)的不同之处在于:后者先缓冲从网络层传上来的数据包,然后再移至receive_queue队列,如果此时缓冲区太小,则直接丢弃从back_log队列上摘下的数据包。