一, BSD
/**
* struct socket - general BSD socket
* @state - socket state (%SS_CONNECTED, etc)
* @flags - socket flags (%SOCK_ASYNC_NOSPACE, etc)
* @ops - protocol specific socket operations
* @fasync_list - Asynchronous wake up list
* @file - File back pointer for gc
* @sk - internal networking protocol agnostic socket representation
* @wait - wait queue for several uses
* @type - socket type (%SOCK_STREAM, etc)
* @passcred - credentials (used only in Unix Sockets (aka PF_LOCAL))
*/
struct socket {
socket_state state;
unsigned long flags;
struct proto_ops *ops;
struct fasync_struct *fasync_list;
struct file *file;
struct sock *sk;
wait_queue_head_t wait;
short type;
unsigned char passcred;
};
struct proto_ops {
int family;
struct module *owner;
int (*release) (struct socket *sock);
int (*bind) (struct socket *sock,
struct sockaddr *myaddr,
int sockaddr_len);
int (*connect) (struct socket *sock,
struct sockaddr *vaddr,
int sockaddr_len, int flags);
int (*socketpair)(struct socket *sock1,
struct socket *sock2);
int (*accept) (struct socket *sock,
struct socket *newsock, int flags);
int (*getname) (struct socket *sock,
struct sockaddr *addr,
int *sockaddr_len, int peer);
unsigned int (*poll) (struct file *file, struct socket *sock,
struct poll_table_struct *wait);
int (*ioctl) (struct socket *sock, unsigned int cmd,
unsigned long arg);
int (*listen) (struct socket *sock, int len);
int (*shutdown) (struct socket *sock, int flags);
int (*setsockopt)(struct socket *sock, int level,
int optname, char __user *optval, int optlen);
int (*getsockopt)(struct socket *sock, int level,
int optname, char __user *optval, int __user *optlen);
int (*sendmsg) (struct kiocb *iocb, struct socket *sock,
struct msghdr *m, size_t total_len);
int (*recvmsg) (struct kiocb *iocb, struct socket *sock,
struct msghdr *m, size_t total_len,
int flags);
int (*mmap) (struct file *file, struct socket *sock,
struct vm_area_struct * vma);
ssize_t (*sendpage) (struct socket *sock, struct page *page,
int offset, size_t size, int flags);
};
struct fasync_struct {
int magic;
int fa_fd;
struct fasync_struct *fa_next; /* singly linked list */
struct file *fa_file;
};
struct file {
struct file *next;
struct file *parent;
char *name;
int lineno;
int flags;
};
struct __wait_queue_head {
spinlock_t lock;
struct list_head task_list;
};
typedef struct __wait_queue_head wait_queue_head_t;
***************************************************************************
/* INET socket层: */
***************************************************************************
/**
* struct sock - network layer representation of sockets
* @__sk_common - shared layout with tcp_tw_bucket
* @sk_zapped - ax25 & ipx means !linked
* @sk_shutdown - mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN
* @sk_use_write_queue - wheter to call sk->sk_write_space in sock_wfree
* @sk_userlocks - %SO_SNDBUF and %SO_RCVBUF settings
* @sk_lock - synchronizer
* @sk_rcvbuf - size of receive buffer in bytes
* @sk_sleep - sock wait queue
* @sk_dst_cache - destination cache
* @sk_dst_lock - destination cache lock
* @sk_policy - flow policy
* @sk_rmem_alloc - receive queue bytes committed
* @sk_receive_queue - incoming packets
* @sk_wmem_alloc - transmit queue bytes committed
* @sk_write_queue - Packet sending queue
* @sk_omem_alloc - "o" is "option" or "other"
* @sk_wmem_queued - persistent queue size
* @sk_forward_alloc - space allocated forward
* @sk_allocation - allocation mode
* @sk_sndbuf - size of send buffer in bytes
* @sk_flags - %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE, %SO_OOBINLINE settings
* @sk_no_check - %SO_NO_CHECK setting, wether or not checkup packets
* @sk_debug - %SO_DEBUG setting
* @sk_rcvtstamp - %SO_TIMESTAMP setting
* @sk_no_largesend - whether to sent large segments or not
* @sk_route_caps - route capabilities (e.g. %NETIF_F_TSO)
* @sk_lingertime - %SO_LINGER l_linger setting
* @sk_hashent - hash entry in several tables (e.g. tcp_ehash)
* @sk_backlog - always used with the per-socket spinlock held
* @sk_callback_lock - used with the callbacks in the end of this struct
* @sk_error_queue - rarely used
* @sk_prot - protocol handlers inside a network family
* @sk_err - last error
* @sk_err_soft - errors that don't cause failure but are the cause of a persistent failure not just 'timed out'
* @sk_ack_backlog - current listen backlog
* @sk_max_ack_backlog - listen backlog set in listen()
* @sk_priority - %SO_PRIORITY setting
* @sk_type - socket type (%SOCK_STREAM, etc)
* @sk_localroute - route locally only, %SO_DONTROUTE setting
* @sk_protocol - which protocol this socket belongs in this network family
* @sk_peercred - %SO_PEERCRED setting
* @sk_rcvlowat - %SO_RCVLOWAT setting
* @sk_rcvtimeo - %SO_RCVTIMEO setting
* @sk_sndtimeo - %SO_SNDTIMEO setting
* @sk_filter - socket filtering instructions
* @sk_protinfo - private area, net family specific, when not using slab
* @sk_slab - the slabcache this instance was allocated from
* @sk_timer - sock cleanup timer
* @sk_stamp - time stamp of last packet received
* @sk_socket - Identd and reporting IO signals
* @sk_user_data - RPC layer private data
* @sk_owner - module that owns this socket
* @sk_sndmsg_page - cached page for sendmsg
* @sk_sndmsg_off - cached offset for sendmsg
* @sk_send_head - front of stuff to transmit
* @sk_write_pending - a write to stream socket waits to start
* @sk_queue_shrunk - write queue has been shrunk recently
* @sk_state_change - callback to indicate change in the state of the sock
* @sk_data_ready - callback to indicate there is data to be processed
* @sk_write_space - callback to indicate there is bf sending space available
* @sk_error_report - callback to indicate errors (e.g. %MSG_ERRQUEUE)
* @sk_backlog_rcv - callback to process the backlog
* @sk_destruct - called at sock freeing time, i.e. when all refcnt == 0
*/
struct sock {
/*
* Now struct tcp_tw_bucket also uses sock_common, so please just
* don't add nothing before this first member (__sk_common) --acme
*/
struct sock_common __sk_common;
#define sk_family __sk_common.skc_family
#define sk_state __sk_common.skc_state
#define sk_reuse __sk_common.skc_reuse
#define sk_bound_dev_if __sk_common.skc_bound_dev_if
#define sk_node __sk_common.skc_node
#define sk_bind_node __sk_common.skc_bind_node
#define sk_refcnt __sk_common.skc_refcnt
volatile unsigned char sk_zapped;
unsigned char sk_shutdown;
unsigned char sk_use_write_queue;
unsigned char sk_userlocks;
socket_lock_t sk_lock;
int sk_rcvbuf;
wait_queue_head_t *sk_sleep;
struct dst_entry *sk_dst_cache;
rwlock_t sk_dst_lock;
struct xfrm_policy *sk_policy[2];
atomic_t sk_rmem_alloc;
struct sk_buff_head sk_receive_queue;
atomic_t sk_wmem_alloc;
struct sk_buff_head sk_write_queue;
atomic_t sk_omem_alloc;
int sk_wmem_queued;
int sk_forward_alloc;
unsigned int sk_allocation;
int sk_sndbuf;
unsigned long sk_flags;
char sk_no_check;
unsigned char sk_debug;
unsigned char sk_rcvtstamp;
unsigned char sk_no_largesend;
int sk_route_caps;
unsigned long sk_lingertime;
int sk_hashent;
/*
* The backlog queue is special, it is always used with
* the per-socket spinlock held and requires low latency
* access. Therefore we special case it's implementation.
*/
struct {
struct sk_buff *head;
struct sk_buff *tail;
} sk_backlog;
rwlock_t sk_callback_lock;
struct sk_buff_head sk_error_queue;
struct proto *sk_prot;
int sk_err,
sk_err_soft;
unsigned short sk_ack_backlog;
unsigned short sk_max_ack_backlog;
__u32 sk_priority;
unsigned short sk_type;
unsigned char sk_localroute;
unsigned char sk_protocol;
struct ucred sk_peercred;
int sk_rcvlowat;
long sk_rcvtimeo;
long sk_sndtimeo;
struct sk_filter *sk_filter;
void *sk_protinfo;
kmem_cache_t *sk_slab;
struct timer_list sk_timer;
struct timeval sk_stamp;
struct socket *sk_socket;
void *sk_user_data;
struct module *sk_owner;
struct page *sk_sndmsg_page;
__u32 sk_sndmsg_off;
struct sk_buff *sk_send_head;
int sk_write_pending;
void *sk_security;
__u8 sk_queue_shrunk;
/* three bytes hole, try to pack */
void (*sk_state_change)(struct sock *sk);
void (*sk_data_ready)(struct sock *sk, int bytes);
void (*sk_write_space)(struct sock *sk);
void (*sk_error_report)(struct sock *sk);
int (*sk_backlog_rcv)(struct sock *sk,
struct sk_buff *skb);
void (*sk_destruct)(struct sock *sk);
};
/**
* struct sock_common - minimal network layer representation of sockets
* @skc_family - network address family
* @skc_state - Connection state
* @skc_reuse - %SO_REUSEADDR setting
* @skc_bound_dev_if - bound device index if != 0
* @skc_node - main hash linkage for various protocol lookup tables
* @skc_bind_node - bind hash linkage for various protocol lookup tables
* @skc_refcnt - reference count
*
* This is the minimal network layer representation of sockets, the header
* for struct sock and struct tcp_tw_bucket.
*/
struct sock_common {
unsigned short skc_family;
volatile unsigned char skc_state;
unsigned char skc_reuse;
int skc_bound_dev_if;
struct hlist_node skc_node;
struct hlist_node skc_bind_node;
atomic_t skc_refcnt;
};
/* This is the per-socket lock. The spinlock provides a synchronization
* between user contexts and software interrupt processing, whereas the
* mini-semaphore synchronizes multiple users amongst themselves.
*/
struct sock_iocb;
typedef struct {
spinlock_t slock;
struct sock_iocb *owner;
wait_queue_head_t wq;
} socket_lock_t;
/* Each dst_entry has reference count and sits in some parent list(s).
* When it is removed from parent list, it is "freed" (dst_free).
* After this it enters dead state (dst->obsolete > 0) and if its refcnt
* is zero, it can be destroyed immediately, otherwise it is added
* to gc list and garbage collector periodically checks the refcnt.
*/
struct sk_buff;
struct dst_entry
{
struct dst_entry *next;
atomic_t __refcnt; /* client references */
int __use;
struct dst_entry *child;
struct net_device *dev;
int obsolete;
int flags;
#define DST_HOST 1
#define DST_NOXFRM 2
#define DST_NOPOLICY 4
#define DST_NOHASH 8
unsigned long lastuse;
unsigned long expires;
unsigned short header_len; /* more space at head required */
unsigned short trailer_len; /* space to reserve at tail */
u32 metrics[RTAX_MAX];
struct dst_entry *path;
unsigned long rate_last; /* rate limiting for ICMP */
unsigned long rate_tokens;
int error;
struct neighbour *neighbour;
struct hh_cache *hh;
struct xfrm_state *xfrm;
int (*input)(struct sk_buff*);
int (*output)(struct sk_buff*);
#ifdef CONFIG_NET_CLS_ROUTE
__u32 tclassid;
#endif
struct dst_ops *ops;
struct rcu_head rcu_head;
char info[0];
};
struct sk_buff_head {
/* These two members must be first. */
struct sk_buff *next;
struct sk_buff *prev;
__u32 qlen;
spinlock_t lock;
};
**********************************************************************
/* TCP/UDP层:处理传输层的操作,传输层用struct inet_protocol和struct proto两个结构表示。文件主要
*/
************************************************************************/
struct {
int (*open)(struct net_device *dev);
void (*close)(struct net_device *dev);
/* if open & DCD */
void (*start)(struct net_device *dev);
/* if open & !DCD */
void (*stop)(struct net_device *dev);
void (*detach)(struct hdlc_device_struct *hdlc);
int (*netif_rx)(struct sk_buff *skb);
unsigned short (*type_trans)(struct sk_buff *skb,
struct net_device *dev);
int id; /* IF_PROTO_HDLC/CISCO/FR/etc. */
}proto;
/*****************************************************************************
/*IP层:处理网络层的操作*/
*****************************************************************************/
struct packet_type {
unsigned short type; /* This is really htons(ether_type). */
struct net_device *dev; /* NULL is wildcarded here */
int (*func) (struct sk_buff *, struct net_device *,
struct packet_type *);
void *af_packet_priv;
struct list_head list;
};
/*
* The DEVICE structure.
* Actually, this whole structure is a big mistake. It mixes I/O
* data with strictly "high-level" data, and it has to know about
* almost every data structure used in the INET module.
*
* FIXME: cleanup struct net_device such that network protocol info
* moves out.
*/
struct net_device
{
/*
* This is the first field of the "visible" part of this structure
* (i.e. as seen by users in the "Space.c" file). It is the name
* the interface.
*/
char name[IFNAMSIZ];
/*
* I/O specific fields
* FIXME: Merge these and struct ifmap into one
*/
unsigned long mem_end; /* shared mem end */
unsigned long mem_start; /* shared mem start */
unsigned long base_addr; /* device I/O address */
unsigned int irq; /* device IRQ number */
/*
* Some hardware also needs these fields, but they are not
* part of the usual set specified in Space.c.
*/
unsigned char if_port; /* Selectable AUI, TP,..*/
unsigned char dma; /* DMA channel */
unsigned long state;
struct net_device *next;
/* The device initialization function. Called only once. */
int (*init)(struct net_device *dev);
/* ------- Fields preinitialized in Space.c finish here ------- */
struct net_device *next_sched;
/* Interface index. Unique device identifier */
int ifindex;
int iflink;
struct net_device_stats* (*get_stats)(struct net_device *dev);
struct iw_statistics* (*get_wireless_stats)(struct net_device *dev);
/* List of functions to handle Wireless Extensions (instead of ioctl).
* See <net/iw_handler.h> for details. Jean II */
const struct iw_handler_def * wireless_handlers;
/* Instance data managed by the core of Wireless Extensions. */
struct iw_public_data * wireless_data;
struct ethtool_ops *ethtool_ops;
/*
* This marks the end of the "visible" part of the structure. All
* fields hereafter are internal to the system, and may change at
* will (read: may be cleaned up at will).
*/
/* These may be needed for future network-power-down code. */
unsigned long trans_start; /* Time (in jiffies) of last Tx */
unsigned long last_rx; /* Time of last Rx */
unsigned short flags; /* interface flags (a la BSD) */
unsigned short gflags;
unsigned short priv_flags; /* Like 'flags' but invisible to userspace. */
unsigned short unused_alignment_fixer; /* Because we need priv_flags,
* and we want to be 32-bit aligned.
*/
unsigned mtu; /* interface MTU value */
unsigned short type; /* interface hardware type */
unsigned short hard_header_len; /* hardware hdr length */
void *priv; /* pointer to private data */
struct net_device *master; /* Pointer to master device of a group,
* which this device is member of.
*/
/* Interface address info. */
unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */
unsigned char dev_addr[MAX_ADDR_LEN]; /* hw address */
unsigned char addr_len; /* hardware address length */
struct dev_mc_list *mc_list; /* Multicast mac addresses */
int mc_count; /* Number of installed mcasts */
int promiscuity;
int allmulti;
int watchdog_timeo;
struct timer_list watchdog_timer;
/* Protocol specific pointers */
void *atalk_ptr; /* AppleTalk link */
void *ip_ptr; /* IPv4 specific data */
void *dn_ptr; /* DECnet specific data */
void *ip6_ptr; /* IPv6 specific data */
void *ec_ptr; /* Econet specific data */
void *ax25_ptr; /* AX.25 specific data */
struct list_head poll_list; /* Link to poll list */
int quota;
int weight;
struct Qdisc *qdisc;
struct Qdisc *qdisc_sleeping;
struct Qdisc *qdisc_ingress;
struct list_head qdisc_list;
unsigned long tx_queue_len; /* Max frames per queue allowed */
/* ingress path synchronizer */
spinlock_t ingress_lock;
/* hard_start_xmit synchronizer */
spinlock_t xmit_lock;
/* cpu id of processor entered to hard_start_xmit or -1,
if nobody entered there.
*/
int xmit_lock_owner;
/* device queue lock */
spinlock_t queue_lock;
/* Number of references to this device */
atomic_t refcnt;
/* delayed register/unregister */
struct list_head todo_list;
/* device name hash chain */
struct hlist_node name_hlist;
/* device index hash chain */
struct hlist_node index_hlist;
/* register/unregister state machine */
enum { NETREG_UNINITIALIZED=0,
NETREG_REGISTERING, /* called register_netdevice */
NETREG_REGISTERED, /* completed register todo */
NETREG_UNREGISTERING, /* called unregister_netdevice */
NETREG_UNREGISTERED, /* completed unregister todo */
NETREG_RELEASED, /* called free_netdev */
} reg_state;
/* Net device features */
int features;
#define NETIF_F_SG 1 /* Scatter/gather IO. */
#define NETIF_F_IP_CSUM 2 /* Can checksum only TCP/UDP over IPv4. */
#define NETIF_F_NO_CSUM 4 /* Does not require checksum. F.e. loopack. */
#define NETIF_F_HW_CSUM 8 /* Can checksum all the packets. */
#define NETIF_F_HIGHDMA 32 /* Can DMA to high memory. */
#define NETIF_F_FRAGLIST 64 /* Scatter/gather IO. */
#define NETIF_F_HW_VLAN_TX 128 /* Transmit VLAN hw acceleration */
#define NETIF_F_HW_VLAN_RX 256 /* Receive VLAN hw acceleration */
#define NETIF_F_HW_VLAN_FILTER 512 /* Receive filtering on VLAN */
#define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */
#define NETIF_F_TSO 2048 /* Can offload TCP/IP segmentation */
#define NETIF_F_LLTX 4096 /* LockLess TX */
/* Called after device is detached from network. */
void (*uninit)(struct net_device *dev);
/* Called after last user reference disappears. */
void (*destructor)(struct net_device *dev);
/* Pointers to interface service routines. */
int (*open)(struct net_device *dev);
int (*stop)(struct net_device *dev);
int (*hard_start_xmit) (struct sk_buff *skb,
struct net_device *dev);
#define HAVE_NETDEV_POLL
int (*poll) (struct net_device *dev, int *quota);
int (*hard_header) (struct sk_buff *skb,
struct net_device *dev,
unsigned short type,
void *daddr,
void *saddr,
unsigned len);
int (*rebuild_header)(struct sk_buff *skb);
#define HAVE_MULTICAST
void (*set_multicast_list)(struct net_device *dev);
#define HAVE_SET_MAC_ADDR
int (*set_mac_address)(struct net_device *dev,
void *addr);
#define HAVE_PRIVATE_IOCTL
int (*do_ioctl)(struct net_device *dev,
struct ifreq *ifr, int cmd);
#define HAVE_SET_CONFIG
int (*set_config)(struct net_device *dev,
struct ifmap *map);
#define HAVE_HEADER_CACHE
int (*hard_header_cache)(struct neighbour *neigh,
struct hh_cache *hh);
void (*header_cache_update)(struct hh_cache *hh,
struct net_device *dev,
unsigned char * haddr);
#define HAVE_CHANGE_MTU
int (*change_mtu)(struct net_device *dev, int new_mtu);
#define HAVE_TX_TIMEOUT
void (*tx_timeout) (struct net_device *dev);
void (*vlan_rx_register)(struct net_device *dev,
struct vlan_group *grp);
void (*vlan_rx_add_vid)(struct net_device *dev,
unsigned short vid);
void (*vlan_rx_kill_vid)(struct net_device *dev,
unsigned short vid);
int (*hard_header_parse)(struct sk_buff *skb,
unsigned char *haddr);
int (*neigh_setup)(struct net_device *dev, struct neigh_parms *);
int (*accept_fastpath)(struct net_device *, struct dst_entry*);
#ifdef CONFIG_NETPOLL
int netpoll_rx;
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
void (*poll_controller)(struct net_device *dev);
#endif
/* bridge stuff */
struct net_bridge_port *br_port;
#ifdef CONFIG_NET_DIVERT
/* this will get initialized at each interface type init routine */
struct divert_blk *divert;
#endif /* CONFIG_NET_DIVERT */
/* class/net/name entry */
struct class_device class_dev;
/* how much padding had been added by alloc_netdev() */
int padded;
};