LINUX 内核协议栈各层的数据结构

 

一, BSD

/**
 * struct socket - general BSD socket
 * @state: socket state (%SS_CONNECTED, etc)
 * @flags: socket flags (%SOCK_ASYNC_NOSPACE, etc)
 * @ops: protocol specific socket operations
 * @fasync_list: asynchronous wake up list
 * @file: file back pointer for gc
 * @sk: internal networking protocol agnostic socket representation
 * @wait: wait queue for several uses
 * @type: socket type (%SOCK_STREAM, etc)
 * @passcred: credentials (used only in Unix sockets, aka PF_LOCAL)
 */
struct socket {
    socket_state state;
    unsigned long flags;
    struct proto_ops *ops;
    struct fasync_struct *fasync_list;
    struct file *file;
    struct sock *sk;
    wait_queue_head_t wait;
    short type;
    unsigned char passcred;
};

struct proto_ops {
 int  family;
 struct module *owner;
 int  (*release)   (struct socket *sock);
 int  (*bind)      (struct socket *sock,
          struct sockaddr *myaddr,
          int sockaddr_len);
 int  (*connect)   (struct socket *sock,
          struct sockaddr *vaddr,
          int sockaddr_len, int flags);
 int  (*socketpair)(struct socket *sock1,
          struct socket *sock2);
 int  (*accept)    (struct socket *sock,
          struct socket *newsock, int flags);
 int  (*getname)   (struct socket *sock,
          struct sockaddr *addr,
          int *sockaddr_len, int peer);
 unsigned int (*poll)      (struct file *file, struct socket *sock,
          struct poll_table_struct *wait);
 int  (*ioctl)     (struct socket *sock, unsigned int cmd,
          unsigned long arg);
 int  (*listen)    (struct socket *sock, int len);
 int  (*shutdown)  (struct socket *sock, int flags);
 int  (*setsockopt)(struct socket *sock, int level,
          int optname, char __user *optval, int optlen);
 int  (*getsockopt)(struct socket *sock, int level,
          int optname, char __user *optval, int __user *optlen);
 int  (*sendmsg)   (struct kiocb *iocb, struct socket *sock,
          struct msghdr *m, size_t total_len);
 int  (*recvmsg)   (struct kiocb *iocb, struct socket *sock,
          struct msghdr *m, size_t total_len,
          int flags);
 int  (*mmap)      (struct file *file, struct socket *sock,
          struct vm_area_struct * vma);
 ssize_t  (*sendpage)  (struct socket *sock, struct page *page,
          int offset, size_t size, int flags);
};

/*
 * One entry of an asynchronous-notification list; struct socket keeps
 * such a list in its fasync_list member.
 */
struct fasync_struct {
    int magic;
    int fa_fd;
    /* Entries form a singly linked list through fa_next. */
    struct fasync_struct *fa_next;
    struct file *fa_file;
};

/*
 * Linked record carrying a name, a line number and flags.
 *
 * NOTE(review): struct socket and struct fasync_struct hold a
 * "struct file *", but this field set (next/parent/name/lineno/flags)
 * looks like a source-file record rather than an open-file object --
 * confirm that this is the definition that was meant to be shown here.
 */
struct file {
    struct file *next;
    struct file *parent;
    char *name;
    int lineno;
    int flags;
};

/* Wait queue head: a spinlock together with the list head task_list. */
typedef struct __wait_queue_head {
    spinlock_t lock;
    struct list_head task_list;
} wait_queue_head_t;

/***************************************************************************
 * INET socket层:
 ***************************************************************************/

 /**
  * struct sock - network layer representation of sockets
  * @__sk_common - shared layout with tcp_tw_bucket
  * @sk_zapped - ax25 & ipx means !linked
  * @sk_shutdown - mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN
  * @sk_use_write_queue - wheter to call sk->sk_write_space in sock_wfree
  * @sk_userlocks - %SO_SNDBUF and %SO_RCVBUF settings
  * @sk_lock - synchronizer
  * @sk_rcvbuf - size of receive buffer in bytes
  * @sk_sleep - sock wait queue
  * @sk_dst_cache - destination cache
  * @sk_dst_lock - destination cache lock
  * @sk_policy - flow policy
  * @sk_rmem_alloc - receive queue bytes committed
  * @sk_receive_queue - incoming packets
  * @sk_wmem_alloc - transmit queue bytes committed
  * @sk_write_queue - Packet sending queue
  * @sk_omem_alloc - "o" is "option" or "other"
  * @sk_wmem_queued - persistent queue size
  * @sk_forward_alloc - space allocated forward
  * @sk_allocation - allocation mode
  * @sk_sndbuf - size of send buffer in bytes
  * @sk_flags - %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE, %SO_OOBINLINE settings
  * @sk_no_check - %SO_NO_CHECK setting, wether or not checkup packets
  * @sk_debug - %SO_DEBUG setting
  * @sk_rcvtstamp - %SO_TIMESTAMP setting
  * @sk_no_largesend - whether to sent large segments or not
  * @sk_route_caps - route capabilities (e.g. %NETIF_F_TSO)
  * @sk_lingertime - %SO_LINGER l_linger setting
  * @sk_hashent - hash entry in several tables (e.g. tcp_ehash)
  * @sk_backlog - always used with the per-socket spinlock held
  * @sk_callback_lock - used with the callbacks in the end of this struct
  * @sk_error_queue - rarely used
  * @sk_prot - protocol handlers inside a network family
  * @sk_err - last error
  * @sk_err_soft - errors that don't cause failure but are the cause of a persistent failure not just 'timed out'
  * @sk_ack_backlog - current listen backlog
  * @sk_max_ack_backlog - listen backlog set in listen()
  * @sk_priority - %SO_PRIORITY setting
  * @sk_type - socket type (%SOCK_STREAM, etc)
  * @sk_localroute - route locally only, %SO_DONTROUTE setting
  * @sk_protocol - which protocol this socket belongs in this network family
  * @sk_peercred - %SO_PEERCRED setting
  * @sk_rcvlowat - %SO_RCVLOWAT setting
  * @sk_rcvtimeo - %SO_RCVTIMEO setting
  * @sk_sndtimeo - %SO_SNDTIMEO setting
  * @sk_filter - socket filtering instructions
  * @sk_protinfo - private area, net family specific, when not using slab
  * @sk_slab - the slabcache this instance was allocated from
  * @sk_timer - sock cleanup timer
  * @sk_stamp - time stamp of last packet received
  * @sk_socket - Identd and reporting IO signals
  * @sk_user_data - RPC layer private data
  * @sk_owner - module that owns this socket
  * @sk_sndmsg_page - cached page for sendmsg
  * @sk_sndmsg_off - cached offset for sendmsg
  * @sk_send_head - front of stuff to transmit
  * @sk_write_pending - a write to stream socket waits to start
  * @sk_queue_shrunk - write queue has been shrunk recently
  * @sk_state_change - callback to indicate change in the state of the sock
  * @sk_data_ready - callback to indicate there is data to be processed
  * @sk_write_space - callback to indicate there is bf sending space available
  * @sk_error_report - callback to indicate errors (e.g. %MSG_ERRQUEUE)
  * @sk_backlog_rcv - callback to process the backlog
  * @sk_destruct - called at sock freeing time, i.e. when all refcnt == 0
 */
struct sock {
 /*
  * Now struct tcp_tw_bucket also uses sock_common, so please just
  * don't add nothing before this first member (__sk_common) --acme
  */
 struct sock_common __sk_common;
#define sk_family  __sk_common.skc_family
#define sk_state  __sk_common.skc_state
#define sk_reuse  __sk_common.skc_reuse
#define sk_bound_dev_if  __sk_common.skc_bound_dev_if
#define sk_node   __sk_common.skc_node
#define sk_bind_node  __sk_common.skc_bind_node
#define sk_refcnt  __sk_common.skc_refcnt
 volatile unsigned char sk_zapped;
 unsigned char  sk_shutdown;
 unsigned char  sk_use_write_queue;
 unsigned char  sk_userlocks;
 socket_lock_t  sk_lock;
 int   sk_rcvbuf;
 wait_queue_head_t *sk_sleep;
 struct dst_entry *sk_dst_cache;
 rwlock_t  sk_dst_lock;
 struct xfrm_policy *sk_policy[2];
 atomic_t  sk_rmem_alloc;
 struct sk_buff_head sk_receive_queue;
 atomic_t  sk_wmem_alloc;
 struct sk_buff_head sk_write_queue;
 atomic_t  sk_omem_alloc;
 int   sk_wmem_queued;
 int   sk_forward_alloc;
 unsigned int  sk_allocation;
 int   sk_sndbuf;
 unsigned long   sk_flags;
 char    sk_no_check;
 unsigned char  sk_debug;
 unsigned char  sk_rcvtstamp;
 unsigned char  sk_no_largesend;
 int   sk_route_caps;
 unsigned long         sk_lingertime;
 int   sk_hashent;
 /*
  * The backlog queue is special, it is always used with
  * the per-socket spinlock held and requires low latency
  * access. Therefore we special case it's implementation.
  */
 struct {
  struct sk_buff *head;
  struct sk_buff *tail;
 } sk_backlog;
 rwlock_t  sk_callback_lock;
 struct sk_buff_head sk_error_queue;
 struct proto  *sk_prot;
 int   sk_err,
    sk_err_soft;
 unsigned short  sk_ack_backlog;
 unsigned short  sk_max_ack_backlog;
 __u32   sk_priority;
 unsigned short  sk_type;
 unsigned char  sk_localroute;
 unsigned char  sk_protocol;
 struct ucred  sk_peercred;
 int   sk_rcvlowat;
 long   sk_rcvtimeo;
 long   sk_sndtimeo;
 struct sk_filter       *sk_filter;
 void   *sk_protinfo;
 kmem_cache_t  *sk_slab;
 struct timer_list sk_timer;
 struct timeval  sk_stamp;
 struct socket  *sk_socket;
 void   *sk_user_data;
 struct module  *sk_owner;
 struct page  *sk_sndmsg_page;
 __u32   sk_sndmsg_off;
 struct sk_buff  *sk_send_head;
 int   sk_write_pending;
 void   *sk_security;
 __u8   sk_queue_shrunk;
 /* three bytes hole, try to pack */
 void   (*sk_state_change)(struct sock *sk);
 void   (*sk_data_ready)(struct sock *sk, int bytes);
 void   (*sk_write_space)(struct sock *sk);
 void   (*sk_error_report)(struct sock *sk);
   int   (*sk_backlog_rcv)(struct sock *sk,
        struct sk_buff *skb); 
 void                    (*sk_destruct)(struct sock *sk);
};

/**
 * struct sock_common - minimal network layer representation of sockets
 * @skc_family: network address family
 * @skc_state: connection state
 * @skc_reuse: %SO_REUSEADDR setting
 * @skc_bound_dev_if: bound device index if != 0
 * @skc_node: main hash linkage for various protocol lookup tables
 * @skc_bind_node: bind hash linkage for various protocol lookup tables
 * @skc_refcnt: reference count
 *
 * The minimal network layer representation of sockets; it is the header
 * shared by struct sock and struct tcp_tw_bucket.
 */
struct sock_common {
    unsigned short skc_family;
    volatile unsigned char skc_state;
    unsigned char skc_reuse;
    int skc_bound_dev_if;
    struct hlist_node skc_node;
    struct hlist_node skc_bind_node;
    atomic_t skc_refcnt;
};

/*
 * The per-socket lock.  Its spinlock synchronizes user contexts with
 * software interrupt processing, while the mini-semaphore synchronizes
 * multiple users amongst themselves.
 */
struct sock_iocb;

typedef struct {
    spinlock_t slock;
    struct sock_iocb *owner;
    wait_queue_head_t wq;
} socket_lock_t;

/*
 * Every dst_entry carries a reference count and sits in some parent
 * list(s).  Removing it from the parent list "frees" it (dst_free); it
 * then enters the dead state (dst->obsolete > 0).  If its refcnt is zero
 * it can be destroyed immediately, otherwise it is added to the gc list
 * and the garbage collector periodically checks the refcnt.
 */

struct sk_buff;

struct dst_entry {
    struct dst_entry *next;
    atomic_t __refcnt;              /* client references */
    int __use;
    struct dst_entry *child;
    struct net_device *dev;
    int obsolete;
    int flags;
    /* Bit values for the flags member above. */
#define DST_HOST 1
#define DST_NOXFRM 2
#define DST_NOPOLICY 4
#define DST_NOHASH 8
    unsigned long lastuse;
    unsigned long expires;

    unsigned short header_len;      /* more space at head required */
    unsigned short trailer_len;     /* space to reserve at tail */

    u32 metrics[RTAX_MAX];
    struct dst_entry *path;

    unsigned long rate_last;        /* rate limiting for ICMP */
    unsigned long rate_tokens;

    int error;

    struct neighbour *neighbour;
    struct hh_cache *hh;
    struct xfrm_state *xfrm;

    int (*input)(struct sk_buff *);
    int (*output)(struct sk_buff *);

#ifdef CONFIG_NET_CLS_ROUTE
    __u32 tclassid;
#endif

    struct dst_ops *ops;
    struct rcu_head rcu_head;

    /* Zero-length trailing array; must stay the last member. */
    char info[0];
};

/* Head of an sk_buff queue: next/prev links, a length and a lock. */
struct sk_buff_head {
    /* next and prev must remain the first two members. */
    struct sk_buff *next;
    struct sk_buff *prev;

    __u32 qlen;
    spinlock_t lock;
};

/***********************************************************************
 * TCP/UDP层:处理传输层的操作,传输层用struct inet_protocol和struct proto两个结构表示。文件主要……(原文在此处中断)
 ***********************************************************************/

  struct {
  int (*open)(struct net_device *dev);
  void (*close)(struct net_device *dev);

  /* if open & DCD */
  void (*start)(struct net_device *dev);
  /* if open & !DCD */
  void (*stop)(struct net_device *dev);

  void (*detach)(struct hdlc_device_struct *hdlc);
  int (*netif_rx)(struct sk_buff *skb);
  unsigned short (*type_trans)(struct sk_buff *skb,
          struct net_device *dev);
  int id;  /* IF_PROTO_HDLC/CISCO/FR/etc. */
 }proto;

/*****************************************************************************

/*IP层:处理网络层的操作*/

*****************************************************************************/


/*
 * Handler record for one packet type: func is the callback and list
 * links the record into a list.
 */
struct packet_type {
    unsigned short type;        /* This is really htons(ether_type). */
    struct net_device *dev;     /* NULL is wildcarded here */
    int (*func)(struct sk_buff *, struct net_device *,
                struct packet_type *);
    void *af_packet_priv;
    struct list_head list;
};


/*
 * The DEVICE structure.
 * Actually, this whole structure is a big mistake.  It mixes I/O
 * data with strictly "high-level" data, and it has to know about
 * almost every data structure used in the INET module.
 *
 * FIXME: cleanup struct net_device such that network protocol info
 * moves out.
 */

struct net_device
{

 /*
  * This is the first field of the "visible" part of this structure
  * (i.e. as seen by users in the "Space.c" file).  It is the name
  * the interface.
  */
 char   name[IFNAMSIZ];

 /*
  * I/O specific fields
  * FIXME: Merge these and struct ifmap into one
  */
 unsigned long  mem_end; /* shared mem end */
 unsigned long  mem_start; /* shared mem start */
 unsigned long  base_addr; /* device I/O address */
 unsigned int  irq;  /* device IRQ number */

 /*
  * Some hardware also needs these fields, but they are not
  * part of the usual set specified in Space.c.
  */

 unsigned char  if_port; /* Selectable AUI, TP,..*/
 unsigned char  dma;  /* DMA channel  */

 unsigned long  state;

 struct net_device *next;
 
 /* The device initialization function. Called only once. */
 int   (*init)(struct net_device *dev);

 /* ------- Fields preinitialized in Space.c finish here ------- */

 struct net_device *next_sched;

 /* Interface index. Unique device identifier */
 int   ifindex;
 int   iflink;


 struct net_device_stats* (*get_stats)(struct net_device *dev);
 struct iw_statistics* (*get_wireless_stats)(struct net_device *dev);

 /* List of functions to handle Wireless Extensions (instead of ioctl).
  * See <net/iw_handler.h> for details. Jean II */
 const struct iw_handler_def * wireless_handlers;
 /* Instance data managed by the core of Wireless Extensions. */
 struct iw_public_data * wireless_data;

 struct ethtool_ops *ethtool_ops;

 /*
  * This marks the end of the "visible" part of the structure. All
  * fields hereafter are internal to the system, and may change at
  * will (read: may be cleaned up at will).
  */

 /* These may be needed for future network-power-down code. */
 unsigned long  trans_start; /* Time (in jiffies) of last Tx */
 unsigned long  last_rx; /* Time of last Rx */

 unsigned short  flags; /* interface flags (a la BSD) */
 unsigned short  gflags;
        unsigned short          priv_flags; /* Like 'flags' but invisible to userspace. */
        unsigned short          unused_alignment_fixer; /* Because we need priv_flags,
                                                         * and we want to be 32-bit aligned.
                                                         */

 unsigned  mtu; /* interface MTU value  */
 unsigned short  type; /* interface hardware type */
 unsigned short  hard_header_len; /* hardware hdr length */
 void   *priv; /* pointer to private data */

 struct net_device *master; /* Pointer to master device of a group,
       * which this device is member of.
       */

 /* Interface address info. */
 unsigned char  broadcast[MAX_ADDR_LEN]; /* hw bcast add */
 unsigned char  dev_addr[MAX_ADDR_LEN]; /* hw address */
 unsigned char  addr_len; /* hardware address length */

 struct dev_mc_list *mc_list; /* Multicast mac addresses */
 int   mc_count; /* Number of installed mcasts */
 int   promiscuity;
 int   allmulti;

 int   watchdog_timeo;
 struct timer_list watchdog_timer;

 /* Protocol specific pointers */
 
 void    *atalk_ptr; /* AppleTalk link  */
 void   *ip_ptr; /* IPv4 specific data */ 
 void                    *dn_ptr;        /* DECnet specific data */
 void                    *ip6_ptr;       /* IPv6 specific data */
 void   *ec_ptr; /* Econet specific data */
 void   *ax25_ptr; /* AX.25 specific data */

 struct list_head poll_list; /* Link to poll list */
 int   quota;
 int   weight;

 struct Qdisc  *qdisc;
 struct Qdisc  *qdisc_sleeping;
 struct Qdisc  *qdisc_ingress;
 struct list_head qdisc_list;
 unsigned long  tx_queue_len; /* Max frames per queue allowed */

 /* ingress path synchronizer */
 spinlock_t  ingress_lock;
 /* hard_start_xmit synchronizer */
 spinlock_t  xmit_lock;
 /* cpu id of processor entered to hard_start_xmit or -1,
    if nobody entered there.
  */
 int   xmit_lock_owner;
 /* device queue lock */
 spinlock_t  queue_lock;
 /* Number of references to this device */
 atomic_t  refcnt;
 /* delayed register/unregister */
 struct list_head todo_list;
 /* device name hash chain */
 struct hlist_node name_hlist;
 /* device index hash chain */
 struct hlist_node index_hlist;

 /* register/unregister state machine */
 enum { NETREG_UNINITIALIZED=0,
        NETREG_REGISTERING, /* called register_netdevice */
        NETREG_REGISTERED, /* completed register todo */
        NETREG_UNREGISTERING, /* called unregister_netdevice */
        NETREG_UNREGISTERED, /* completed unregister todo */
        NETREG_RELEASED,  /* called free_netdev */
 } reg_state;

 /* Net device features */
 int   features;
#define NETIF_F_SG  1 /* Scatter/gather IO. */
#define NETIF_F_IP_CSUM  2 /* Can checksum only TCP/UDP over IPv4. */
#define NETIF_F_NO_CSUM  4 /* Does not require checksum. F.e. loopack. */
#define NETIF_F_HW_CSUM  8 /* Can checksum all the packets. */
#define NETIF_F_HIGHDMA  32 /* Can DMA to high memory. */
#define NETIF_F_FRAGLIST 64 /* Scatter/gather IO. */
#define NETIF_F_HW_VLAN_TX 128 /* Transmit VLAN hw acceleration */
#define NETIF_F_HW_VLAN_RX 256 /* Receive VLAN hw acceleration */
#define NETIF_F_HW_VLAN_FILTER 512 /* Receive filtering on VLAN */
#define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */
#define NETIF_F_TSO  2048 /* Can offload TCP/IP segmentation */
#define NETIF_F_LLTX  4096 /* LockLess TX */

 /* Called after device is detached from network. */
 void   (*uninit)(struct net_device *dev);
 /* Called after last user reference disappears. */
 void   (*destructor)(struct net_device *dev);

 /* Pointers to interface service routines. */
 int   (*open)(struct net_device *dev);
 int   (*stop)(struct net_device *dev);
 int   (*hard_start_xmit) (struct sk_buff *skb,
          struct net_device *dev);
#define HAVE_NETDEV_POLL
 int   (*poll) (struct net_device *dev, int *quota);
 int   (*hard_header) (struct sk_buff *skb,
      struct net_device *dev,
      unsigned short type,
      void *daddr,
      void *saddr,
      unsigned len);
 int   (*rebuild_header)(struct sk_buff *skb);
#define HAVE_MULTICAST   
 void   (*set_multicast_list)(struct net_device *dev);
#define HAVE_SET_MAC_ADDR    
 int   (*set_mac_address)(struct net_device *dev,
         void *addr);
#define HAVE_PRIVATE_IOCTL
 int   (*do_ioctl)(struct net_device *dev,
         struct ifreq *ifr, int cmd);
#define HAVE_SET_CONFIG
 int   (*set_config)(struct net_device *dev,
           struct ifmap *map);
#define HAVE_HEADER_CACHE
 int   (*hard_header_cache)(struct neighbour *neigh,
           struct hh_cache *hh);
 void   (*header_cache_update)(struct hh_cache *hh,
             struct net_device *dev,
             unsigned char *  haddr);
#define HAVE_CHANGE_MTU
 int   (*change_mtu)(struct net_device *dev, int new_mtu);

#define HAVE_TX_TIMEOUT
 void   (*tx_timeout) (struct net_device *dev);

 void   (*vlan_rx_register)(struct net_device *dev,
          struct vlan_group *grp);
 void   (*vlan_rx_add_vid)(struct net_device *dev,
         unsigned short vid);
 void   (*vlan_rx_kill_vid)(struct net_device *dev,
          unsigned short vid);

 int   (*hard_header_parse)(struct sk_buff *skb,
           unsigned char *haddr);
 int   (*neigh_setup)(struct net_device *dev, struct neigh_parms *);
 int   (*accept_fastpath)(struct net_device *, struct dst_entry*);
#ifdef CONFIG_NETPOLL
 int   netpoll_rx;
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
 void                    (*poll_controller)(struct net_device *dev);
#endif

 /* bridge stuff */
 struct net_bridge_port *br_port;

#ifdef CONFIG_NET_DIVERT
 /* this will get initialized at each interface type init routine */
 struct divert_blk *divert;
#endif /* CONFIG_NET_DIVERT */

 /* class/net/name entry */
 struct class_device class_dev;
 /* how much padding had been added by alloc_netdev() */
 int padded;
};

 

 

你可能感兴趣的:(linux,struct,socket,list,callback,DST)