IPVS相关数据结构

推荐一个网站,可以快速地查到内核源码中某个结构在哪里定义、在哪里被引用:
https://elixir.bootlin.com/linux/v4.9.1/source/include/net/ip_vs.h#L1212
感觉比Source Insight的查找功能要方便。

1 ——net

Linux提供的一种内核级别环境隔离,最重要的就是namespace的概念。
常见的有以下6种namespace,分别对不同的资源进行隔离(Linux 4.6起还新增了Cgroup namespaces):
Mount namespaces
UTS namespaces
IPC namespaces
PID namespaces
Network namespaces
User namespaces

每个network namespace都有一个net结构。
net定义在include/net/net_namespace.h中

/*
 * struct net - state kept for one network namespace.
 *
 * Most subsystems embed their per-namespace state directly (netns_core,
 * netns_ipv4, ...); members guarded by #if/#ifdef exist only when the
 * corresponding CONFIG_* option is enabled, so the layout depends on the
 * kernel configuration.
 */
struct net {
	atomic_t		passive;	/* To decide when the network
						 * namespace should be freed.
						 */
	atomic_t		count;		/* To decide when the network
						 *  namespace should be shut down.
						 */
	spinlock_t		rules_mod_lock;

	atomic64_t		cookie_gen;

	struct list_head	list;		/* list of network namespaces */
	struct list_head	cleanup_list;	/* namespaces on death row */
	struct list_head	exit_list;	/* Use only net_mutex */

	struct user_namespace   *user_ns;	/* Owning user namespace */
	struct ucounts		*ucounts;
	spinlock_t		nsid_lock;
	struct idr		netns_ids;

	struct ns_common	ns;

	struct proc_dir_entry 	*proc_net;
	struct proc_dir_entry 	*proc_net_stat;

#ifdef CONFIG_SYSCTL
	struct ctl_table_set	sysctls;
#endif

	struct sock 		*rtnl;			/* rtnetlink socket */
	struct sock		*genl_sock;

	/* NOTE(review): dev_* members look like the per-namespace device
	 * list and its name/index hash heads - confirm against net/core/dev.c.
	 */
	struct list_head 	dev_base_head;
	struct hlist_head 	*dev_name_head;
	struct hlist_head	*dev_index_head;
	unsigned int		dev_base_seq;	/* protected by rtnl_mutex */
	int			ifindex;
	unsigned int		dev_unreg_count;

	/* core fib_rules */
	struct list_head	rules_ops;


	struct net_device       *loopback_dev;          /* The loopback */
	struct netns_core	core;
	struct netns_mib	mib;
	struct netns_packet	packet;
	struct netns_unix	unx;
	struct netns_ipv4	ipv4;
#if IS_ENABLED(CONFIG_IPV6)
	struct netns_ipv6	ipv6;
#endif
#if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN)
	struct netns_ieee802154_lowpan	ieee802154_lowpan;
#endif
#if defined(CONFIG_IP_SCTP) || defined(CONFIG_IP_SCTP_MODULE)
	struct netns_sctp	sctp;
#endif
#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE)
	struct netns_dccp	dccp;
#endif
	/* Everything down to the matching #endif is netfilter-only state. */
#ifdef CONFIG_NETFILTER
	struct netns_nf		nf;
	struct netns_xt		xt;
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
	struct netns_ct		ct;
#endif
#if defined(CONFIG_NF_TABLES) || defined(CONFIG_NF_TABLES_MODULE)
	struct netns_nftables	nft;
#endif
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
	struct netns_nf_frag	nf_frag;
#endif
	struct sock		*nfnl;
	struct sock		*nfnl_stash;
#if IS_ENABLED(CONFIG_NETFILTER_NETLINK_ACCT)
	struct list_head        nfnl_acct_list;
#endif
#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
	struct list_head	nfct_timeout_list;
#endif
#endif
#ifdef CONFIG_WEXT_CORE
	struct sk_buff_head	wext_nlevents;
#endif
	struct net_generic __rcu	*gen;

	/* Note : following structs are cache line aligned */
#ifdef CONFIG_XFRM
	struct netns_xfrm	xfrm;
#endif
	/* IPVS state is referenced through a pointer rather than embedded
	 * like the netns_* members above.
	 */
#if IS_ENABLED(CONFIG_IP_VS)
	struct netns_ipvs	*ipvs;
#endif
#if IS_ENABLED(CONFIG_MPLS)
	struct netns_mpls	mpls;
#endif
	struct sock		*diag_nlsk;
	atomic_t		fnhe_genid;
};

2 ——netns_ipvs

net结构中关于ipvs的定义如下:

/* Excerpt from struct net: pointer to this namespace's IPVS state,
 * present only when CONFIG_IP_VS is enabled.
 */
#if IS_ENABLED(CONFIG_IP_VS)
	struct netns_ipvs	*ipvs;
#endif

也就是说,每个network namespace的net结构中,都有一个指向netns_ipvs结构的指针ipvs(仅在开启CONFIG_IP_VS时存在)。
netns_ipvs定义于include/net/ip_vs.h中

/* IPVS in network namespace
 *
 * Holds the IPVS state of one network namespace; struct net refers to it
 * through its "ipvs" pointer, and the "net" member below points back.
 * Members are grouped by the IPVS source file that uses them (see the
 * ip_vs_* section comments).
 */
struct netns_ipvs {
	int			gen;		/* Generation */
	int			enable;		/* enable like nf_hooks do */
	/* Hash table: for real service lookups */
	/* 4 bits => 16 buckets; the mask is SIZE - 1 */
	#define IP_VS_RTAB_BITS 4
	#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
	#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)

	struct hlist_head	rs_table[IP_VS_RTAB_SIZE];
	/* ip_vs_app */
	struct list_head	app_list;
	/* ip_vs_proto */
	#define IP_VS_PROTO_TAB_SIZE	32	/* must be power of 2 */
	struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE];
	/* ip_vs_proto_tcp */
	/* The per-protocol app tables below all use 4 bits => 16 entries. */
#ifdef CONFIG_IP_VS_PROTO_TCP
	#define	TCP_APP_TAB_BITS	4
	#define	TCP_APP_TAB_SIZE	(1 << TCP_APP_TAB_BITS)
	#define	TCP_APP_TAB_MASK	(TCP_APP_TAB_SIZE - 1)
	struct list_head	tcp_apps[TCP_APP_TAB_SIZE];
#endif
	/* ip_vs_proto_udp */
#ifdef CONFIG_IP_VS_PROTO_UDP
	#define	UDP_APP_TAB_BITS	4
	#define	UDP_APP_TAB_SIZE	(1 << UDP_APP_TAB_BITS)
	#define	UDP_APP_TAB_MASK	(UDP_APP_TAB_SIZE - 1)
	struct list_head	udp_apps[UDP_APP_TAB_SIZE];
#endif
	/* ip_vs_proto_sctp */
#ifdef CONFIG_IP_VS_PROTO_SCTP
	#define SCTP_APP_TAB_BITS	4
	#define SCTP_APP_TAB_SIZE	(1 << SCTP_APP_TAB_BITS)
	#define SCTP_APP_TAB_MASK	(SCTP_APP_TAB_SIZE - 1)
	/* Hash table for SCTP application incarnations	 */
	struct list_head	sctp_apps[SCTP_APP_TAB_SIZE];
#endif
	/* ip_vs_conn */
	atomic_t		conn_count;      /* connection counter */

	/* ip_vs_ctl */
	struct ip_vs_stats		tot_stats;  /* Statistics & est. */

	int			num_services;    /* no of virtual services */

	/* Trash for destinations */
	struct list_head	dest_trash;
	spinlock_t		dest_trash_lock;
	struct timer_list	dest_trash_timer; /* expiration timer */
	/* Service counters */
	atomic_t		ftpsvc_counter;
	atomic_t		nullsvc_counter;
	atomic_t		conn_out_counter;

#ifdef CONFIG_SYSCTL
	/* 1/rate drop and drop-entry variables */
	struct delayed_work	defense_work;   /* Work handler */
	int			drop_rate;
	int			drop_counter;
	atomic_t		dropentry;
	/* locks in ctl.c */
	spinlock_t		dropentry_lock;  /* drop entry handling */
	spinlock_t		droppacket_lock; /* drop packet handling */
	spinlock_t		securetcp_lock;  /* state and timeout tables */

	/* sys-ctl struct */
	struct ctl_table_header	*sysctl_hdr;
	struct ctl_table	*sysctl_tbl;
#endif

	/* sysctl variables */
	/* Note: these are declared outside the CONFIG_SYSCTL guard above. */
	int			sysctl_amemthresh;
	int			sysctl_am_droprate;
	int			sysctl_drop_entry;
	int			sysctl_drop_packet;
	int			sysctl_secure_tcp;
#ifdef CONFIG_IP_VS_NFCT
	int			sysctl_conntrack;
#endif
	int			sysctl_snat_reroute;
	int			sysctl_sync_ver;
	int			sysctl_sync_ports;
	int			sysctl_sync_persist_mode;
	unsigned long		sysctl_sync_qlen_max;
	int			sysctl_sync_sock_size;
	int			sysctl_cache_bypass;
	int			sysctl_expire_nodest_conn;
	int			sysctl_sloppy_tcp;
	int			sysctl_sloppy_sctp;
	int			sysctl_expire_quiescent_template;
	int			sysctl_sync_threshold[2];
	unsigned int		sysctl_sync_refresh_period;
	int			sysctl_sync_retries;
	int			sysctl_nat_icmp_send;
	int			sysctl_pmtu_disc;
	int			sysctl_backup_only;
	int			sysctl_conn_reuse_mode;
	int			sysctl_schedule_icmp;
	int			sysctl_ignore_tunneled;

	/* ip_vs_lblc */
	int			sysctl_lblc_expiration;
	struct ctl_table_header	*lblc_ctl_header;
	struct ctl_table	*lblc_ctl_table;
	/* ip_vs_lblcr */
	int			sysctl_lblcr_expiration;
	struct ctl_table_header	*lblcr_ctl_header;
	struct ctl_table	*lblcr_ctl_table;
	/* ip_vs_est */
	struct list_head	est_list;	/* estimator list */
	spinlock_t		est_lock;
	struct timer_list	est_timer;	/* Estimation timer */
	/* ip_vs_sync */
	spinlock_t		sync_lock;
	struct ipvs_master_sync_state *ms;
	spinlock_t		sync_buff_lock;
	struct task_struct	**backup_threads;
	int			threads_mask;
	volatile int		sync_state;
	struct mutex		sync_mutex;
	struct ipvs_sync_daemon_cfg	mcfg;	/* Master Configuration */
	struct ipvs_sync_daemon_cfg	bcfg;	/* Backup Configuration */
	/* net name space ptr */
	struct net		*net;            /* Needed by timer routines */
	/* Number of heterogeneous destinations, needed because heterogeneous
	 * are not supported when synchronization is enabled.
	 */
	unsigned int		mixed_address_family_dests;
};

3—— ip_vs_proto_data

ipvs支持的协议包括:TCP,UDP,SCTP,AH,ESP。
存储协议信息的结构为:ip_vs_proto_data。
定义在:include/net/ip_vs.h

/* protocol data per netns
 *
 * Pairs a protocol descriptor (struct ip_vs_protocol) with state that is
 * kept separately for each network namespace.
 */
struct ip_vs_proto_data {
	/* NOTE(review): presumably links entries that share a slot of
	 * netns_ipvs.proto_data_table - confirm in ip_vs_proto.c.
	 */
	struct ip_vs_proto_data	*next;
	struct ip_vs_protocol	*pp;		/* protocol descriptor / callbacks */
	int			*timeout_table;	/* protocol timeout table */
	atomic_t		appcnt;		/* counter of proto app incs. */
	/* NOTE(review): name suggests this is only meaningful for TCP - confirm. */
	struct tcp_states_t	*tcp_state_table;
};

ip_vs_protocol也定义在include/net/ip_vs.h中:

/*
 * struct ip_vs_protocol - descriptor for one transport protocol.
 *
 * Apart from the identification fields, every member is a function
 * pointer. The struct only declares the hooks; each protocol supplies an
 * instance that fills them in (ip_vs_protocol_tcp is one example), and a
 * hook may be left NULL.
 */
struct ip_vs_protocol {
	struct ip_vs_protocol	*next;
	char			*name;		/* e.g. "TCP" */
	u16			protocol;	/* e.g. IPPROTO_TCP */
	u16			num_states;
	int			dont_defrag;

	/* Hooks that receive only the protocol descriptor. */
	void (*init)(struct ip_vs_protocol *pp);

	void (*exit)(struct ip_vs_protocol *pp);

	/* Hooks that receive the namespace's netns_ipvs and its
	 * ip_vs_proto_data.
	 */
	int (*init_netns)(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd);

	void (*exit_netns)(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd);

	/* Hands back a verdict through *verdict and a connection through
	 * *cpp. NOTE(review): meaning of the int return value is not shown
	 * here - confirm in the protocol implementations.
	 */
	int (*conn_schedule)(struct netns_ipvs *ipvs,
			     int af, struct sk_buff *skb,
			     struct ip_vs_proto_data *pd,
			     int *verdict, struct ip_vs_conn **cpp,
			     struct ip_vs_iphdr *iph);

	/* Connection lookups; both take a read-only skb and IP header. */
	struct ip_vs_conn *
	(*conn_in_get)(struct netns_ipvs *ipvs,
		       int af,
		       const struct sk_buff *skb,
		       const struct ip_vs_iphdr *iph);

	struct ip_vs_conn *
	(*conn_out_get)(struct netns_ipvs *ipvs,
			int af,
			const struct sk_buff *skb,
			const struct ip_vs_iphdr *iph);

	/* NAT handlers; the skb is passed non-const. */
	int (*snat_handler)(struct sk_buff *skb, struct ip_vs_protocol *pp,
			    struct ip_vs_conn *cp, struct ip_vs_iphdr *iph);

	int (*dnat_handler)(struct sk_buff *skb, struct ip_vs_protocol *pp,
			    struct ip_vs_conn *cp, struct ip_vs_iphdr *iph);

	int (*csum_check)(int af, struct sk_buff *skb,
			  struct ip_vs_protocol *pp);

	/* Maps a numeric state to its name. */
	const char *(*state_name)(int state);

	void (*state_transition)(struct ip_vs_conn *cp, int direction,
				 const struct sk_buff *skb,
				 struct ip_vs_proto_data *pd);

	/* Application (struct ip_vs_app) registration and binding. */
	int (*register_app)(struct netns_ipvs *ipvs, struct ip_vs_app *inc);

	void (*unregister_app)(struct netns_ipvs *ipvs, struct ip_vs_app *inc);

	int (*app_conn_bind)(struct ip_vs_conn *cp);

	void (*debug_packet)(int af, struct ip_vs_protocol *pp,
			     const struct sk_buff *skb,
			     int offset,
			     const char *msg);

	void (*timeout_change)(struct ip_vs_proto_data *pd, int flags);
};

可以看出,结构体中只是声明了一系列函数指针,并没有函数的实现。
函数的实现在具体的协议对应的文件中。比如:net/netfilter/ipvs/ip_vs_proto_tcp.c

/*
 * TCP instance of struct ip_vs_protocol: binds each protocol hook to its
 * TCP implementation. Designated initializers are used, so the order of
 * entries here need not follow the member order of the struct.
 */
struct ip_vs_protocol ip_vs_protocol_tcp = {
	.name =			"TCP",
	.protocol =		IPPROTO_TCP,
	.num_states =		IP_VS_TCP_S_LAST,
	.dont_defrag =		0,
	/* No protocol-wide init/exit hooks; only the per-netns ones are set. */
	.init =			NULL,
	.exit =			NULL,
	.init_netns =		__ip_vs_tcp_init,
	.exit_netns =		__ip_vs_tcp_exit,
	.register_app =		tcp_register_app,
	.unregister_app =	tcp_unregister_app,
	.conn_schedule =	tcp_conn_schedule,
	/* Lookups and packet debugging use ip_vs_* helpers, not tcp_* ones. */
	.conn_in_get =		ip_vs_conn_in_get_proto,
	.conn_out_get =		ip_vs_conn_out_get_proto,
	.snat_handler =		tcp_snat_handler,
	.dnat_handler =		tcp_dnat_handler,
	.csum_check =		tcp_csum_check,
	.state_name =		tcp_state_name,
	.state_transition =	tcp_state_transition,
	.app_conn_bind =	tcp_app_conn_bind,
	.debug_packet =		ip_vs_tcpudp_debug_packet,
	.timeout_change =	tcp_timeout_change,
};

4—— ip_vs_conn

同样定义在include/net/ip_vs.h中

/* IP_VS structure allocated for each dynamically scheduled connection */
struct ip_vs_conn {
	struct hlist_node	c_list;         /* hashed list heads */
	/* Protocol, addresses and port numbers */
	/* Ports are __be16; the c/d/v prefixes match caddr/daddr/vaddr
	 * below (client / destination / virtual).
	 */
	__be16                  cport;
	__be16                  dport;
	__be16                  vport;
	u16			af;		/* address family */
	union nf_inet_addr      caddr;          /* client address */
	union nf_inet_addr      vaddr;          /* virtual address */
	union nf_inet_addr      daddr;          /* destination address */
	volatile __u32          flags;          /* status flags */
	__u16                   protocol;       /* Which protocol (TCP/UDP) */
	__u16			daf;		/* Address family of the dest */
	struct netns_ipvs	*ipvs;		/* owning namespace's IPVS state */

	/* counter and timer */
	atomic_t		refcnt;		/* reference count */
	struct timer_list	timer;		/* Expiration timer */
	volatile unsigned long	timeout;	/* timeout */

	/* Flags and state transition */
	spinlock_t              lock;           /* lock for state transition */
	volatile __u16          state;          /* state info */
	volatile __u16          old_state;      /* old state, to be used for
						 * state transition triggered
						 * synchronization
						 */
	__u32			fwmark;		/* Fire wall mark from skb */
	unsigned long		sync_endtime;	/* jiffies + sent_retries */

	/* Control members */
	struct ip_vs_conn       *control;       /* Master control connection */
	atomic_t                n_control;      /* Number of controlled ones */
	struct ip_vs_dest       *dest;          /* real server */
	atomic_t                in_pkts;        /* incoming packet counter */

	/* Packet transmitter for different forwarding methods.  If it
	 * mangles the packet, it must return NF_DROP or better NF_STOLEN,
	 * otherwise this must be changed to a sk_buff **.
	 * NF_ACCEPT can be returned when destination is local.
	 *
	 * This is a per-connection function pointer, so each connection
	 * carries the transmit routine of its own forwarding method.
	 */
	int (*packet_xmit)(struct sk_buff *skb, struct ip_vs_conn *cp,
			   struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);

	/* Note: we can group the following members into a structure,
	 * in order to save more space, and the following members are
	 * only used in VS/NAT anyway
	 */
	struct ip_vs_app        *app;           /* bound ip_vs_app object */
	void                    *app_data;      /* Application private data */
	struct ip_vs_seq        in_seq;         /* incoming seq. struct */
	struct ip_vs_seq        out_seq;        /* outgoing seq. struct */

	/* NOTE(review): pe/pe_data look like a persistence-engine hook and
	 * its data buffer (length in pe_data_len) - confirm in ip_vs_pe.c.
	 */
	const struct ip_vs_pe	*pe;
	char			*pe_data;
	__u8			pe_data_len;

	struct rcu_head		rcu_head;
};

其中packet_xmit是一个函数指针,作用是发送数据包。DR/NAT/TUN三种转发模式分别对应不同的发送函数。

5—— conn_schedule

当一个连接的第一个报文到达时,会进入此逻辑:将其调度到某个realserver上,并创建连接。

你可能感兴趣的:(LVS)