netfilter连接跟踪主要数据结构

linux协议栈中netfilter框架对数据包的处理(比如snat、dnat等)基本上都是基于连接跟踪来做的。连接跟踪就是记录数据流在协议栈中的两个方向,不同的协议有不同的特征:tcp/udp就是五元组(源ip、目的ip、源端口、目的端口、协议号),icmp协议就是ip、id、type、code。

1、struct nf_conn

struct nf_conn结构体是连接跟踪的抽象结构,其中最重要的元素是struct nf_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX],它记录了一条连接的特征信息,根据这个结构中的信息可以唯一确定一条连接;其次是连接的状态unsigned long status。各元素的详解如下。

/*
 * struct nf_conn - one tracked connection.
 *
 * The most important members are tuplehash[], which holds the tuples that
 * uniquely identify the connection, and status, which holds its state bits.
 */
struct nf_conn {
	/* Usage count in here is 1 for hash table/destruct timer, 1 per skb,
	   plus 1 for any connection(s) we are `master' for */
	/* Reference count of this conntrack entry. */
	struct nf_conntrack ct_general;

	spinlock_t lock;

	/* XXX should I move this to the tail ? - Y.K */
	/* These are my tuples; original and reply */
	/* Identifying information of the connection, one entry per direction:
	   for tcp/udp this is the five-tuple, for icmp it is the ip, type,
	   code and id. */
	struct nf_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX];

	/* Have we seen traffic both ways yet? (bitset) */
	/* Connection state bits. */
	unsigned long status;

	/* If we were expected by an expectation, this will be it */
	/* For a child (expected) connection, points to its master connection. */
	struct nf_conn *master;

	/* Timer function; drops refcnt when it goes off. */
	/* Expiry timer of the connection. */
	struct timer_list timeout;

#if defined(CONFIG_NF_CONNTRACK_MARK)
	/* Mark used to tag this connection. */
	u_int32_t mark;
#endif

#ifdef CONFIG_NF_CONNTRACK_SECMARK
	/* Only present when CONFIG_NF_CONNTRACK_SECMARK is enabled. */
	u_int32_t secmark;
#endif

	/* Storage reserved for other modules: */
	/* Private data of the individual protocols. */
	union nf_conntrack_proto proto;

	/* Extensions */
	/* Extension area. */
	struct nf_ct_ext *ext;
#ifdef CONFIG_NET_NS
	/* Only present when CONFIG_NET_NS is enabled. */
	struct net *ct_net;
#endif
};

2、数据包连接状态

数据包的连接状态定义在枚举类型ip_conntrack_info中

/*
 * Connection state of a packet relative to its conntrack entry.
 * Values at or above IP_CT_IS_REPLY indicate the reply direction.
 */
enum ip_conntrack_info {
	/* Packet is part of an established connection (either direction):
	   packets have been seen in both directions. */
	IP_CT_ESTABLISHED = 0,

	/* Like NEW, but the packet is related to an existing connection
	   (it belongs to a child connection of it), or is an ICMP error,
	   in either direction. */
	IP_CT_RELATED = 1,

	/* Packet started a new connection to track (original direction
	   only); no reply packet has been seen yet.  May be a
	   retransmission. */
	IP_CT_NEW = 2,

	/* Packet travels in the reply direction. */
	IP_CT_IS_REPLY = 3,

	/* Number of distinct IP_CT types (there is no NEW in the reply
	   direction). */
	IP_CT_NUMBER = 2 * IP_CT_IS_REPLY - 1
};

3、连接跟踪状态

/*
 * Bits stored in the status word of a tracked connection.
 * Each state has a bit index (*_BIT) and the matching mask value.
 */
enum ip_conntrack_status {
	/* Bit 0: the connection was expected, i.e. it is a child
	   connection.  Never changes once set. */
	IPS_EXPECTED_BIT = 0,
	IPS_EXPECTED = 1 << IPS_EXPECTED_BIT,

	/* Bit 1: packets have been seen in both directions.  Can be set,
	   never cleared. */
	IPS_SEEN_REPLY_BIT = 1,
	IPS_SEEN_REPLY = 1 << IPS_SEEN_REPLY_BIT,

	/* Bit 2: the conntrack must never be expired early.  Set for tcp
	   after the three-way handshake, for udp once data has flowed in
	   both directions. */
	IPS_ASSURED_BIT = 2,
	IPS_ASSURED = 1 << IPS_ASSURED_BIT,

	/* Bit 3: the connection is confirmed (the originating packet has
	   left the box); it has been added to the net->ct.hash table. */
	IPS_CONFIRMED_BIT = 3,
	IPS_CONFIRMED = 1 << IPS_CONFIRMED_BIT,

	/* Bit 4: source NAT is needed in the original direction.  Set at
	   postrouting once the reply tuple has been set up; never changes
	   afterwards. */
	IPS_SRC_NAT_BIT = 4,
	IPS_SRC_NAT = 1 << IPS_SRC_NAT_BIT,

	/* Bit 5: destination NAT is needed in the original direction.  Set
	   at prerouting once the reply tuple has been set up; never changes
	   afterwards. */
	IPS_DST_NAT_BIT = 5,
	IPS_DST_NAT = 1 << IPS_DST_NAT_BIT,

	/* Either kind of NAT. */
	IPS_NAT_MASK = IPS_SRC_NAT | IPS_DST_NAT,

	/* Bit 6: TCP sequence numbers of the connection need adjusting. */
	IPS_SEQ_ADJUST_BIT = 6,
	IPS_SEQ_ADJUST = 1 << IPS_SEQ_ADJUST_BIT,

	/* NAT initialization bits. */
	/* Bit 7: snat has been performed at postrouting and the connection
	   was added to the bysource chain. */
	IPS_SRC_NAT_DONE_BIT = 7,
	IPS_SRC_NAT_DONE = 1 << IPS_SRC_NAT_DONE_BIT,

	/* Bit 8: dnat has been performed at prerouting and the connection
	   was added to the bysource chain. */
	IPS_DST_NAT_DONE_BIT = 8,
	IPS_DST_NAT_DONE = 1 << IPS_DST_NAT_DONE_BIT,

	/* Either NAT initialization finished. */
	IPS_NAT_DONE_MASK = IPS_SRC_NAT_DONE | IPS_DST_NAT_DONE,

	/* Bit 9: the connection is dying (removed from the lists) and is
	   being released.  Cannot be unset. */
	IPS_DYING_BIT = 9,
	IPS_DYING = 1 << IPS_DYING_BIT,

	/* Bit 10: the connection has a fixed timeout. */
	IPS_FIXED_TIMEOUT_BIT = 10,
	IPS_FIXED_TIMEOUT = 1 << IPS_FIXED_TIMEOUT_BIT,

	/* Bit 11: the conntrack is a template.  Per the author's note, the
	   CT target that sets this can only be configured in the raw table. */
	IPS_TEMPLATE_BIT = 11,
	IPS_TEMPLATE = 1 << IPS_TEMPLATE_BIT,
};

4、struct nf_conntrack_tuple_hash

hnnode是哈希链表的节点,用来把该结构挂到连接跟踪的哈希表中;结构中最主要的成员是struct nf_conntrack_tuple

/* Connections have two entries in the hash table: one for each way */
struct nf_conntrack_tuple_hash {
	/* List node linking this entry into a hash chain. */
	struct hlist_nulls_node hnnode;
	/* Protocol-specific identifying information of the connection. */
	struct nf_conntrack_tuple tuple;
};

struct nf_conntrack_tuple这个结构保存了连接不同协议的特征信息,u是一个联合体,不同协议有不同特征,tcp/udp协议的五元组,icmp协议的type、code、id

/* This contains the information to distinguish a connection. */
/* Identifying information of one connection; for tcp/udp this is the
   five-tuple. */
struct nf_conntrack_tuple {
	/* Layer-3 and layer-4 information of the source side. */
	struct nf_conntrack_man src;

	/* These are the parts of the tuple which are fixed. */
	struct {
		union nf_inet_addr u3;
		/* Per-protocol layer-4 part; which member applies depends on
		   the protocol. */
		union {
			/* Add other protocols here. */
			__be16 all;

			struct {
				__be16 port;
			} tcp;
			struct {
				__be16 port;
			} udp;
			/* icmp type and code. */
			struct {
				u_int8_t type, code;
			} icmp;
			struct {
				__be16 port;
			} dccp;
			struct {
				__be16 port;
			} sctp;
			struct {
				__be16 key;
			} gre;
		} u;

		/* The protocol. */
		/* Protocol number. */
		u_int8_t protonum;

		/* The direction (for tuplehash) */
		/* Whether this tuple is the original or the reply direction. */
		u_int8_t dir;
	} dst;
};

struct nf_conntrack_man主要保存三层ip和四层端口

/* The manipulable part of the tuple. */
/* Mainly holds the layer-3 ip and the layer-4 port. */
struct nf_conntrack_man {
	/* Layer-3 ip address. */
	union nf_inet_addr u3;
	/* Layer-4 port number. */
	union nf_conntrack_man_proto u;
	/* Layer 3 protocol number. */
	u_int16_t l3num;
};

5、 struct nf_conntrack_l3proto

struct nf_conntrack_l3proto主要定义了一些三层ip协议的操作函数:获取tuple结构中三层协议相关的特征(源ip、目的ip),以及nfnetlink格式和tuple格式的源ip、目的ip之间的转换。

/*
 * struct nf_conntrack_l3proto - per layer-3 protocol conntrack operations.
 *
 * Holds the callbacks that extract the layer-3 part of a tuple (source and
 * destination ip) from a packet and that convert that part between the
 * tuple representation and the nfnetlink attribute representation.
 */
struct nf_conntrack_l3proto {
	/* L3 Protocol Family number. ex) PF_INET */
	u_int16_t l3proto;

	/* Protocol name */
	const char *name;

	/*
	 * Try to fill in the third arg: nhoff is offset of l3 proto
	 * hdr.  Return true if possible.
	 */
	/* Extracts the source and destination ip of the tuple from the
	   packet. */
	bool (*pkt_to_tuple)(const struct sk_buff *skb, unsigned int nhoff,
			     struct nf_conntrack_tuple *tuple);

	/*
	 * Invert the per-proto part of the tuple: ie. turn xmit into reply.
	 * Some packets can't be inverted: return 0 in that case.
	 */
	/* Derives the reply-direction source/destination ip from the
	   original direction by swapping them. */
	bool (*invert_tuple)(struct nf_conntrack_tuple *inverse,
			     const struct nf_conntrack_tuple *orig);

	/* Print out the per-protocol part of the tuple. */
	/* Outputs the source and destination ip. */
	int (*print_tuple)(struct seq_file *s,
			   const struct nf_conntrack_tuple *);

	/*
	 * Called before tracking.
	 *	*dataoff: offset of protocol header (TCP, UDP,...) in skb
	 *	*protonum: protocol number
	 */
	/* Obtains the layer-4 protocol number. */
	int (*get_l4proto)(const struct sk_buff *skb, unsigned int nhoff,
			   unsigned int *dataoff, u_int8_t *protonum);
	/* Fills the tuple's layer-3 source/destination ip into the skb in
	   the form nfnetlink prescribes. */
	int (*tuple_to_nlattr)(struct sk_buff *skb,
			       const struct nf_conntrack_tuple *t);

	/*
	 * Calculate size of tuple nlattr
	 */
	int (*nlattr_tuple_size)(void);
	/* Converts the attributes passed in an nfnetlink message into the
	   tuple's layer-3 source/destination ip. */
	int (*nlattr_to_tuple)(struct nlattr *tb[],
			       struct nf_conntrack_tuple *t);
	const struct nla_policy *nla_policy;

	size_t nla_size;

#ifdef CONFIG_SYSCTL
	struct ctl_table_header	*ctl_table_header;
	struct ctl_path		*ctl_table_path;
	struct ctl_table	*ctl_table;
#endif /* CONFIG_SYSCTL */

	/* Module (if any) which this is connected to. */
	struct module *me;
};

struct nf_conntrack_l3proto结构实例如下

/* IPv4 instance of struct nf_conntrack_l3proto. */
struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
	.l3proto	 = PF_INET,
	.name		 = "ipv4",
	/* Reads the source and destination ip from the skb into the tuple. */
	.pkt_to_tuple	 = ipv4_pkt_to_tuple,
	/* Derives the reply-direction source/destination ip from the original
	   direction by swapping them. */
	.invert_tuple	 = ipv4_invert_tuple,
	/* Outputs the source and destination ip. */
	.print_tuple	 = ipv4_print_tuple,
	/* Obtains the layer-4 protocol number. */
	.get_l4proto	 = ipv4_get_l4proto,
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
	/* Fills the tuple's layer-3 source/destination ip in the form
	   nfnetlink prescribes. */
	.tuple_to_nlattr = ipv4_tuple_to_nlattr,
	.nlattr_tuple_size = ipv4_nlattr_tuple_size,
	/* Converts the attributes passed in an nfnetlink message into the
	   tuple's layer-3 source/destination ip. */
	.nlattr_to_tuple = ipv4_nlattr_to_tuple,
	.nla_policy	 = ipv4_nla_policy,
#endif
#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
	.ctl_table_path  = nf_net_ipv4_netfilter_sysctl_path,
	.ctl_table	 = ip_ct_sysctl_table,
#endif
	.me		 = THIS_MODULE,
};

6、struct nf_conntrack_l4proto

struct nf_conntrack_l4proto结构主要是四层协议和tuple相关的操作函数,获取tuple结构四层协议相关的特征,tcp/udp协议就是源端口、目的端口,icmp协议是id、type、code,nfnetlink格式和tuple格式四层特性元素转换。

/*
 * struct nf_conntrack_l4proto - per layer-4 protocol conntrack operations.
 *
 * Holds the callbacks that deal with the layer-4 part of a tuple (source and
 * destination port for tcp/udp; id, type and code for icmp) and that convert
 * it between the tuple representation and the nfnetlink representation.
 */
struct nf_conntrack_l4proto {
	/* L3 Protocol number. */
	u_int16_t l3proto;

	/* L4 Protocol number. */
	u_int8_t l4proto;

	/* Try to fill in the third arg: dataoff is offset past network protocol
	   hdr.  Return true if possible. */
	bool (*pkt_to_tuple)(const struct sk_buff *skb, unsigned int dataoff,
			     struct nf_conntrack_tuple *tuple);

	/* Invert the per-proto part of the tuple: ie. turn xmit into reply.
	 * Some packets can't be inverted: return 0 in that case.
	 */
	bool (*invert_tuple)(struct nf_conntrack_tuple *inverse,
			     const struct nf_conntrack_tuple *orig);

	/* Returns verdict for packet, or -1 for invalid. */
	int (*packet)(struct nf_conn *ct,
		      const struct sk_buff *skb,
		      unsigned int dataoff,
		      enum ip_conntrack_info ctinfo,
		      u_int8_t pf,
		      unsigned int hooknum);

	/* Called when a new connection for this protocol found;
	 * returns TRUE if it's OK.  If so, packet() called next. */
	bool (*new)(struct nf_conn *ct, const struct sk_buff *skb,
		    unsigned int dataoff);

	/* Called when a conntrack entry is destroyed */
	void (*destroy)(struct nf_conn *ct);

	/* NOTE(review): undocumented in the original; presumably a
	   per-protocol packet sanity check -- confirm against the callers. */
	int (*error)(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
		     unsigned int dataoff, enum ip_conntrack_info *ctinfo,
		     u_int8_t pf, unsigned int hooknum);

	/* Print out the per-protocol part of the tuple. Return like seq_* */
	int (*print_tuple)(struct seq_file *s,
			   const struct nf_conntrack_tuple *);

	/* Print out the private part of the conntrack. */
	int (*print_conntrack)(struct seq_file *s, struct nf_conn *);

	/* convert protoinfo to nfnetlink attributes */
	int (*to_nlattr)(struct sk_buff *skb, struct nlattr *nla,
			 struct nf_conn *ct);
	/* Calculate protoinfo nlattr size */
	int (*nlattr_size)(void);

	/* convert nfnetlink attributes to protoinfo */
	int (*from_nlattr)(struct nlattr *tb[], struct nf_conn *ct);

	/* Layer-4 counterparts of the tuple <-> nfnetlink attribute
	   conversions. */
	int (*tuple_to_nlattr)(struct sk_buff *skb,
			       const struct nf_conntrack_tuple *t);
	/* Calculate tuple nlattr size */
	int (*nlattr_tuple_size)(void);
	int (*nlattr_to_tuple)(struct nlattr *tb[],
			       struct nf_conntrack_tuple *t);
	const struct nla_policy *nla_policy;

	size_t nla_size;

#ifdef CONFIG_SYSCTL
	struct ctl_table_header	**ctl_table_header;
	struct ctl_table	*ctl_table;
	unsigned int		*ctl_table_users;
#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
	struct ctl_table_header	*ctl_compat_table_header;
	struct ctl_table	*ctl_compat_table;
#endif
#endif
	/* Protocol name */
	const char *name;

	/* Module (if any) which this is connected to. */
	struct module *me;
};

6.1 、nf_conntrack_l4proto_tcp4

tcp协议的struct nf_conntrack_l4proto实例是nf_conntrack_l4proto_tcp4

/* TCP-over-IPv4 instance of struct nf_conntrack_l4proto. */
struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
{
	.l3proto		= PF_INET,
	.l4proto 		= IPPROTO_TCP,
	.name 			= "tcp",
	/* Reads the tuple's source and destination port from a tcp skb. */
	.pkt_to_tuple 		= tcp_pkt_to_tuple,
	/* Assigns the original direction's source/destination port to the
	   reply direction's destination/source port. */
	.invert_tuple 		= tcp_invert_tuple,
	/* Prints the tuple's source and destination port. */
	.print_tuple 		= tcp_print_tuple,
	.print_conntrack 	= tcp_print_conntrack,
	.packet 		= tcp_packet,
	.new 			= tcp_new,
	.error			= tcp_error,
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
	/* Converts the tcp protoinfo to nfnetlink attributes. */
	.to_nlattr		= tcp_to_nlattr,
	.nlattr_size		= tcp_nlattr_size,
	/* Converts nfnetlink attributes to the tcp protoinfo. */
	.from_nlattr		= nlattr_to_tcp,
	/* Fills the tuple's source and destination port in nfnetlink
	   format. */
	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,
	/* Converts source and destination port in nfnetlink format back
	   into the tuple. */
	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
	.nlattr_tuple_size	= tcp_nlattr_tuple_size,
	.nla_policy		= nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
	.ctl_table_users	= &tcp_sysctl_table_users,
	.ctl_table_header	= &tcp_sysctl_header,
	.ctl_table		= tcp_sysctl_table,
#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
	.ctl_compat_table	= tcp_compat_sysctl_table,
#endif
#endif
};

6.2 、nf_conntrack_l4proto_udp4

udp协议struct nf_conntrack_l4proto实例如下

/* UDP-over-IPv4 instance of struct nf_conntrack_l4proto. */
struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly =
{
	.l3proto		= PF_INET,
	.l4proto		= IPPROTO_UDP,
	.name			= "udp",
	/* Tuple handling for udp packets. */
	.pkt_to_tuple		= udp_pkt_to_tuple,
	.invert_tuple		= udp_invert_tuple,
	.print_tuple		= udp_print_tuple,
	.packet			= udp_packet,
	.new			= udp_new,
	.error			= udp_error,
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
	/* udp uses the generic port-based tuple <-> nlattr helpers. */
	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,
	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
	.nlattr_tuple_size	= nf_ct_port_nlattr_tuple_size,
	.nla_policy		= nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
	.ctl_table_users	= &udp_sysctl_table_users,
	.ctl_table_header	= &udp_sysctl_header,
	.ctl_table		= udp_sysctl_table,
#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
	.ctl_compat_table	= udp_compat_sysctl_table,
#endif
#endif
};

6.3、nf_conntrack_l4proto_icmp

icmp协议的struct nf_conntrack_l4proto的实例如下

/* ICMP (IPv4) instance of struct nf_conntrack_l4proto. */
struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly =
{
	.l3proto		= PF_INET,
	.l4proto		= IPPROTO_ICMP,
	.name			= "icmp",
	/* Tuple handling for icmp packets. */
	.pkt_to_tuple		= icmp_pkt_to_tuple,
	.invert_tuple		= icmp_invert_tuple,
	.print_tuple		= icmp_print_tuple,
	.packet			= icmp_packet,
	.new			= icmp_new,
	.error			= icmp_error,
	/* No destroy callback and no owning module are set. */
	.destroy		= NULL,
	.me			= NULL,
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
	/* icmp has its own tuple <-> nlattr helpers instead of the
	   port-based ones used by tcp and udp. */
	.tuple_to_nlattr	= icmp_tuple_to_nlattr,
	.nlattr_tuple_size	= icmp_nlattr_tuple_size,
	.nlattr_to_tuple	= icmp_nlattr_to_tuple,
	.nla_policy		= icmp_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
	.ctl_table_header	= &icmp_sysctl_header,
	.ctl_table		= icmp_sysctl_table,
#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
	.ctl_compat_table	= icmp_compat_sysctl_table,
#endif
#endif
};

 

你可能感兴趣的:(网络,协议栈,个人笔记,netfilter,连接跟踪)