Linux协议栈中netfilter框架对数据包做的过滤、地址转换等处理(比如snat、dnat)基本上都是基于连接跟踪来做的。连接跟踪就是记录数据流在协议栈中的两个方向(original方向和reply方向),不同的协议用不同的特征来标识一条连接:tcp/udp是五元组(源ip、目的ip、源端口、目的端口、协议号),icmp协议是源ip、目的ip、id、type、code。
1、struct nf_conn
struct nf_conn结构体是连接跟踪的抽象结构,其中最重要的成员是struct nf_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX],它记录了一条连接两个方向的特征信息,根据这些信息可以唯一确定一条连接;另一个重要成员是连接的状态unsigned long status。各成员的详解如下。
/*
 * struct nf_conn - one tracked connection.
 *
 * Holds the identifying tuples for both directions, the status bits, an
 * optional pointer to the master connection, the timeout timer, and
 * per-protocol / extension storage.
 */
struct nf_conn {
/* Usage count in here is 1 for hash table/destruct timer, 1 per skb,
plus 1 for any connection(s) we are `master' for */
/* Reference count of this conntrack entry. */
struct nf_conntrack ct_general;
/* NOTE(review): what exactly this lock protects is not visible here. */
spinlock_t lock;
/* XXX should I move this to the tail ? - Y.K */
/* These are my tuples; original and reply */
/* Identifying information of the connection, one entry per direction.
For tcp/udp the 5-tuple identifies a connection; for icmp it is the
ip addresses plus type, code and id. */
struct nf_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX];
/* Have we seen traffic both ways yet? (bitset) */
/* Connection status bits. */
unsigned long status;
/* If we were expected by an expectation, this will be it */
/* For a child (expected) connection, points to its master connection. */
struct nf_conn *master;
/* Timer function; drops refcnt when it goes off. */
/* Expiry timer of the connection. */
struct timer_list timeout;
#if defined(CONFIG_NF_CONNTRACK_MARK)
/* Mark used to tag a connection; only built with CONFIG_NF_CONNTRACK_MARK. */
u_int32_t mark;
#endif
#ifdef CONFIG_NF_CONNTRACK_SECMARK
/* Only built with CONFIG_NF_CONNTRACK_SECMARK.
NOTE(review): presumably a security mark -- confirm. */
u_int32_t secmark;
#endif
/* Storage reserved for other modules: */
/* Private data of the individual protocols. */
union nf_conntrack_proto proto;
/* Extensions */
/* Extension area. */
struct nf_ct_ext *ext;
#ifdef CONFIG_NET_NS
/* Only built with CONFIG_NET_NS.
NOTE(review): presumably the owning network namespace -- confirm. */
struct net *ct_net;
#endif
};
2、数据包连接状态
数据包的连接状态定义在枚举类型enum ip_conntrack_info中。
/*
 * Connection state attached to an individual packet.
 *
 * The first three values describe the state itself; IP_CT_IS_REPLY is the
 * threshold at and above which a value denotes the reply direction.
 */
enum ip_conntrack_info {
	/*
	 * Part of an established connection (either direction), i.e. packets
	 * have been seen in both directions.
	 */
	IP_CT_ESTABLISHED = 0,

	/*
	 * Like NEW (no reply-direction packet yet), but related to an existing
	 * connection -- it is a child of that connection -- or an ICMP error
	 * (in either direction).
	 */
	IP_CT_RELATED = 1,

	/*
	 * Started a new connection to track (only IP_CT_DIR_ORIGINAL); may be
	 * a retransmission. No packet has been seen in the reply direction.
	 */
	IP_CT_NEW = 2,

	/* Values >= this one indicate the reply direction. */
	IP_CT_IS_REPLY = 3,

	/* Number of distinct IP_CT types (no NEW in reply dirn). */
	IP_CT_NUMBER = 2 * IP_CT_IS_REPLY - 1
};
3、连接跟踪状态
/*
 * Connection-tracking status flags.
 *
 * Every flag is declared twice: IPS_<NAME>_BIT is the bit number and
 * IPS_<NAME> is the corresponding mask (1 << bit number).
 */
enum ip_conntrack_status {
	/* Bit 0: it is an expected (child) connection. This bit never changes. */
	IPS_EXPECTED_BIT = 0,
	IPS_EXPECTED = 1 << IPS_EXPECTED_BIT,

	/* Bit 1: packets have been seen both ways. Can be set, not unset. */
	IPS_SEEN_REPLY_BIT = 1,
	IPS_SEEN_REPLY = 1 << IPS_SEEN_REPLY_BIT,

	/*
	 * Bit 2: conntrack should never be early-expired.
	 * Per the article: set for tcp after the three-way handshake, and for
	 * udp once data has been seen in both directions.
	 */
	IPS_ASSURED_BIT = 2,
	IPS_ASSURED = 1 << IPS_ASSURED_BIT,

	/*
	 * Bit 3: connection is confirmed -- the originating packet has left
	 * the box. Per the article: the entry has been added to net->ct.hash.
	 */
	IPS_CONFIRMED_BIT = 3,
	IPS_CONFIRMED = 1 << IPS_CONFIRMED_BIT,

	/*
	 * Bit 4: connection needs src nat in orig dir. This bit never changes.
	 * Per the article: set at postrouting once the reply tuple is set up.
	 */
	IPS_SRC_NAT_BIT = 4,
	IPS_SRC_NAT = 1 << IPS_SRC_NAT_BIT,

	/*
	 * Bit 5: connection needs dst nat in orig dir. This bit never changes.
	 * Per the article: set at prerouting once the reply tuple is set up.
	 */
	IPS_DST_NAT_BIT = 5,
	IPS_DST_NAT = 1 << IPS_DST_NAT_BIT,

	/* Both NAT flags together. */
	IPS_NAT_MASK = IPS_SRC_NAT | IPS_DST_NAT,

	/* Bit 6: connection needs its TCP sequence numbers adjusted. */
	IPS_SEQ_ADJUST_BIT = 6,
	IPS_SEQ_ADJUST = 1 << IPS_SEQ_ADJUST_BIT,

	/*
	 * NAT initialization bits.
	 * Bit 7, per the article: snat has been completed at postrouting and
	 * the entry has been added to the bysource chain.
	 */
	IPS_SRC_NAT_DONE_BIT = 7,
	IPS_SRC_NAT_DONE = 1 << IPS_SRC_NAT_DONE_BIT,

	/*
	 * Bit 8, per the article: dnat has been completed at prerouting and
	 * the entry has been added to the bysource chain.
	 */
	IPS_DST_NAT_DONE_BIT = 8,
	IPS_DST_NAT_DONE = 1 << IPS_DST_NAT_DONE_BIT,

	/* Both NAT-done flags together. */
	IPS_NAT_DONE_MASK = IPS_SRC_NAT_DONE | IPS_DST_NAT_DONE,

	/* Bit 9: connection is dying (removed from lists); can not be unset. */
	IPS_DYING_BIT = 9,
	IPS_DYING = 1 << IPS_DYING_BIT,

	/* Bit 10: connection has a fixed timeout. */
	IPS_FIXED_TIMEOUT_BIT = 10,
	IPS_FIXED_TIMEOUT = 1 << IPS_FIXED_TIMEOUT_BIT,

	/*
	 * Bit 11: conntrack is a template.
	 * Per the article: the CT target can only be configured in the raw table.
	 */
	IPS_TEMPLATE_BIT = 11,
	IPS_TEMPLATE = 1 << IPS_TEMPLATE_BIT
};
4、struct nf_conntrack_tuple_hash
struct nf_conntrack_tuple_hash中,hnnode是哈希链表节点,用来把该结构挂到连接跟踪的hash表中;最主要的成员是struct nf_conntrack_tuple,保存连接在一个方向上的特征信息。
/* Connections have two entries in the hash table: one for each way */
struct nf_conntrack_tuple_hash {
/* List node used to link this entry into the hash table. */
struct hlist_nulls_node hnnode;
/* Protocol-specific identifying fields of the connection. */
struct nf_conntrack_tuple tuple;
};
struct nf_conntrack_tuple这个结构保存了一条连接在某个方向上的特征信息。u是一个联合体,不同协议使用其中不同的成员:tcp/udp协议是端口(与ip、协议号一起组成五元组),icmp协议是type、code、id。
/* This contains the information to distinguish a connection. */
/* For tcp/udp the information of one connection is the 5-tuple. */
struct nf_conntrack_tuple {
/* Source side: layer-3 and layer-4 information (see struct nf_conntrack_man). */
struct nf_conntrack_man src;
/* These are the parts of the tuple which are fixed. */
struct {
/* Layer-3 address of the destination side. */
union nf_inet_addr u3;
/* Per-protocol layer-4 part; which member is valid depends on the protocol. */
union {
/* Add other protocols here. */
__be16 all;
struct {
__be16 port;
} tcp;
struct {
__be16 port;
} udp;
/* type and code of the icmp protocol. */
struct {
u_int8_t type, code;
} icmp;
struct {
__be16 port;
} dccp;
struct {
__be16 port;
} sctp;
struct {
__be16 key;
} gre;
} u;
/* The protocol. */
/* Protocol number. */
u_int8_t protonum;
/* The direction (for tuplehash) */
/* Whether this tuple is for the original or the reply direction. */
u_int8_t dir;
} dst;
};
struct nf_conntrack_man主要保存三层ip、四层端口以及三层协议号
/* The manipulable part of the tuple. */
struct nf_conntrack_man {
/* Layer-3 (ip) address. */
union nf_inet_addr u3;
/* Layer-4 part, e.g. the port number. */
union nf_conntrack_man_proto u;
/* Layer 3 protocol number. */
u_int16_t l3num;
};
5、 struct nf_conntrack_l3proto
struct nf_conntrack_l3proto主要定义了一些三层ip协议的操作函数:获取tuple结构中与三层协议相关的特征(源ip、目的ip),以及源ip、目的ip在nfnetlink格式和tuple格式之间的转换。
/*
 * struct nf_conntrack_l3proto - layer-3 protocol operations for conntrack.
 *
 * Groups the callbacks that deal with the layer-3 part of a tuple (source
 * and destination ip): extracting it from a packet, inverting it, printing
 * it, and converting it to and from nfnetlink attributes.
 */
struct nf_conntrack_l3proto {
	/* L3 Protocol Family number. ex) PF_INET */
	u_int16_t l3proto;

	/* Protocol name */
	const char *name;

	/*
	 * Try to fill in the third arg: nhoff is offset of l3 proto
	 * hdr. Return true if possible.
	 * Derives the tuple's source and destination ip from the packet.
	 */
	bool (*pkt_to_tuple)(const struct sk_buff *skb, unsigned int nhoff,
			     struct nf_conntrack_tuple *tuple);

	/*
	 * Invert the per-proto part of the tuple: ie. turn xmit into reply.
	 * Some packets can't be inverted: return 0 in that case.
	 * The reply-direction source/destination ip are the original
	 * direction's destination/source ip, i.e. swapped.
	 */
	bool (*invert_tuple)(struct nf_conntrack_tuple *inverse,
			     const struct nf_conntrack_tuple *orig);

	/* Print out the per-protocol part of the tuple (source and destination ip). */
	int (*print_tuple)(struct seq_file *s,
			   const struct nf_conntrack_tuple *);

	/*
	 * Called before tracking; obtains the layer-4 protocol number.
	 *	*dataoff: offset of protocol header (TCP, UDP,...) in skb
	 *	*protonum: protocol number
	 */
	int (*get_l4proto)(const struct sk_buff *skb, unsigned int nhoff,
			   unsigned int *dataoff, u_int8_t *protonum);

	/*
	 * Fill the tuple's layer-3 source and destination ip into skb in the
	 * form required by nfnetlink.
	 */
	int (*tuple_to_nlattr)(struct sk_buff *skb,
			       const struct nf_conntrack_tuple *t);

	/*
	 * Calculate size of tuple nlattr
	 */
	int (*nlattr_tuple_size)(void);

	/*
	 * Convert the attributes received in an nfnetlink message into the
	 * tuple's layer-3 source and destination ip.
	 */
	int (*nlattr_to_tuple)(struct nlattr *tb[],
			       struct nf_conntrack_tuple *t);
	const struct nla_policy *nla_policy;

	size_t nla_size;

#ifdef CONFIG_SYSCTL
	struct ctl_table_header	*ctl_table_header;
	struct ctl_path		*ctl_table_path;
	struct ctl_table	*ctl_table;
#endif /* CONFIG_SYSCTL */

	/* Module (if any) which this is connected to. */
	struct module *me;
};	/* the terminating ';' was missing in the original listing */
struct nf_conntrack_l3proto结构实例如下
/* struct nf_conntrack_l3proto instance for ipv4 (PF_INET). */
struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
.l3proto = PF_INET,
.name = "ipv4",
/* Read the source and destination ip from the skb and store them in the tuple. */
.pkt_to_tuple = ipv4_pkt_to_tuple,
/* Build the reply-direction source/destination ip by swapping the original direction's. */
.invert_tuple = ipv4_invert_tuple,
/* Print the source and destination ip. */
.print_tuple = ipv4_print_tuple,
/* Obtain the layer-4 protocol number. */
.get_l4proto = ipv4_get_l4proto,
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
/* Fill the tuple's layer-3 source/destination ip in the form required by nfnetlink. */
.tuple_to_nlattr = ipv4_tuple_to_nlattr,
.nlattr_tuple_size = ipv4_nlattr_tuple_size,
/* Convert attributes received via nfnetlink into the tuple's layer-3 source/destination ip. */
.nlattr_to_tuple = ipv4_nlattr_to_tuple,
.nla_policy = ipv4_nla_policy,
#endif
#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
/* sysctl entries; only set when both CONFIG_SYSCTL and CONFIG_NF_CONNTRACK_PROC_COMPAT are enabled. */
.ctl_table_path = nf_net_ipv4_netfilter_sysctl_path,
.ctl_table = ip_ct_sysctl_table,
#endif
.me = THIS_MODULE,
};
6、struct nf_conntrack_l4proto
struct nf_conntrack_l4proto结构主要是四层协议和tuple相关的操作函数:获取tuple结构中与四层协议相关的特征(tcp/udp协议是源端口、目的端口,icmp协议是id、type、code),以及四层特征在nfnetlink格式和tuple格式之间的转换。
/*
 * struct nf_conntrack_l4proto - layer-4 protocol operations for conntrack.
 *
 * Groups the callbacks that deal with the layer-4 part of a tuple and with
 * the per-protocol private state of a connection.
 */
struct nf_conntrack_l4proto {
/* L3 Protocol number. */
u_int16_t l3proto;
/* L4 Protocol number. */
u_int8_t l4proto;
/* Try to fill in the third arg: dataoff is offset past network protocol
hdr. Return true if possible. */
bool (*pkt_to_tuple)(const struct sk_buff *skb, unsigned int dataoff,
struct nf_conntrack_tuple *tuple);
/* Invert the per-proto part of the tuple: ie. turn xmit into reply.
 * Some packets can't be inverted: return 0 in that case.
 */
bool (*invert_tuple)(struct nf_conntrack_tuple *inverse,
const struct nf_conntrack_tuple *orig);
/* Returns verdict for packet, or -1 for invalid. */
int (*packet)(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
u_int8_t pf,
unsigned int hooknum);
/* Called when a new connection for this protocol found;
 * returns TRUE if it's OK. If so, packet() called next. */
bool (*new)(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff);
/* Called when a conntrack entry is destroyed */
void (*destroy)(struct nf_conn *ct);
/* NOTE(review): undocumented in the original; when it is called and what
 * its return value means are not visible here -- confirm. */
int (*error)(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
unsigned int dataoff, enum ip_conntrack_info *ctinfo,
u_int8_t pf, unsigned int hooknum);
/* Print out the per-protocol part of the tuple. Return like seq_* */
int (*print_tuple)(struct seq_file *s,
const struct nf_conntrack_tuple *);
/* Print out the private part of the conntrack. */
int (*print_conntrack)(struct seq_file *s, struct nf_conn *);
/* convert protoinfo to nfnetlink attributes */
int (*to_nlattr)(struct sk_buff *skb, struct nlattr *nla,
struct nf_conn *ct);
/* Calculate protoinfo nlattr size */
int (*nlattr_size)(void);
/* convert nfnetlink attributes to protoinfo */
int (*from_nlattr)(struct nlattr *tb[], struct nf_conn *ct);
/* Layer-4 counterpart of the tuple <-> nfnetlink attribute conversion. */
int (*tuple_to_nlattr)(struct sk_buff *skb,
const struct nf_conntrack_tuple *t);
/* Calculate tuple nlattr size */
int (*nlattr_tuple_size)(void);
int (*nlattr_to_tuple)(struct nlattr *tb[],
struct nf_conntrack_tuple *t);
const struct nla_policy *nla_policy;
size_t nla_size;
#ifdef CONFIG_SYSCTL
/* sysctl bookkeeping; only built with CONFIG_SYSCTL. */
struct ctl_table_header **ctl_table_header;
struct ctl_table *ctl_table;
unsigned int *ctl_table_users;
#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
/* Compat sysctl table; additionally requires CONFIG_NF_CONNTRACK_PROC_COMPAT. */
struct ctl_table_header *ctl_compat_table_header;
struct ctl_table *ctl_compat_table;
#endif
#endif
/* Protocol name */
const char *name;
/* Module (if any) which this is connected to. */
struct module *me;
};
6.1 、nf_conntrack_l4proto_tcp4
tcp协议的struct nf_conntrack_l4proto实例是nf_conntrack_l4proto_tcp4
/* struct nf_conntrack_l4proto instance for tcp over ipv4 (PF_INET, IPPROTO_TCP). */
struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
{
.l3proto = PF_INET,
.l4proto = IPPROTO_TCP,
.name = "tcp",
/* Obtain the tuple's source and destination port from a tcp skb. */
.pkt_to_tuple = tcp_pkt_to_tuple,
/* The original direction's source/destination port become the reply direction's destination/source port. */
.invert_tuple = tcp_invert_tuple,
/* Print the tuple's source and destination port. */
.print_tuple = tcp_print_tuple,
.print_conntrack = tcp_print_conntrack,
.packet = tcp_packet,
.new = tcp_new,
.error = tcp_error,
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
/* to_nlattr converts the protoinfo (private conntrack state) to nfnetlink attributes. */
.to_nlattr = tcp_to_nlattr,
.nlattr_size = tcp_nlattr_size,
/* from_nlattr converts nfnetlink attributes back to protoinfo. */
.from_nlattr = nlattr_to_tcp,
/* Tuple <-> nfnetlink attribute conversion.
 * NOTE(review): the nf_ct_port_* names suggest these handle the
 * source/destination ports -- confirm against the helpers. */
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
.nlattr_tuple_size = tcp_nlattr_tuple_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
.ctl_table_users = &tcp_sysctl_table_users,
.ctl_table_header = &tcp_sysctl_header,
.ctl_table = tcp_sysctl_table,
#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
.ctl_compat_table = tcp_compat_sysctl_table,
#endif
#endif
};
6.2 、nf_conntrack_l4proto_udp4
udp协议struct nf_conntrack_l4proto实例如下
/*
 * struct nf_conntrack_l4proto instance for udp over ipv4
 * (PF_INET, IPPROTO_UDP).
 *
 * The netlink members are only set when ctnetlink is enabled, and the
 * sysctl members only with CONFIG_SYSCTL.
 */
struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly =
{
	.l3proto		= PF_INET,
	.l4proto		= IPPROTO_UDP,
	.name			= "udp",
	.pkt_to_tuple		= udp_pkt_to_tuple,
	.invert_tuple		= udp_invert_tuple,
	.print_tuple		= udp_print_tuple,
	.packet			= udp_packet,
	.new			= udp_new,
	.error			= udp_error,
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
	/* Tuple <-> nfnetlink attribute conversion (same helpers as the tcp instance). */
	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,
	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
	.nlattr_tuple_size	= nf_ct_port_nlattr_tuple_size,
	.nla_policy		= nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
	.ctl_table_users	= &udp_sysctl_table_users,
	.ctl_table_header	= &udp_sysctl_header,
	.ctl_table		= udp_sysctl_table,
#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
	.ctl_compat_table	= udp_compat_sysctl_table,
#endif
#endif
};	/* closing brace and ';' were missing in the original listing */
6.3、nf_conntrack_l4proto_icmp
icmp协议的struct nf_conntrack_l4proto的实例如下
/* struct nf_conntrack_l4proto instance for icmp over ipv4 (PF_INET, IPPROTO_ICMP). */
struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly =
{
.l3proto = PF_INET,
.l4proto = IPPROTO_ICMP,
.name = "icmp",
.pkt_to_tuple = icmp_pkt_to_tuple,
.invert_tuple = icmp_invert_tuple,
.print_tuple = icmp_print_tuple,
.packet = icmp_packet,
.new = icmp_new,
.error = icmp_error,
/* No destroy callback and no owning module are set for icmp. */
.destroy = NULL,
.me = NULL,
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
/* icmp-specific tuple <-> nfnetlink attribute conversion. */
.tuple_to_nlattr = icmp_tuple_to_nlattr,
.nlattr_tuple_size = icmp_nlattr_tuple_size,
.nlattr_to_tuple = icmp_nlattr_to_tuple,
.nla_policy = icmp_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
/* Unlike the tcp/udp instances, ctl_table_users is not set here. */
.ctl_table_header = &icmp_sysctl_header,
.ctl_table = icmp_sysctl_table,
#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
.ctl_compat_table = icmp_compat_sysctl_table,
#endif
#endif
};