回顾一下项目需求:统计系统的TCP连接数;
于是想到了 nf_conntrack
这个Linux内核提供的记录和跟踪连接状态的功能;
然后写了个程序解析 /proc/net/nf_conntrack
这个映射文件,后来悲剧就发生了:当conntrack表的记录数增加到1万以上之后,解析速度急剧下降;到了10万规模后,解析耗时几十秒都不能完成……
后来终于翻到了netfilter的老巢,发现了解决方法:libmnl、libnetfilter_conntrack
核心原理是通过netlink套接字
的方式,与内核交互,查询得到结果
libmnl基本方法:
/* Open a netlink socket on the given bus (the example's main() passes
 * NETLINK_NETFILTER); the caller treats a NULL return as failure. */
extern struct mnl_socket *mnl_socket_open(int bus);
/* Bind the socket to the given multicast groups and port id; the example
 * passes groups 0 and MNL_SOCKET_AUTOPID and treats a negative return as
 * failure. */
extern int mnl_socket_bind(struct mnl_socket *nl, unsigned int groups, pid_t pid);
/* Close the socket once the caller is done with it. */
extern int mnl_socket_close(struct mnl_socket *nl);
/* Send `siz` bytes starting at `req`; the example treats -1 as failure. */
extern ssize_t mnl_socket_sendto(const struct mnl_socket *nl, const void *req, size_t siz);
/* Receive into `buf` (capacity `siz`); the example treats a positive return
 * as the number of bytes to parse and -1 as failure. */
extern ssize_t mnl_socket_recvfrom(const struct mnl_socket *nl, void *buf, size_t siz);
libnetfilter_conntrack则主要是对获取的结果进行解析,比如取出源地址、协议族等信息
/*
 * Conntrack attribute identifiers.
 *
 * Every enumerator carries its numeric value explicitly so the numbering can
 * be read off directly. The trailing comment on each line gives the width or
 * kind of the attribute's value. Names marked "alias" share the value of the
 * enumerator they are assigned from.
 */
enum nf_conntrack_attr {
    /* IPv4 addresses */
    ATTR_ORIG_IPV4_SRC               = 0,   /* 32-bit unsigned */
    ATTR_IPV4_SRC                    = ATTR_ORIG_IPV4_SRC,   /* alias */
    ATTR_ORIG_IPV4_DST               = 1,   /* 32-bit unsigned */
    ATTR_IPV4_DST                    = ATTR_ORIG_IPV4_DST,   /* alias */
    ATTR_REPL_IPV4_SRC               = 2,   /* 32-bit unsigned */
    ATTR_REPL_IPV4_DST               = 3,   /* 32-bit unsigned */

    /* IPv6 addresses */
    ATTR_ORIG_IPV6_SRC               = 4,   /* 128-bit unsigned */
    ATTR_IPV6_SRC                    = ATTR_ORIG_IPV6_SRC,   /* alias */
    ATTR_ORIG_IPV6_DST               = 5,   /* 128-bit unsigned */
    ATTR_IPV6_DST                    = ATTR_ORIG_IPV6_DST,   /* alias */
    ATTR_REPL_IPV6_SRC               = 6,   /* 128-bit unsigned */
    ATTR_REPL_IPV6_DST               = 7,   /* 128-bit unsigned */

    /* Ports */
    ATTR_ORIG_PORT_SRC               = 8,   /* 16-bit unsigned */
    ATTR_PORT_SRC                    = ATTR_ORIG_PORT_SRC,   /* alias */
    ATTR_ORIG_PORT_DST               = 9,   /* 16-bit unsigned */
    ATTR_PORT_DST                    = ATTR_ORIG_PORT_DST,   /* alias */
    ATTR_REPL_PORT_SRC               = 10,  /* 16-bit unsigned */
    ATTR_REPL_PORT_DST               = 11,  /* 16-bit unsigned */

    /* ICMP */
    ATTR_ICMP_TYPE                   = 12,  /* 8-bit unsigned */
    ATTR_ICMP_CODE                   = 13,  /* 8-bit unsigned */
    ATTR_ICMP_ID                     = 14,  /* 16-bit unsigned */

    /* Layer 3 / layer 4 protocol numbers */
    ATTR_ORIG_L3PROTO                = 15,  /* 8-bit unsigned */
    ATTR_L3PROTO                     = ATTR_ORIG_L3PROTO,    /* alias */
    ATTR_REPL_L3PROTO                = 16,  /* 8-bit unsigned */
    ATTR_ORIG_L4PROTO                = 17,  /* 8-bit unsigned */
    ATTR_L4PROTO                     = ATTR_ORIG_L4PROTO,    /* alias */
    ATTR_REPL_L4PROTO                = 18,  /* 8-bit unsigned */

    ATTR_TCP_STATE                   = 19,  /* 8-bit unsigned */

    /* NAT (IPv4 addresses and ports) */
    ATTR_SNAT_IPV4                   = 20,  /* 32-bit unsigned */
    ATTR_DNAT_IPV4                   = 21,  /* 32-bit unsigned */
    ATTR_SNAT_PORT                   = 22,  /* 16-bit unsigned */
    ATTR_DNAT_PORT                   = 23,  /* 16-bit unsigned */

    /* Timeout, mark, counters, bookkeeping */
    ATTR_TIMEOUT                     = 24,  /* 32-bit unsigned */
    ATTR_MARK                        = 25,  /* 32-bit unsigned */
    ATTR_ORIG_COUNTER_PACKETS        = 26,  /* 64-bit unsigned */
    ATTR_REPL_COUNTER_PACKETS        = 27,  /* 64-bit unsigned */
    ATTR_ORIG_COUNTER_BYTES          = 28,  /* 64-bit unsigned */
    ATTR_REPL_COUNTER_BYTES          = 29,  /* 64-bit unsigned */
    ATTR_USE                         = 30,  /* 32-bit unsigned */
    ATTR_ID                          = 31,  /* 32-bit unsigned */
    ATTR_STATUS                      = 32,  /* 32-bit unsigned */

    /* TCP flags and masks */
    ATTR_TCP_FLAGS_ORIG              = 33,  /* 8-bit unsigned */
    ATTR_TCP_FLAGS_REPL              = 34,  /* 8-bit unsigned */
    ATTR_TCP_MASK_ORIG               = 35,  /* 8-bit unsigned */
    ATTR_TCP_MASK_REPL               = 36,  /* 8-bit unsigned */

    /* Master connection tuple */
    ATTR_MASTER_IPV4_SRC             = 37,  /* 32-bit unsigned */
    ATTR_MASTER_IPV4_DST             = 38,  /* 32-bit unsigned */
    ATTR_MASTER_IPV6_SRC             = 39,  /* 128-bit unsigned */
    ATTR_MASTER_IPV6_DST             = 40,  /* 128-bit unsigned */
    ATTR_MASTER_PORT_SRC             = 41,  /* 16-bit unsigned */
    ATTR_MASTER_PORT_DST             = 42,  /* 16-bit unsigned */
    ATTR_MASTER_L3PROTO              = 43,  /* 8-bit unsigned */
    ATTR_MASTER_L4PROTO              = 44,  /* 8-bit unsigned */

    ATTR_SECMARK                     = 45,  /* 32-bit unsigned */

    /* NAT sequence adjustment */
    ATTR_ORIG_NAT_SEQ_CORRECTION_POS = 46,  /* 32-bit unsigned */
    ATTR_ORIG_NAT_SEQ_OFFSET_BEFORE  = 47,  /* 32-bit unsigned */
    ATTR_ORIG_NAT_SEQ_OFFSET_AFTER   = 48,  /* 32-bit unsigned */
    ATTR_REPL_NAT_SEQ_CORRECTION_POS = 49,  /* 32-bit unsigned */
    ATTR_REPL_NAT_SEQ_OFFSET_BEFORE  = 50,  /* 32-bit unsigned */
    ATTR_REPL_NAT_SEQ_OFFSET_AFTER   = 51,  /* 32-bit unsigned */

    /* SCTP */
    ATTR_SCTP_STATE                  = 52,  /* 8-bit unsigned */
    ATTR_SCTP_VTAG_ORIG              = 53,  /* 32-bit unsigned */
    ATTR_SCTP_VTAG_REPL              = 54,  /* 32-bit unsigned */

    ATTR_HELPER_NAME                 = 55,  /* string (30 bytes max) */

    /* DCCP */
    ATTR_DCCP_STATE                  = 56,  /* 8-bit unsigned */
    ATTR_DCCP_ROLE                   = 57,  /* 8-bit unsigned */
    ATTR_DCCP_HANDSHAKE_SEQ          = 58,  /* 64-bit unsigned */

    /* TCP window scale */
    ATTR_TCP_WSCALE_ORIG             = 59,  /* 8-bit unsigned */
    ATTR_TCP_WSCALE_REPL             = 60,  /* 8-bit unsigned */

    ATTR_ZONE                        = 61,  /* 16-bit unsigned */
    ATTR_SECCTX                      = 62,  /* string */

    /* Timestamps */
    ATTR_TIMESTAMP_START             = 63,  /* 64-bit unsigned, linux >= 2.6.38 */
    ATTR_TIMESTAMP_STOP              = 64,  /* 64-bit unsigned, linux >= 2.6.38 */

    ATTR_HELPER_INFO                 = 65,  /* variable length */
    ATTR_CONNLABELS                  = 66,  /* variable length */
    ATTR_CONNLABELS_MASK             = 67,  /* variable length */

    /* Per-direction zones */
    ATTR_ORIG_ZONE                   = 68,  /* 16-bit unsigned */
    ATTR_REPL_ZONE                   = 69,  /* 16-bit unsigned */

    /* NAT (IPv6 addresses) */
    ATTR_SNAT_IPV6                   = 70,  /* 128-bit unsigned */
    ATTR_DNAT_IPV6                   = 71,  /* 128-bit unsigned */

    /* SYNPROXY */
    ATTR_SYNPROXY_ISN                = 72,  /* 32-bit unsigned */
    ATTR_SYNPROXY_ITS                = 73,  /* 32-bit unsigned */
    ATTR_SYNPROXY_TSOFF              = 74,  /* 32-bit unsigned */

    /* One past the last attribute; must stay the final enumerator. */
    ATTR_MAX
};
以下例子为打印TCP当前连接情况
main函数主要就是创建一个netlink套接字,发送请求IPCTNL_MSG_CT_GET
获取整个conntrack表信息
最终结果接收在buf中,使用mnl_cb_run
进行循环解析。
/*
 * Dump the kernel conntrack table over a NETLINK_NETFILTER socket.
 *
 * Builds a single IPCTNL_MSG_CT_GET dump request, sends it, then keeps
 * reading reply buffers and hands each one to mnl_cb_run(), which invokes
 * data_cb() for the messages it contains.
 *
 * Returns EXIT_SUCCESS once the dump has been consumed, or EXIT_FAILURE
 * (after printing the failing call via perror) if opening/binding the
 * socket, sending the request, receiving a reply, or processing a reply
 * fails. The socket is closed on every path after it has been opened.
 */
int main(void)
{
    struct mnl_socket *nl;
    struct nlmsghdr *nlh;
    struct nfgenmsg *nfh;
    char buf[MNL_SOCKET_BUFFER_SIZE];
    unsigned int seq, portid;
    int status = EXIT_FAILURE;
    int ret;

    nl = mnl_socket_open(NETLINK_NETFILTER);
    if (nl == NULL) {
        perror("mnl_socket_open");
        return EXIT_FAILURE;
    }

    if (mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID) < 0) {
        perror("mnl_socket_bind");
        goto out;
    }
    portid = mnl_socket_get_portid(nl);

    /* Build the dump request in buf; the same buffer is reused for replies. */
    nlh = mnl_nlmsg_put_header(buf);
    nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_GET;
    nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
    /* The sequence number is passed to mnl_cb_run() below together with
     * portid so replies can be matched against this request. */
    nlh->nlmsg_seq = seq = time(NULL);

    nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg));
    /* NOTE(review): family AF_INET presumably limits the dump to IPv4
     * entries; confirm whether IPv6 connections should be counted too. */
    nfh->nfgen_family = AF_INET;
    nfh->version = NFNETLINK_V0;
    nfh->res_id = 0;

    ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
    if (ret == -1) {
        /* Report the call that actually failed (was "mnl_socket_recvfrom"). */
        perror("mnl_socket_sendto");
        goto out;
    }

    for (;;) {
        ret = mnl_socket_recvfrom(nl, buf, sizeof(buf));
        if (ret == -1) {
            perror("mnl_socket_recvfrom");
            goto out;
        }
        if (ret == 0) {
            break;
        }

        ret = mnl_cb_run(buf, ret, seq, portid, data_cb, NULL);
        if (ret == -1) {
            /* Keep callback/parse failures distinct from receive failures. */
            perror("mnl_cb_run");
            goto out;
        }
        if (ret <= MNL_CB_STOP) {
            /* End of the dump. */
            break;
        }
    }

    status = EXIT_SUCCESS;

out:
    mnl_socket_close(nl);
    return status;
}
以下为回调函数的实现,在本例子中,筛选出TCP连接进行展示
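#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <libmnl/libmnl.h>
#include <libnetfilter_conntrack/libnetfilter_conntrack.h>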
/*
 * Per-message callback handed to mnl_cb_run() by main().
 *
 * Parses one netlink message into a conntrack object and, when the
 * original-direction layer-4 protocol is TCP, prints the entry on stdout.
 *
 * nlh:  the netlink message to parse.
 * data: opaque user pointer from mnl_cb_run(); unused here.
 *
 * Always returns MNL_CB_OK so that one entry that cannot be allocated or
 * parsed is skipped instead of aborting the whole dump.
 */
static int data_cb(const struct nlmsghdr *nlh, void *data)
{
    struct nf_conntrack *ct;
    char buf[4096];

    (void)data;

    ct = nfct_new();
    if (ct == NULL) {
        return MNL_CB_OK;
    }

    /* Do not filter or print an entry that was not parsed successfully. */
    if (nfct_nlmsg_parse(nlh, ct) < 0) {
        nfct_destroy(ct);
        return MNL_CB_OK;
    }

    if (nfct_get_attr_u8(ct, ATTR_ORIG_L4PROTO) == IPPROTO_TCP) {
        nfct_snprintf(buf, sizeof(buf), ct, NFCT_T_UNKNOWN, NFCT_O_DEFAULT, 0);
        printf("%s\n", buf);
    }

    nfct_destroy(ct);
    return MNL_CB_OK;
}
运行结果涉及本机一些地址,就不展示了,结果与 /proc/net/nf_conntrack
一致,但在10万条记录的环境下,也不会有巨大的开销。
只要连接没有设置notrack标识,就可以通过 nf_conntrack 获取连接数;
而使用libmnl+libnetfilter_conntrack的netlink套接字的方式,比直接cat文件速度快很多;
查看了官方手册,发现libnetfilter_conntrack不仅可解析conntrack表,还能够进行监控、修改等高级操作,功能十分强大!
参考文章:
[1] https://en.wikipedia.org/wiki/Netfilter
[2] https://www.netfilter.org/projects/libnetfilter_conntrack/index.html