ss(socket statistics)和netstat获取网络连接的实现

1 名词解释

  • netstat:network statistics,网络统计信息,通过解析/proc/net/tcp展示网络连接信息
  • ss:socket statistics,使用netlink展示网络连接信息
  • netlink:netlink socket,用户态空间和内核态空间通信的socket
  • rtnetlink:routing netlink,用于查询和配置路由、IP地址等网络信息;在linux实现中它是netlink的一个子集,创建socket时,family设置为AF_NETLINK,protocol设置为NETLINK_ROUTE
  • libnetlink:操作netlink的library

2 通过netlink获取连接信息(ss)

程序开始时,会通过解析选项,将选项变成下面的struct filter和一些变量的值。

/* Socket selection criteria that ss builds from its command-line options. */
struct filter {
    int dbs; // protocol filter, e.g. TCP, UDP, UNIX
    int states; // connection-state filter, e.g. SS_LISTEN, SS_CLOSE
    uint64_t families; // address-family filter; tested with FAMILY_MASK(AF_INET) / FAMILY_MASK(AF_INET6)
    struct ssfilter *f; // NOTE(review): presumably the user-supplied filter expression checked by run_ssfilter — confirm
    bool kill; // when set, inet_show_netlink opens a second netlink handle
};

ss命令的一些选项就是设置该过滤器:

  • -t选项就是设置dbs为TCP_DB
  • -4 -6就是设置families协议族

当处理好选项后就是执行数据获取,默认的行为就是通过netlink的方式获取数据:

tcp_show -> inet_show_netlink

/*
 * Dump the sockets of one protocol through a NETLINK_SOCK_DIAG socket.
 *
 * f        - filter; it is passed to sockdiag_send() and, through arg, to the
 *            per-socket callback. f->kill requests a second netlink handle.
 * dump_fp  - stored in rth.dump_fp before the dump starts.
 * protocol - passed to sockdiag_send() and, through arg, to the callback.
 *
 * Returns -1 when a netlink handle cannot be opened; otherwise the result of
 * the last sockdiag_send()/rtnl_dump_filter() call (0 when both succeeded).
 */
static int inet_show_netlink(struct filter *f, FILE *dump_fp, int protocol)
{
    int err = 0;
    struct rtnl_handle rth, rth2;
    int family = PF_INET;
    /* .rth is not named in the initializer, so it starts out as NULL. */
    struct inet_diag_arg arg = { .f = f, .protocol = protocol };
 
    if (rtnl_open_byproto(&rth, 0, NETLINK_SOCK_DIAG))
        return -1;
 
    /* Kill mode uses a second handle, handed to the callback via arg.rth. */
    if (f->kill) {
        if (rtnl_open_byproto(&rth2, 0, NETLINK_SOCK_DIAG)) {
            rtnl_close(&rth);
            return -1;
        }
        arg.rth = &rth2;
    }
 
    /* rtnl_dump_filter_l() compares each reply's nlmsg_seq with rth.dump. */
    rth.dump = MAGIC_SEQ;
    rth.dump_fp = dump_fp;
    /* Start with IPv4 unless IPv6 is the preferred family. */
    if (preferred_family == PF_INET6)
        family = PF_INET6;
 
again:
    /* Send the dump request for the current family. */
    if ((err = sockdiag_send(family, rth.fd, protocol, f)))
        goto Exit;
 
    /* Process the replies; show_one_inet_sock is called per message. */
    if ((err = rtnl_dump_filter(&rth, show_one_inet_sock, &arg))) {
        /* On failure, retry once with PF_UNSPEC before giving up. */
        if (family != PF_UNSPEC) {
            family = PF_UNSPEC;
            goto again;
        }
        goto Exit;
    }
    /* After a successful IPv4 pass, also dump IPv6 unless IPv4 was forced. */
    if (family == PF_INET && preferred_family != PF_INET) {
        family = PF_INET6;
        goto again;
    }
 
Exit:
    rtnl_close(&rth);
    /* arg.rth is only non-NULL when the second handle was opened above. */
    if (arg.rth)
        rtnl_close(arg.rth);
    return err;
}
  • 首先调用rtnl_open_byproto创建套接字,相当于执行socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_SOCK_DIAG)
  • sockdiag_send向内核发送请求
  • rtnl_dump_filter对内核返回的数据进行过滤和解析

进入到lib/libnetlink.c,查看rtnl_open_byproto的代码,其实就是创建socket,然后设置一些选项,然后调用bind函数:

/*
 * Open a netlink socket for the given protocol and bind it.
 *
 * rth           - handle to initialise; it is zeroed first, then fd, proto,
 *                 local address and seq are filled in.
 * subscriptions - value stored in the local address's nl_groups before bind().
 * protocol      - netlink protocol passed to socket().
 *
 * Returns 0 on success and -1 on failure. On every failure path the socket
 * is closed and rth->fd is set to -1, so a failed call does not leak a file
 * descriptor and the handle never keeps a stale one.
 */
int rtnl_open_byproto(struct rtnl_handle *rth, unsigned int subscriptions,
		      int protocol)
{
	socklen_t addr_len;
	int sndbuf = 32768;
	int one = 1;

	memset(rth, 0, sizeof(*rth));

	rth->proto = protocol;
	rth->fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, protocol);
	if (rth->fd < 0) {
		perror("Cannot open netlink socket");
		return -1;
	}

	if (setsockopt(rth->fd, SOL_SOCKET, SO_SNDBUF,
		       &sndbuf, sizeof(sndbuf)) < 0) {
		perror("SO_SNDBUF");
		goto err;
	}

	/* rcvbuf is not a local; it is defined outside this function. */
	if (setsockopt(rth->fd, SOL_SOCKET, SO_RCVBUF,
		       &rcvbuf, sizeof(rcvbuf)) < 0) {
		perror("SO_RCVBUF");
		goto err;
	}

	/* Older kernels may not support extended ACK reporting, so the
	 * result of this call is deliberately ignored. */
	setsockopt(rth->fd, SOL_NETLINK, NETLINK_EXT_ACK,
		   &one, sizeof(one));

	memset(&rth->local, 0, sizeof(rth->local));
	rth->local.nl_family = AF_NETLINK;
	rth->local.nl_groups = subscriptions;

	if (bind(rth->fd, (struct sockaddr *)&rth->local,
		 sizeof(rth->local)) < 0) {
		perror("Cannot bind netlink socket");
		goto err;
	}

	/* Read the bound address back and sanity-check it. */
	addr_len = sizeof(rth->local);
	if (getsockname(rth->fd, (struct sockaddr *)&rth->local,
			&addr_len) < 0) {
		perror("Cannot getsockname");
		goto err;
	}
	if (addr_len != sizeof(rth->local)) {
		fprintf(stderr, "Wrong address length %d\n", addr_len);
		goto err;
	}
	if (rth->local.nl_family != AF_NETLINK) {
		fprintf(stderr, "Wrong address family %d\n",
			rth->local.nl_family);
		goto err;
	}
	rth->seq = time(NULL);
	return 0;

err:
	/* Single cleanup path: release the socket opened above. */
	close(rth->fd);
	rth->fd = -1;
	return -1;
}

这里面涉及的几个数据结构:

struct sockaddr_nl { // netlink socket address, the counterpart of sockaddr_in
    __kernel_sa_family_t nl_family; /* set to AF_NETLINK */
    unsigned short nl_pad;          /* zero */
    __u32 nl_pid;                   /* port ID */
    __u32 nl_groups;                /* multicast groups mask */
};
 
/* Header that precedes every netlink message. */
struct nlmsghdr {
    __u32 nlmsg_len;   /* length of the whole message; for the request here: sizeof(nlmsghdr)+sizeof(inet_diag_req) */
    __u16 nlmsg_type;  /* message type, e.g. NLMSG_DONE (last message of a dump) or NLMSG_ERROR */
    __u16 nlmsg_flags; /* additional flags, e.g. NLM_F_REQUEST (request message), NLM_F_MULTI (one part of a multipart message), NLM_F_DUMP_INTR */
    __u32 nlmsg_seq;   /* Sequence number */
    __u32 nlmsg_pid;   /* Sending process port ID */
};
 
/* Request payload that follows struct nlmsghdr in the message sent to the kernel. */
struct inet_diag_req {
    __u8    idiag_family;       /* Family of addresses. */
    __u8    idiag_src_len;
    __u8    idiag_dst_len;
    __u8    idiag_ext;      /* Query extended information */
 
    struct inet_diag_sockid id;
 
    __u32   idiag_states;       /* connection states to filter on */
    __u32   idiag_dbs;      /* Tables to dump (NI) */
};
 
/* One buffer of a scatter/gather I/O vector. */
struct iovec
  {
    void *iov_base; /* address of the data; when sending, it holds struct nlmsghdr followed by struct inet_diag_req  */
    size_t iov_len; /* length of the data  */
  };
 
struct msghdr // the message being sent or received
  {
    void *msg_name;     /* peer address for the transfer; set to &sockaddr_nl  */
    socklen_t msg_namelen;  /* sizeof(sockaddr_nl)  */
 
    struct iovec *msg_iov;  /* data to send/receive: address of an array of struct iovec  */
    size_t msg_iovlen;      /* Number of elements in the vector.  */
 
    void *msg_control;      /* Ancillary data (eg BSD filedesc passing). */
    size_t msg_controllen;  /* Ancillary data buffer length.
                   !! The type should be socklen_t but the
                   definition of the kernel is incompatible
                   with this.  */
 
    int msg_flags;      /* Flags on received message.  */
  };

发送数据时,将整个msghdr发送给内核,内核收到数据后,进行解包然后执行对应的逻辑,然后返回数据,此时,用户态程序就需要以类似的逻辑来解析收到的数据:使用recvmsg接收数据,将接收到的数据转换成nlmsghdr,再通过NLMSG_开头的一些宏(NLMSG_OK:判断缓冲区中是否还有一个完整的报文;NLMSG_DATA:得到本次收到的报文数据;NLMSG_NEXT:获取下一个报文)对数据进行处理,当报文类型为NLMSG_DONE时表示本次的报文处理完毕。rtnl_dump_filter就是调用rtnl_dump_filter_l:

/*
 * Receive the replies to a dump request and run them through a list of
 * filter callbacks.
 *
 * rth - netlink handle; rth->fd is read from, rth->dump and
 *       rth->local.nl_pid are used to recognise our replies, and
 *       rth->dump_fp (if set) receives the raw data.
 * arg - array of filter descriptors, terminated by an entry whose
 *       filter pointer is NULL.
 *
 * Returns 0 once an NLMSG_DONE message has been handled, the negative
 * status of rtnl_recvmsg() or of a filter callback when one of them fails,
 * and -1 on NLMSG_ERROR or when rtnl_dump_done() reports an error.
 * Calls exit(1) if unparsed bytes remain in a non-truncated datagram.
 */
int rtnl_dump_filter_l(struct rtnl_handle *rth,
		       const struct rtnl_dump_filter_arg *arg)
{
	struct sockaddr_nl nladdr;
	struct iovec iov;
	struct msghdr msg = {
		.msg_name = &nladdr,
		.msg_namelen = sizeof(nladdr),
		.msg_iov = &iov,
		.msg_iovlen = 1,
	};
	char *buf;
	int dump_intr = 0;

	while (1) {
		int status;
		const struct rtnl_dump_filter_arg *a;
		int found_done = 0;
		int msglen = 0;

		/* NOTE(review): buf is released with free() below, so
		 * rtnl_recvmsg() presumably allocates it — confirm. */
		status = rtnl_recvmsg(rth->fd, &msg, &buf);
		if (status < 0)
			return status;

		/* Optionally save the raw datagram to the dump file. */
		if (rth->dump_fp)
			fwrite(buf, 1, NLMSG_ALIGN(status), rth->dump_fp);

		/* Every filter walks the same buffer from the beginning. */
		for (a = arg; a->filter; a++) {
			struct nlmsghdr *h = (struct nlmsghdr *)buf;

			msglen = status;

			while (NLMSG_OK(h, msglen)) {
				int err = 0;

				/* Clear the flags this filter asked to drop. */
				h->nlmsg_flags &= ~a->nc_flags;

				/* Ignore messages whose sender port ID is not 0
				 * or whose pid/seq do not match our request. */
				if (nladdr.nl_pid != 0 ||
				    h->nlmsg_pid != rth->local.nl_pid ||
				    h->nlmsg_seq != rth->dump)
					goto skip_it;

				if (h->nlmsg_flags & NLM_F_DUMP_INTR)
					dump_intr = 1;

				/* End-of-dump marker. */
				if (h->nlmsg_type == NLMSG_DONE) {
					err = rtnl_dump_done(h);
					if (err < 0) {
						free(buf);
						return -1;
					}

					found_done = 1;
					break; /* process next filter */
				}

				if (h->nlmsg_type == NLMSG_ERROR) {
					rtnl_dump_error(rth, h);
					free(buf);
					return -1;
				}

				/* Callbacks are skipped when dumping to a file. */
				if (!rth->dump_fp) {
					err = a->filter(&nladdr, h, a->arg1);
					if (err < 0) {
						free(buf);
						return err;
					}
				}

skip_it:
				/* Advance to the next message; msglen shrinks accordingly. */
				h = NLMSG_NEXT(h, msglen);
			}
		}
		free(buf);

		if (found_done) {
			if (dump_intr)
				fprintf(stderr,
					"Dump was interrupted and may be inconsistent.\n");
			return 0;
		}

		/* A truncated datagram is reported and the loop reads the next one. */
		if (msg.msg_flags & MSG_TRUNC) {
			fprintf(stderr, "Message truncated\n");
			continue;
		}
		/* Bytes left over after parsing are treated as fatal. */
		if (msglen) {
			fprintf(stderr, "!!!Remnant of size %d\n", msglen);
			exit(1);
		}
	}
}

ss命令在实现时是直接使用了rtnl_dump_filter函数对数据进行处理,每收到一个报文就会调用回调函数show_one_inet_sock:

/* Process the replies; show_one_inet_sock is the callback run on them. */
if ((err = rtnl_dump_filter(&rth, show_one_inet_sock, &arg))) {
    /* On failure, retry once with PF_UNSPEC before giving up. */
    if (family != PF_UNSPEC) {
        family = PF_UNSPEC;
        goto again;
    }
    goto Exit;
}

在show_one_inet_sock中,主要就是三个调用:

  • parse_diag_msg:使用NLMSG_DATA宏获取数据,从数据中提取出对应的字段,然后放到struct sockstat结构体中
  • run_ssfilter:如果用户有提供过滤的表达式,则判断是否应该进行后续处理
  • inet_show_sock:根据给定的选项打印数据

parse_diag_msg:将收到的数据转成inet_diag_msg后,就可以通过其中的inet_diag_sockid获取连接的信息。

/* Connection identity embedded in both inet_diag_req and inet_diag_msg. */
struct inet_diag_sockid {
    __be16  idiag_sport;    /* NOTE(review): presumably the local (source) port — confirm */
    __be16  idiag_dport;    /* NOTE(review): presumably the peer (destination) port — confirm */
    __be32  idiag_src[4];   /* source address: four 32-bit words (128 bits) */
    __be32  idiag_dst[4];   /* destination address: four 32-bit words (128 bits) */
    __u32   idiag_if;
    __u32   idiag_cookie[2];
#define INET_DIAG_NOCOOKIE (~0U)
};
 
/*
 * Per-socket record in the kernel's reply. parse_diag_msg reads it from the
 * message data and copies the fields into struct sockstat.
 */
struct inet_diag_msg {
    __u8    idiag_family;
    __u8    idiag_state;    /* NOTE(review): presumably the source of the "State" column — confirm */
    __u8    idiag_timer;
    __u8    idiag_retrans;
 
    struct inet_diag_sockid id; /* addresses and ports of the connection */
 
    __u32   idiag_expires;
    __u32   idiag_rqueue;   /* NOTE(review): presumably shown as Recv-Q — confirm */
    __u32   idiag_wqueue;   /* NOTE(review): presumably shown as Send-Q — confirm */
    __u32   idiag_uid;
    __u32   idiag_inode;
};

inet_show_sock在进行打印输出时会先调用inet_stats_print输出一些基本信息:

  • sock_stat_print:打印State、Recv-Q、Send-Q
  • inet_addr_print:打印Local Address:Port和Peer Address:Port
  • proc_ctx_print:打印进程的SELinux上下文或者进程信息

然后就需要考虑各种选项,在程序开始的部分会根据选项对一些变量赋值,然后在打印时会判断这些变量的值,然后进行相应的输出:

  • 如果设置了选项-o、-e、-b,就会设置show_options为1,此时就会打印timer信息
  • 如果设置了-e,就会设置show_details为1,此时会输出uid、ino、sk等信息
  • 如果设置了-m,就会设置show_mem为1,此时会输出skmem

3 通过proc获取连接信息(netstat)

至此,ss通过netlink获取连接信息的流程就结束了。回到最开始调用的tcp_show:当inet_show_netlink执行失败时,就会改用读取/proc/net/tcp的方式(与netstat相同):

/* IPv4: parse the file line by line; tcp_show_line handles each record. */
if (f->families & FAMILY_MASK(AF_INET)) {
    /* Failing to open the IPv4 file is treated as an error. */
    if ((fp = net_tcp_open()) == NULL)
        goto outerr;
 
    setbuffer(fp, buf, bufsize);
    if (generic_record_read(fp, tcp_show_line, f, AF_INET))
        goto outerr;
    fclose(fp);
}
 
/* IPv6: same parsing, but the block is silently skipped when the file
 * cannot be opened. */
if ((f->families & FAMILY_MASK(AF_INET6)) &&
    (fp = net_tcp6_open()) != NULL) {
    setbuffer(fp, buf, bufsize);
    if (generic_record_read(fp, tcp_show_line, f, AF_INET6))
        goto outerr;
    fclose(fp);
}

如果是ipv4,就打开/proc/net/tcp,如果是ipv6,就打开/proc/net/tcp6,两个逻辑都是调用generic_record_read和tcp_show_line对文件进行解析,generic_record_read会读取文件的每一行数据,当成功读取到一行数据后就会调用tcp_show_line,因此,tcp_show_line中就是对文件的一行进行解析和打印:

  • proc_parse_inet_addr:解析本端和对端地址以及端口号
  • 然后使用sscanf读取剩下的字段,例如state、wq、rq等
  • 最后打印的逻辑基本跟netlink的方式类似

你可能感兴趣的:(Linux,网络,linux)