基于字节流套接字(SOCK_STREAM)和数据报套接字(SOCK_DGRAM)不可以访问传输层协议,只是对应用层的报文进行操作,传输层的数据报格式都是由系统提供的协议栈实现,用户只需要填充相应的应用层报文,由系统完成底层报文首部的填充并发送。原始套接字(SOCK_RAW)可以访问位于底层的传输层协议,原始套接字没有端口号。
原始套接字(SOCK_RAW)是一种不同于 SOCK_STREAM、SOCK_DGRAM 的套接字,它实现于系统核心。原始套接字使进程可以读与写 ICMP、IGMP 等网络报文;也可以处理特殊的 IPv4 报文;进程还可以通过设置 IP_HDRINCL 套接字选项由用户自行构造 IP 首部。原始套接字可以用来自行组装 IP 数据报,然后将数据报发送到其他终端。但是只有管理员权限才能使用原始套接字,可防止普通用户往网络写入它们自行构造的 IP 数据报。
调用 socket 函数创建套接字时,指定套接字类型为 SOCK_RAW 以创建一个原始套接字。
/* Create an IPv4 raw socket for the given protocol. */
int sockfd = socket(AF_INET, SOCK_RAW, protocol);
/* Enable the IP_HDRINCL option on sockfd (option value 1 = on). */
const int on = 1;
if(setsockopt(sockfd, IPPROTO_IP, IP_HDRINCL, &on, sizeof(on)) < 0)
    /* error handling goes here */
原始套接字的输出遵循以下规则:
原始套接字的输入遵循以下规则:
内核在传递 IP 数据报到原始套接字之前,必须对所有进程上的所有原始套接字进行匹配检测,若匹配成功,才把 IP 数据报的副本传递到匹配的原始套接字。检测匹配步骤如下:
ping 程序的操作比较简单,当源主机向目标主机发送了 ICMP 回显请求数据报后,它期待着目标主机的回答。目标主机在收到一个 ICMP 回显请求数据报后,它会交换源、目的主机的地址,然后将收到的 ICMP 回显请求数据报中的数据部分原封不动地封装在自己的 ICMP 回显应答数据报中,然后发回给发送 ICMP 回显请求的一方。如果校验正确,发送者便认为目标主机的回显服务正常,也即物理连接畅通。
ping 程序编程需要用到 ICMP 协议,有关 ICMP 协议的知识可以参考前面的文章《ICMP 协议》。ping 命令只使用众多 ICMP 报文中的两种:"请求(ICMP_ECHO)"和"回应(ICMP_ECHOREPLY)",这两种 ICMP 报文格式如下图所示:
首先看下系统自带 ping 程序的输出:
$ ping www.github.com PING github.com (192.30.252.128) 56(84) bytes of data. 64 bytes from github.com (192.30.252.128): icmp_req=1 ttl=45 time=269 ms 64 bytes from github.com (192.30.252.128): icmp_req=2 ttl=45 time=274 ms 64 bytes from github.com (192.30.252.128): icmp_req=3 ttl=45 time=270 ms 64 bytes from github.com (192.30.252.128): icmp_req=4 ttl=45 time=281 ms 64 bytes from github.com (192.30.252.128): icmp_req=5 ttl=45 time=283 ms 64 bytes from github.com (192.30.252.128): icmp_req=6 ttl=45 time=249 ms 64 bytes from github.com (192.30.252.128): icmp_req=7 ttl=45 time=253 ms ^C --- github.com ping statistics --- 7 packets transmitted, 7 received, 0% packet loss, time 6006ms rtt min/avg/max/mdev = 249.472/269.010/283.945/12.186 ms
ping 程序的编程步骤:
1) 创建类型为 SOCK_RAW 的原始套接字,同时设定协议为 IPPROTO_ICMP;
2) 创建并初始化 ICMP 首部;
3) 调用 sendto 函数,将 ICMP 请求发给远程主机;
4) 调用 recvfrom 函数,以接收任何 ICMP 响应;
Linux 中<netinet/ip_icmp.h> ICMP 的数据结构定义如下:
/*
 * ICMP message layout, quoted from <netinet/ip_icmp.h>.
 *
 * Three fixed fields (type, code, checksum) are followed by two unions.
 * The #define lines inside the struct are shorthand names for the union
 * members, so callers can write e.g. icmp->icmp_id instead of
 * icmp->icmp_hun.ih_idseq.icd_id.
 */
struct icmp
{
  u_int8_t  icmp_type;  /* type of message, see below */
  u_int8_t  icmp_code;  /* type sub code */
  u_int16_t icmp_cksum; /* ones complement checksum of struct */
  /* First union: alternative layouts for the field(s) after the checksum. */
  union
  {
    u_char ih_pptr;             /* ICMP_PARAMPROB */
    struct in_addr ih_gwaddr;   /* gateway address */
    struct ih_idseq             /* echo datagram */
    {
      u_int16_t icd_id;
      u_int16_t icd_seq;
    } ih_idseq;
    u_int32_t ih_void;

    /* ICMP_UNREACH_NEEDFRAG -- Path MTU Discovery (RFC1191) */
    struct ih_pmtu
    {
      u_int16_t ipm_void;
      u_int16_t ipm_nextmtu;
    } ih_pmtu;

    struct ih_rtradv
    {
      u_int8_t irt_num_addrs;
      u_int8_t irt_wpa;
      u_int16_t irt_lifetime;
    } ih_rtradv;
  } icmp_hun;
/* Shorthand names for the icmp_hun members. */
#define icmp_pptr       icmp_hun.ih_pptr
#define icmp_gwaddr     icmp_hun.ih_gwaddr
#define icmp_id         icmp_hun.ih_idseq.icd_id
#define icmp_seq        icmp_hun.ih_idseq.icd_seq
#define icmp_void       icmp_hun.ih_void
#define icmp_pmvoid     icmp_hun.ih_pmtu.ipm_void
#define icmp_nextmtu    icmp_hun.ih_pmtu.ipm_nextmtu
#define icmp_num_addrs  icmp_hun.ih_rtradv.irt_num_addrs
#define icmp_wpa        icmp_hun.ih_rtradv.irt_wpa
#define icmp_lifetime   icmp_hun.ih_rtradv.irt_lifetime
  /* Second union: alternative layouts for the data that follows. */
  union
  {
    struct
    {
      u_int32_t its_otime;
      u_int32_t its_rtime;
      u_int32_t its_ttime;
    } id_ts;
    struct
    {
      struct ip idi_ip;
      /* options and then 64 bits of data */
    } id_ip;
    struct icmp_ra_addr id_radv;
    u_int32_t   id_mask;
    u_int8_t    id_data[1];
  } icmp_dun;
/* Shorthand names for the icmp_dun members. */
#define icmp_otime      icmp_dun.id_ts.its_otime
#define icmp_rtime      icmp_dun.id_ts.its_rtime
#define icmp_ttime      icmp_dun.id_ts.its_ttime
#define icmp_ip         icmp_dun.id_ip.idi_ip
#define icmp_radv       icmp_dun.id_radv
#define icmp_mask       icmp_dun.id_mask
#define icmp_data       icmp_dun.id_data
};
首先定义一个头文件:
#ifndef PING_H
#define PING_H

/* Core system types, sockets and time. */
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/time.h>

/* Internet protocol definitions (IPv4 header, ICMP). */
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip_icmp.h>
#include <arpa/inet.h>
#include <netdb.h>

/* Process, signal and C library support. */
#include <signal.h>
#include <unistd.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define BUFSIZE 4096

/*
 * Program-wide state shared by every source file that includes this header.
 *
 * NOTE(review): apart from datalen these are definitions, not `extern`
 * declarations, so each including .c file gets its own tentative definition.
 * This presumably relies on the toolchain merging common symbols -- confirm
 * the build flags (e.g. -fcommon) before changing compilers.
 */
char   sendbuf[BUFSIZE];    /* outgoing packet buffer */
extern int datalen;         /* # bytes of data following ICMP header */
char  *host;                /* target given on the command line */
int    nsent;               /* add 1 for each sendto() */
int    nrecv;               /* add 1 for each recvmsg() */
pid_t  pid;                 /* our PID */
int    sockfd;              /* the raw socket */
int    verbose;             /* incremented by each -v option */

/* Signal handling, main loop and time arithmetic. */
void sig_alrm(int);
void readloop(void);
void tv_sub(struct timeval *, struct timeval *);

/* Per-address-family entry points. */
void init_v6(void);
void send_v4(void);
void send_v6(void);
void proc_v4(char *, ssize_t, struct msghdr *, struct timeval *);
void proc_v6(char *, ssize_t, struct msghdr *, struct timeval *);

/*
 * Hides the differences between IPv4 and IPv6: one instance per address
 * family, and `pr` points at the one in use.
 */
struct proto
{
    /* three function pointers: process a reply, send a request, initialise */
    void (*fproc)(char *, ssize_t, struct msghdr *, struct timeval *);
    void (*fsend)(void);
    void (*finit)(void);

    /* two socket address structure pointers and their length */
    struct sockaddr *sasend;    /* sockaddr{} for send, from getaddrinfo */
    struct sockaddr *sarecv;    /* sockaddr for receiving */
    socklen_t        salen;     /* length of sockaddr{}s */

    /* ICMP protocol value */
    int icmpprot;               /* IPPROTO_xxx value for ICMP */
} *pr;

#ifdef IPV6
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#endif

#endif
#include "ping.h"

/*
 * IPv4 entry of the protocol table: reply handler, send routine, no init
 * routine, and the ICMP protocol number. The address fields start out empty
 * and are filled in by main().
 */
struct proto proto_v4 = {proc_v4, send_v4, NULL, NULL, NULL, 0, IPPROTO_ICMP};

#ifdef IPV6
/* IPv6 entry of the protocol table; only built when IPV6 is defined. */
struct proto proto_v6 = {proc_v6, send_v6, init_v6, NULL, NULL, 0, IPPROTO_ICMPV6};
#endif

typedef void Sigfunc(int);

/*
 * Number of data bytes that go with each ICMP echo request.
 * ping.h declares this `extern`; this is its definition.
 */
int datalen = 56;

/* Helpers implemented in other source files. */
extern Sigfunc *MySignal(int signo, Sigfunc *func);
extern struct addrinfo *host_serv(const char *host, const char *serv, int family, int socktype);
extern char *Sock_ntop_host(const struct sockaddr *sa, socklen_t salen);
extern void *Calloc(size_t n, size_t size);

void statistics(int signo);

/*
 * Entry point: parse the command line, resolve the target host, select the
 * protocol table entry for its address family and enter the receive loop.
 *
 * Usage: ping [ -v ] <hostname>
 */
int main(int argc, char **argv)
{
    int opt;
    struct addrinfo *ai;
    char *h;

    opterr = 0;     /* don't want getopt() writing to stderr */

    /* Only the -v option is implemented. */
    while ((opt = getopt(argc, argv, "v")) != -1) {
        switch (opt) {
        case 'v':
            verbose++;
            break;
        case '?':
            /* getopt() returns '?' and stores the offending character in optopt. */
            fprintf(stderr, "unrecognized option: %c\n", optopt);
            exit(1);
        default:
            break;
        }
    }

    /* Exactly one non-option argument (the host) is required. */
    if (optind != argc - 1) {
        /* Not an errno failure, so perror() would append a misleading message. */
        fprintf(stderr, "usage: ping [ -v ] <hostname>\n");
        exit(1);
    }
    host = argv[optind];

    pid = getpid() & 0xffff;    /* ICMP ID field is 16 bits */

    MySignal(SIGALRM, sig_alrm);
    MySignal(SIGINT, statistics);

    /* Map the host name to an address list. */
    ai = host_serv(host, NULL, 0, 0);
    if (ai == NULL) {
        /* NOTE(review): host_serv is defined elsewhere; NULL is treated here
         * as "lookup failed" -- confirm against its implementation. */
        fprintf(stderr, "ping: cannot resolve %s\n", host);
        exit(1);
    }

    /* Presentation (string) form of the resolved address. */
    h = Sock_ntop_host(ai->ai_addr, ai->ai_addrlen);

    /* Show the host name, its address and the number of data bytes. */
    printf("PING %s (%s) %d bytes of data.\n",
           ai->ai_canonname ? ai->ai_canonname : h, h, datalen);

    /* Initialize according to protocol. */
    if (ai->ai_family == AF_INET) {
        pr = &proto_v4;
#ifdef IPV6
    } else if (ai->ai_family == AF_INET6) {
        pr = &proto_v6;
        if (IN6_IS_ADDR_V4MAPPED(&(((struct sockaddr_in6 *)ai->ai_addr)->sin6_addr))) {
            fprintf(stderr, "cannot ping IPv4-mapped IPv6 address\n");
            exit(1);
        }
#endif
    } else {
        fprintf(stderr, "unknown address family %d\n", ai->ai_family);
        exit(1);
    }

    pr->sasend = ai->ai_addr;                                   /* destination address */
    pr->sarecv = (struct sockaddr *)Calloc(1, ai->ai_addrlen);  /* filled in by recvmsg() */
    pr->salen = ai->ai_addrlen;                                 /* size of both addresses */

    /* Send requests and process replies; readloop() only leaves via exit(). */
    readloop();

    exit(0);
}

/*
 * SIGINT handler: print how many packets were sent and received together
 * with the loss percentage, close the socket and terminate.
 *
 * NOTE(review): printf() is not async-signal-safe; this is tolerated here
 * only because the handler ends the process.
 */
void statistics(int signo)
{
    const char *target = host ? host : "";
    int lost = 0;

    (void)signo;

    /* The signal can arrive before main() has finished setting up `pr`. */
    if (pr != NULL && pr->sasend != NULL && pr->salen != 0)
        target = Sock_ntop_host(pr->sasend, pr->salen);

    /* Avoid dividing by zero when nothing has been sent yet. */
    if (nsent > 0)
        lost = 100 * (nsent - nrecv) / nsent;

    printf("\n----------- %s ping statistics -----------\n", target);
    printf("%d packets transmitted, %d received, %d%% packet loss\n", nsent, nrecv, lost);

    close(sockfd);
    exit(1);
}
#include "ping.h"

/*
 * Create the raw ICMP socket, drop privileges, send the first request and
 * then receive packets forever, handing each one to the protocol-specific
 * handler pr->fproc together with its arrival time.
 *
 * Never returns normally: the process ends via exit() here or in a signal
 * handler.
 */
void readloop(void)
{
    char recvbuf[BUFSIZE];
    char controlbuf[BUFSIZE];
    struct msghdr msg;
    struct iovec iov;
    struct timeval tval;
    ssize_t n;
    int size;

    /* Creating a raw socket requires root privileges. */
    sockfd = socket(pr->sasend->sa_family, SOCK_RAW, pr->icmpprot);
    if (sockfd < 0) {
        perror("socket error");
        exit(1);
    }

    /*
     * Give up root and continue as the real user. The result must be checked:
     * carrying on after a failed setuid() would keep running with elevated
     * privileges.
     */
    if (setuid(getuid()) < 0) {
        perror("setuid error");
        exit(1);
    }

    /* Protocol-specific initialisation, if the table entry provides one. */
    if (pr->finit)
        (*pr->finit)();

    /* Ask for a 60 KB receive buffer to make receive-buffer overflow less
     * likely. Best effort: a failure is reported but is not fatal. */
    size = 60 * 1024;
    if (setsockopt(sockfd, SOL_SOCKET, SO_RCVBUF, &size, sizeof(size)) < 0)
        perror("setsockopt SO_RCVBUF");

    /* Send the first packet; sig_alrm() also schedules the following ones. */
    sig_alrm(SIGALRM);

    /* Describe the receive buffers; clear the fields that are not set here. */
    iov.iov_base = recvbuf;
    iov.iov_len = sizeof(recvbuf);
    memset(&msg, 0, sizeof(msg));
    msg.msg_name = pr->sarecv;
    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;
    msg.msg_control = controlbuf;

    for ( ; ; ) {
        /* Both lengths are value-result fields, so reset them every time. */
        msg.msg_namelen = pr->salen;
        msg.msg_controllen = sizeof(controlbuf);

        n = recvmsg(sockfd, &msg, 0);
        if (n < 0) {
            if (errno == EINTR)
                continue;   /* interrupted by a signal handler: retry */
            perror("recvmsg error");
            exit(1);
        }

        /* Record the arrival time, then let the protocol handler process it. */
        gettimeofday(&tval, NULL);
        (*pr->fproc)(recvbuf, n, &msg, &tval);
    }
}
#include "ping.h"

extern uint16_t in_cksum(uint16_t *addr, int len);

/*
 * Build one ICMP echo request in sendbuf and send it to pr->sasend.
 *
 * The packet is an 8-byte ICMP header followed by datalen data bytes. The
 * data is filled with the pattern 0xa5 and, when there is room for it, starts
 * with the send time as a struct timeval. nsent is incremented for every
 * packet built. Exits the process on an invalid datalen or a failed send.
 */
void send_v4(void)
{
    enum { ICMP_ECHO_HDRLEN = 8 };  /* type, code, checksum, id, sequence */
    struct icmp *icmp = (struct icmp *)sendbuf;
    struct timeval now;
    ssize_t sent;
    int len;

    /* Refuse a data length that would write past the end of sendbuf. */
    if (datalen < 0 || datalen > BUFSIZE - ICMP_ECHO_HDRLEN) {
        fprintf(stderr, "send_v4: invalid data length %d\n", datalen);
        exit(1);
    }

    /* ICMP header for an echo request. */
    icmp->icmp_type = ICMP_ECHO;
    icmp->icmp_code = 0;
    icmp->icmp_id = pid;
    icmp->icmp_seq = nsent++;

    memset(icmp->icmp_data, 0xa5, datalen);     /* fill with pattern */

    /*
     * Record the send time at the start of the data. It is copied in with
     * memcpy rather than written through a cast pointer, because the data
     * area of a char buffer is not guaranteed to be aligned for a
     * struct timeval.
     */
    if ((size_t)datalen >= sizeof(now)) {
        gettimeofday(&now, NULL);
        memcpy(icmp->icmp_data, &now, sizeof(now));
    }

    len = ICMP_ECHO_HDRLEN + datalen;           /* checksum ICMP header and data */

    /* The checksum field must be zero while the checksum is computed. */
    icmp->icmp_cksum = 0;
    icmp->icmp_cksum = in_cksum((uint16_t *)icmp, len);

    sent = sendto(sockfd, sendbuf, len, 0, pr->sasend, pr->salen);
    if (sent < 0) {
        perror("sendto error");
        exit(1);
    }
    if (sent != len) {
        /* errno is not meaningful for a short send, so do not use perror(). */
        fprintf(stderr, "sendto error: short send (%ld of %d bytes)\n", (long)sent, len);
        exit(1);
    }
}
#include "ping.h"

extern char *Sock_ntop_host(const struct sockaddr *sa, socklen_t salen);

/*
 * Process one packet received on the raw IPv4 socket.
 *
 * ptr    - start of the received datagram (IP header first)
 * len    - number of bytes received
 * msg    - message header used for the receive (not used here)
 * tvrecv - arrival time; overwritten with the round-trip time for replies
 *
 * Echo replies carrying our ID are printed with their round-trip time and
 * counted in nrecv. Any other ICMP message is printed only when verbose is
 * set. Packets that are too short, or not ICMP, are silently dropped.
 */
void proc_v4(char *ptr, ssize_t len, struct msghdr *msg, struct timeval *tvrecv)
{
    int hlen1, icmplen;
    double rtt;
    struct ip *ip;
    struct icmp *icmp;
    struct timeval tvsend;

    (void)msg;

    /* The fixed part of the IP header must be present before it is read. */
    if (len < (ssize_t)sizeof(struct ip))
        return;

    ip = (struct ip *)ptr;      /* start of IP header */

    /* The header length field counts 32-bit words. */
    hlen1 = ip->ip_hl << 2;     /* length of IP header */
    if (hlen1 < (int)sizeof(struct ip) || (ssize_t)hlen1 > len)
        return;                 /* malformed header length */

    if (ip->ip_p != IPPROTO_ICMP)
        return;                 /* not ICMP */

    /* Step over the IP header to reach the ICMP header. */
    icmp = (struct icmp *)(ptr + hlen1);

    /* ICMP header plus data; anything below 8 bytes has no complete header. */
    icmplen = (int)(len - hlen1);
    if (icmplen < 8)
        return;                 /* malformed packet */

    if (icmp->icmp_type == ICMP_ECHOREPLY) {
        if (icmp->icmp_id != pid)
            return;             /* not a response to our ECHO_REQUEST */

        /*
         * The data must hold a whole struct timeval. Its size differs
         * between platforms, so it is not hard-coded.
         */
        if (icmplen < 8 + (int)sizeof(tvsend))
            return;             /* not enough data to use */

        /* Copy the send time out; the packet data may not be aligned for it. */
        memcpy(&tvsend, icmp->icmp_data, sizeof(tvsend));

        /* tvrecv becomes the difference between receive and send time. */
        tv_sub(tvrecv, &tvsend);

        /* Round-trip time in milliseconds. */
        rtt = tvrecv->tv_sec * 1000.0 + tvrecv->tv_usec / 1000.0;

        printf("%d bytes from %s: icmp_seq=%u ttl=%d rtt=%.3f ms\n",
               icmplen, Sock_ntop_host(pr->sarecv, pr->salen),
               icmp->icmp_seq, ip->ip_ttl, rtt);

        nrecv++;
    } else if (verbose) {
        printf(" %d bytes from %s: icmp_type = %d, icmp_code = %d\n",
               icmplen, Sock_ntop_host(pr->sarecv, pr->salen),
               icmp->icmp_type, icmp->icmp_code);
    }
}
#include <stdint.h>
#include <string.h>

/*
 * Internet checksum over `len` bytes starting at `addr`.
 *
 * The data is summed as a sequence of 16-bit words in a 32-bit accumulator,
 * the carries out of the low 16 bits are folded back in, and the ones
 * complement of the result is returned. A trailing odd byte is treated as a
 * 16-bit word whose second byte in memory is zero.
 *
 * The returned value is meant to be stored directly into the checksum field
 * of the packet; no byte-order conversion is needed.
 *
 * Callers pass byte buffers cast to uint16_t *, so the words are loaded with
 * memcpy instead of being dereferenced, which is safe whatever the buffer's
 * alignment.
 */
uint16_t in_cksum(uint16_t *addr, int len)
{
    const unsigned char *p = (const unsigned char *)addr;
    uint32_t sum = 0;
    uint16_t word;
    int nleft = len;

    /* Add the data up two bytes at a time. */
    while (nleft > 1) {
        memcpy(&word, p, sizeof(word));
        sum += word;
        p += sizeof(word);
        nleft -= 2;
    }

    /* Mop up an odd byte, if necessary. */
    if (nleft == 1) {
        word = 0;
        memcpy(&word, p, 1);
        sum += word;
    }

    /* Add back carry outs from the top 16 bits to the low 16 bits. */
    sum = (sum >> 16) + (sum & 0xffff);     /* add hi 16 to low 16 */
    sum += (sum >> 16);                     /* add carry */

    return (uint16_t)~sum;                  /* truncate to 16 bits */
}
#include "ping.h"

/*
 * SIGALRM handler: send one packet through the send routine of the protocol
 * in use, then arm the alarm again so that SIGALRM is delivered to this
 * process one second later.
 */
void sig_alrm(int signo)
{
    void (*send_packet)(void) = pr->fsend;

    send_packet();
    alarm(1);
}
编译步骤:
sudo make sudo chmod u+s Ping
测试:
$ ./Ping www.github.com PING github.com (192.30.252.129) 56 bytes of data. 64 bytes from 192.30.252.129: icmp_seq=0 ttl=45 rtt=303.057 ms 64 bytes from 192.30.252.129: icmp_seq=1 ttl=45 rtt=301.416 ms 64 bytes from 192.30.252.129: icmp_seq=2 ttl=45 rtt=301.614 ms 64 bytes from 192.30.252.129: icmp_seq=3 ttl=45 rtt=301.727 ms 64 bytes from 192.30.252.129: icmp_seq=4 ttl=45 rtt=308.911 ms 64 bytes from 192.30.252.129: icmp_seq=5 ttl=45 rtt=303.088 ms 64 bytes from 192.30.252.129: icmp_seq=6 ttl=45 rtt=305.763 ms ^C ----------- 192.30.252.129 ping statistics ----------- 7 packets transmitted, 7 received, 0 packet lost
《Unix 网络编程》