原始套接字提供如下功能:
1、读写ICMPv4,IGMPv4及ICMPv6分组。如ping程序,就是使用原始套接口发送ICMP回显请求,并接收ICMP回显应答
2、读写特殊的IPv4数据报。大多数内核只处理协议字段值为1(ICMP)、2(IGMP)、6(TCP)和17(UDP)的数据报,但协议字段还可能取其他值;通过原始套接口,进程可以读写这些内核不处理的IPv4数据报
3、使用IP_HDRINCL套接口选项可以构造自己的IPv4头部。
原始套接口的创建
一般分为以下几步
1、调用socket函数创建一个原始套接口,其中第二个参数为SOCK_RAW,第三个参数(protocol)一般不为0,可以为IPPROTO_ICMP或者IPPROTO_IGMP等。只有超级用户才有权创建原始套接口
2、可以设置 IP_HDRINCL套接口选项,如
const int on = 1;
setsockopt(sockfd, IPPROTO_IP, IP_HDRINCL, &on, sizeof(on));
3、可以对原始套接口调用bind函数,但是不常用。这个仅用来设置本地地址,对于端口号没有意义
4、可以在原始套接口上调用connect函数,也不常用,只是设置目的地址,对于端口也没有意义。调用connect后,因为指定了目的地址,可以调用write或send,而不是sendto
原始套接口输出
输出有以下规则
1、普通输出调用 sendto或sendmsg并指定目的IP地址来完成。如果套接口已连接,可以用write,writev或send
2、如果IP_HDRINCL没有设置 ,写的数据起始地址为IP头部后的第一个字节,其中头部协议字段填写为socket调用时的第三个参数
3、如果IP_HDRINCL已经设置,写的数据起始地址为IP头部的第一个字节,用户提供的数据大小值必须包括头部的字节数。此时整个IP头部由进程构造,但有两点例外:标识字段可以置为0,由内核来设置;IPv4头部检验和总是由内核计算并填充
4、对于 超出外出接口MTU的分组,内核将其分片。
原始套接口输入
内核接收到的分组中,哪些会传递给原始套接口、哪些不会,有以下规则
1、TCP和UDP分组不会传递给原始套接口
2、当内核处理完ICMP消息后,绝大部分 ICMP分组会传递给原始套接口
3、内核处理完IGMP消息后,所有IGMP分组都将传递给原始套接口
4、内核不能识别的协议字段的IP数据报都将传递给原始套接口。内核对这些分组唯一做的就是检验IP头部的某些字段:IP版本,IPv4头部检验和,头部长度及目的IP地址
5、如果数据报以片段形式到达,则要等所有片段都到达并重组之后,才将该分组传给原始套接口
下面是用原始套接口写的类似ping的程序
/*
 * Minimal IPv4 "ping" built on a raw ICMP socket.
 *
 * Usage: ping <host>
 *
 * Once per second an ICMP echo request carrying a send timestamp is
 * transmitted (driven by SIGALRM); every ICMP echo reply whose identifier
 * matches this process is printed together with its round-trip time.
 * Creating the raw socket normally requires superuser privileges.
 */
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <arpa/inet.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip_icmp.h>
#include <netdb.h>
#include <signal.h>
#include <sys/time.h>
#include <stdio.h>
#include <unistd.h>

#define BUFSIZE 1500

char recvbuf[BUFSIZE];   /* buffer for received IP datagrams */
char sendbuf[BUFSIZE];   /* buffer in which the echo request is built */

char *host;              /* target host name as given on the command line */
int nsent;               /* sequence number of the next echo request */
pid_t pid;               /* low 16 bits of our PID, used as ICMP identifier */
int sockfd;              /* the raw ICMP socket */
int verbose;             /* non-zero: also report non-echo-reply ICMP messages */

void proc_v4(char *, ssize_t, struct timeval *);
void send_v4(void);
void readloop(void);
void sig_alrm(int);
void tv_sub(struct timeval *, struct timeval *);

/* Per-protocol dispatch table: how to send, how to process, where to/from. */
struct proto {
    void (*fproc)(char *, ssize_t, struct timeval *);
    void (*fsend)(void);
    struct sockaddr *sasend;   /* destination address for sendto() */
    struct sockaddr *sarecv;   /* filled in by recvfrom() with the sender */
    socklen_t salen;           /* length of both address buffers */
    int icmpproto;             /* protocol argument for socket() */
} *pr;

/*
 * Convert a socket address to presentation form.
 *
 * Only AF_INET is supported. Returns a pointer to a static buffer (so the
 * function is not reentrant), or NULL if the family is unsupported or the
 * conversion fails. A non-zero port is appended as "port=<n>".
 */
char *sock_ntop(struct sockaddr *sa, socklen_t len)
{
    static char str[128];

    (void)len;

    switch (sa->sa_family) {
    case AF_INET: {
        struct sockaddr_in *sin = (struct sockaddr_in *)sa;

        if (inet_ntop(AF_INET, &sin->sin_addr, str, sizeof(str)) == NULL)
            return NULL;
        if (ntohs(sin->sin_port) != 0) {
            /* Append in place, bounded by the space left in str. */
            size_t used = strlen(str);

            snprintf(str + used, sizeof(str) - used, "port=%d",
                     ntohs(sin->sin_port));
        }
        return str;
    }
    default:
        return NULL;
    }
}

/*
 * Resolve host/serv with getaddrinfo().
 *
 * Returns the head of the result list (owned by the caller, to be released
 * with freeaddrinfo()) or NULL on failure.
 */
struct addrinfo *host_serv(const char *host, const char *serv, int family,
                           int socktype)
{
    struct addrinfo hints;
    struct addrinfo *res = NULL;

    memset(&hints, 0, sizeof(hints));
    hints.ai_flags = AI_CANONNAME;
    hints.ai_family = family;
    hints.ai_socktype = socktype;

    if (getaddrinfo(host, serv, &hints, &res) != 0)
        return NULL;
    return res;
}

struct proto proto_v4 = {proc_v4, send_v4, NULL, NULL, 0, IPPROTO_ICMP};

int datalen = 56;   /* bytes of data following the 8-byte ICMP header */

int main(int argc, char **argv)
{
    struct addrinfo *ai;

    if (argc < 2) {
        fprintf(stderr, "usage: %s <host>\n",
                (argc > 0 && argv[0] != NULL) ? argv[0] : "ping");
        exit(1);
    }
    host = argv[1];

    /* The ICMP identifier field is 16 bits wide; keep only what fits so the
     * comparison against received replies can succeed for large PIDs. */
    pid = getpid() & 0xffff;
    signal(SIGALRM, sig_alrm);

    /* Only IPv4 is implemented, so ask the resolver for AF_INET only. */
    ai = host_serv(host, NULL, AF_INET, 0);
    if (ai == NULL) {
        fprintf(stderr, "cannot resolve host: %s\n", host);
        exit(1);
    }

    pr = &proto_v4;
    printf("ICMP_ECHO=%d\n", ICMP_ECHO);
    pr->sasend = ai->ai_addr;
    pr->sarecv = calloc(1, ai->ai_addrlen);
    if (pr->sarecv == NULL) {
        fprintf(stderr, "calloc error:%s\n", strerror(errno));
        exit(1);
    }
    pr->salen = ai->ai_addrlen;

    readloop();
    exit(0);
}

/*
 * Create the raw socket, start the once-per-second sender and process
 * incoming datagrams forever. Returns only on an unrecoverable error.
 */
void readloop(void)
{
    int size;
    socklen_t len;
    ssize_t n;
    struct timeval tval;

    sockfd = socket(pr->sasend->sa_family, SOCK_RAW, pr->icmpproto);
    if (sockfd < 0) {
        fprintf(stderr, "socket error:%s\n", strerror(errno));
        return;
    }

    /* The raw socket exists now; give up any set-user-ID privileges. */
    if (setuid(getuid()) < 0) {
        fprintf(stderr, "setuid error:%s\n", strerror(errno));
        return;
    }

    /* Best effort: a larger receive buffer helps when many replies arrive. */
    size = 60 * 1024;
    setsockopt(sockfd, SOL_SOCKET, SO_RCVBUF, &size, sizeof(size));

    sig_alrm(SIGALRM);   /* send the first packet and arm the timer */

    for (;;) {
        len = pr->salen;
        n = recvfrom(sockfd, recvbuf, sizeof(recvbuf), 0, pr->sarecv, &len);
        if (n < 0) {
            if (errno == EINTR)
                continue;
            printf("recvfrom error:%s\n", strerror(errno));
            return;
        }
        gettimeofday(&tval, NULL);
        (*pr->fproc)(recvbuf, n, &tval);
    }
}

/* Compute out -= in, normalising tv_usec into [0, 1000000). */
void tv_sub(struct timeval *out, struct timeval *in)
{
    if ((out->tv_usec -= in->tv_usec) < 0) {
        --out->tv_sec;
        out->tv_usec += 1000000;
    }
    out->tv_sec -= in->tv_sec;
}

/*
 * Process one received IPv4 datagram.
 *
 * ptr/len describe the datagram starting at the IP header; tvrecv is the
 * receive time and is overwritten with the round-trip time for our replies.
 */
void proc_v4(char *ptr, ssize_t len, struct timeval *tvrecv)
{
    int hlen1, icmplen;
    double rtt;
    struct ip *ip;
    struct icmp *icmp;
    struct timeval tvsend;
    const char *from;

    if (len < (ssize_t)sizeof(struct ip)) {
        fprintf(stderr, "icmp len error\n");
        return;
    }

    ip = (struct ip *)ptr;
    hlen1 = ip->ip_hl << 2;              /* IP header length in bytes */
    if (ip->ip_p != IPPROTO_ICMP)
        return;

    icmp = (struct icmp *)(ptr + hlen1);
    if ((icmplen = (int)len - hlen1) < 8) {
        fprintf(stderr, "icmp len error\n");
        return;
    }

    from = sock_ntop(pr->sarecv, pr->salen);
    if (from == NULL)
        from = "(unknown)";

    if (icmp->icmp_type == ICMP_ECHOREPLY) {
        if (icmp->icmp_id != pid)
            return;                       /* reply to some other process */

        /* The payload must hold the struct timeval written by send_v4(). */
        if ((size_t)icmplen < 8 + sizeof(struct timeval)) {
            fprintf(stderr, "icmplen (%d) < %zu\n", icmplen,
                    8 + sizeof(struct timeval));
            return;
        }

        /* Copy out: the payload is not guaranteed to be suitably aligned. */
        memcpy(&tvsend, icmp->icmp_data, sizeof(tvsend));
        tv_sub(tvrecv, &tvsend);
        rtt = tvrecv->tv_sec * 1000.0 + tvrecv->tv_usec / 1000.0;

        printf("%d bytes from %s:seq=%u, ttl=%d, rtt=%.3f ms\n",
               icmplen, from, icmp->icmp_seq, ip->ip_ttl, rtt);
    } else if (verbose) {
        printf("%d bytes from %s:type=%d, code=%d\n",
               icmplen, from, icmp->icmp_type, icmp->icmp_code);
    }
}

/*
 * SIGALRM handler (also called once directly): send one request and re-arm
 * the one-second timer. errno is preserved because the interrupted
 * recvfrom() error path in readloop() inspects it.
 */
void sig_alrm(int signo)
{
    int saved_errno = errno;

    (void)signo;
    (*pr->fsend)();
    alarm(1);
    errno = saved_errno;
}

/*
 * Internet checksum: the 16-bit one's complement of the one's complement
 * sum of the len bytes at addr.
 */
unsigned short in_cksum(unsigned short *addr, int len)
{
    int nleft = len;
    unsigned int sum = 0;
    unsigned short *w = addr;
    unsigned short answer = 0;

    while (nleft > 1) {
        sum += *w++;
        nleft -= 2;
    }

    /* Odd trailing byte: treat it as the first byte of a zero-padded word. */
    if (nleft == 1) {
        *(unsigned char *)(&answer) = *(unsigned char *)w;
        sum += answer;
    }

    sum = (sum >> 16) + (sum & 0xffff);   /* fold the carries back in */
    sum += (sum >> 16);
    answer = (unsigned short)~sum;
    return answer;
}

/*
 * Build and send one ICMP echo request whose payload starts with the
 * current time. Runs in signal-handler context, so failures are reported
 * with write(2) only.
 */
void send_v4(void)
{
    static const char errmsg[] = "sendto error\n";
    int len;
    struct icmp *icmp;
    struct timeval now;

    icmp = (struct icmp *)sendbuf;
    icmp->icmp_type = ICMP_ECHO;
    icmp->icmp_code = 0;
    icmp->icmp_id = pid;
    icmp->icmp_seq = nsent++;

    gettimeofday(&now, NULL);
    memcpy(icmp->icmp_data, &now, sizeof(now));

    len = 8 + datalen;                    /* ICMP header + data */
    icmp->icmp_cksum = 0;                 /* must be zero while summing */
    icmp->icmp_cksum = in_cksum((unsigned short *)icmp, len);

    if (sendto(sockfd, sendbuf, len, 0, pr->sasend, pr->salen) < 0) {
        if (write(STDERR_FILENO, errmsg, sizeof(errmsg) - 1) < 0) {
            /* nothing further can be done from here */
        }
    }
}