traceroute允许我们确定IP数据报从本地主机游历到某个远程主机所经过的路径。
我们先来说明traceroute的工作原理:它利用的是IP路由过程中对数据包TTL(Time to Live,存活时间)的处理。当路由器收到一个IP包时,会修改IP包的TTL(并因此需要重新计算IP头部校验和checksum)。每收到一个包,路由器都会检查这个包的TTL是否是0或1。如果是,表明这个包还没有到达目的地,而且剩余的存活时间已经用完,肯定是到不了目的地了。这样路由器就简单地丢弃这个包,并给源主机发送一个ICMP"超时"通知,说明这个包已经过期了。ICMP的通知信息里包含当前路由器发送该通知时所用的IP。
这样就可以通过构造数据包,来间接检查到达一个主机时经过了哪些路由器。一开始发送一个TTL为1的包,这样到达第一个路由器的时候就已经超时了,第一个路由器就发通知说包超时,这样就可以记录下所经过的第一个路由器的IP。然后TTL加1,数据包可以安全通过第一个路由器,而第二个路由器的处理与第一个相同:丢包,并发通知说包超时了,这样就记录下了第二个路由器的IP。如此一直进行下去,直到这个数据包到达目标主机,由此可以打印出所有经过的路由器。
在通信中,IP层只负责数据的路由与传输,并不处理数据包的内容。例如ICMP、TCP、UDP,这些协议都是依赖IP层的传输功能来传输数据的。通信双方的主机收到这些协议的数据包后,一般会有对应的程序来处理这些数据。因此traceroute程序发送一个UDP包来试探。对路由器来说,UDP数据报只是IP数据报的一种,它并不关心UDP数据报的具体内容。直到这个包到达目的主机,目的主机的内核才会解析UDP数据报,并查找数据报中要求的端口是否已经有进程在使用。如果找到,则通知进程有数据到达;而如果找不到,则发送一个“目的端口不可达”的ICMP错误数据回到源主机。所以traceroute要选择一个不太可能被使用的目的端口(下面的代码中以32768+666为起始端口)。
这样整个过程就可以完全确定下来:traceroute建立一个UDP数据包,不断修改TTL值并发送出去,如果收到"超时错",表示刚刚到达的是路由器;而如果收到的是"端口不可达"错误,表示刚刚到达的就是目的主机。这样路由跟踪完成,程序结束。
下面给出部分源码(主要针对IPv4):
trace.h:
#include "unp.h"
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip_icmp.h>
#include <netinet/udp.h>
#define BUFSIZE 1500
/*
 * Payload placed at the start of every outgoing UDP probe.
 * traceloop() fills it in just before each send.
 */
struct rec { /* format of outgoing UDP data */
u_short rec_seq; /* sequence number of this probe */
u_short rec_ttl; /* TTL the probe was sent with */
struct timeval rec_tv; /* time the probe was sent; used to compute the RTT */
};
/*
 * Globals shared by the traceroute source files.
 * NOTE(review): these are object definitions in a header, not extern
 * declarations; every file including trace.h gets its own tentative
 * definition, which fails to link on toolchains that default to
 * -fno-common.  Confirm against the build flags in use.
 */
char recvbuf[BUFSIZE]; /* incoming ICMP messages are read into this buffer */
char sendbuf[BUFSIZE]; /* each outgoing probe is built in this buffer */
int datalen; /* # bytes of sendbuf sent as UDP payload per probe */
char *host; /* target host name/address from the command line */
u_short sport, dport; /* our UDP source port and base destination port, host byte order */
int nsent; /* add 1 for each sendto() -- NOTE(review): not updated by the code shown */
pid_t pid; /* our PID */
int probe, nprobes; /* index of the current probe for this TTL; probes sent per TTL */
int sendfd, recvfd; /* send on UDP sock, read on raw ICMP sock */
int ttl, max_ttl; /* TTL currently being probed; largest TTL to try */
int verbose; /* incremented once for each -v option */
/* function prototypes */
const char *icmpcode_v4(int);
int recv_v4(int, struct timeval *);
void sig_alrm(int);
void traceloop(void);
void tv_sub(struct timeval *, struct timeval *);
/*
 * Per-address-family dispatch table and state.  main() points the global
 * pr at the table matching the resolved address family and fills in the
 * sockaddr pointers and salen.
 */
struct proto {
const char *(*icmpcode)(int); /* maps an ICMP unreachable code to text */
int (*recv)(int, struct timeval *); /* waits for the reply to one probe */
struct sockaddr *sasend; /* sockaddr{} for send, from getaddrinfo */
struct sockaddr *sarecv; /* sockaddr{} for receiving */
struct sockaddr *salast; /* last sockaddr{} for receiving */
struct sockaddr *sabind; /* sockaddr{} for binding source port */
socklen_t salen; /* length of sockaddr{}s */
int icmpproto; /* IPPROTO_xxx value for ICMP */
int ttllevel; /* setsockopt() level to set TTL */
int ttloptname; /* setsockopt() name to set TTL */
} *pr; /* table currently in use */
#ifdef IPV6
#include
#include
#endif
main.c:
#include "trace.h"
/*
 * IPv4 dispatch table.  The four sockaddr pointers and salen start out
 * empty and are filled in by main() once the destination is resolved.
 */
struct proto proto_v4 = { icmpcode_v4, recv_v4, NULL, NULL, NULL, NULL, 0,
IPPROTO_ICMP, IPPROTO_IP, IP_TTL };
int datalen = sizeof(struct rec); /* default payload: exactly one struct rec */
int max_ttl = 30; /* default hop limit; -m overrides it */
int nprobes = 3; /* probes sent for each TTL value */
u_short dport = 32768 + 666; /* base destination port; traceloop() adds seq to it */
int
main(int argc, char **argv)
{
int c;
struct addrinfo *ai;
char *h;
opterr = 0; /* don't want getopt() writing to stderr */
//对命令行参数的处理
while ( (c = getopt(argc, argv, "m:v")) != -1) {
switch (c) {
case 'm':
if ( (max_ttl = atoi(optarg)) <= 1)
err_quit("invalid -m value");
break;
case 'v':
verbose++;
break;
case '?':
err_quit("unrecognized option: %c", c);
}
}
if (optind != argc-1)
err_quit("usage: traceroute [ -m -v ] ");
host = argv[optind];
pid = getpid();
Signal(SIGALRM, sig_alrm);
//处理目的主机或ip,返回一个指向addrinfo结构体的指针
ai = Host_serv(host, NULL, 0, 0);
h = Sock_ntop_host(ai->ai_addr, ai->ai_addrlen);
printf("traceroute to %s (%s): %d hops max, %d data bytes\n",
ai->ai_canonname ? ai->ai_canonname : h,
h, max_ttl, datalen);
/* initialize according to protocol */
if (ai->ai_family == AF_INET) {
//当协议为IPv4时,初始化协议结构体
pr = &proto_v4;
} else
err_quit("unknown address family %d", ai->ai_family);
pr->sasend = ai->ai_addr; /* contains destination address */
pr->sarecv = Calloc(1, ai->ai_addrlen);
pr->salast = Calloc(1, ai->ai_addrlen);
pr->sabind = Calloc(1, ai->ai_addrlen);
pr->salen = ai->ai_addrlen;
traceloop();
exit(0);
}
traceloop.c:
#include "trace.h"
void
traceloop(void)
{
int seq, code, done;
double rtt;
struct rec *rec;
struct timeval tvrecv;
//创建原始套接字
recvfd = Socket(pr->sasend->sa_family, SOCK_RAW, pr->icmpproto);
setuid(getuid()); /* don't need special permissions anymore */
#ifdef IPV6
if (pr->sasend->sa_family == AF_INET6 && verbose == 0) {
struct icmp6_filter myfilt;
ICMP6_FILTER_SETBLOCKALL(&myfilt);
ICMP6_FILTER_SETPASS(ICMP6_TIME_EXCEEDED, &myfilt);
ICMP6_FILTER_SETPASS(ICMP6_DST_UNREACH, &myfilt);
setsockopt(recvfd, IPPROTO_IPV6, ICMP6_FILTER,
&myfilt, sizeof(myfilt));
}
#endif
//创建数据报套接字
sendfd = Socket(pr->sasend->sa_family, SOCK_DGRAM, 0);
pr->sabind->sa_family = pr->sasend->sa_family;
sport = (getpid() & 0xffff) | 0x8000; /* our source UDP port # */
//设置端口
sock_set_port(pr->sabind, pr->salen, htons(sport));
//监听udp数据报的套接字
Bind(sendfd, pr->sabind, pr->salen);
sig_alrm(SIGALRM);
seq = 0;
done = 0;
for (ttl = 1; ttl <= max_ttl && done == 0; ttl++) {
//进入循环之后每次首先设置生存时间,pr->ttllevel = IPPROTO_IP
//pr->ttloptname = IP_TTL
Setsockopt(sendfd, pr->ttllevel, pr->ttloptname, &ttl, sizeof(int));
//清空pr->salast指向的套接字结构
bzero(pr->salast, pr->salen);
printf("%2d ", ttl);
fflush(stdout);
//nprobes = 3
for (probe = 0; probe < nprobes; probe++) {
rec = (struct rec *) sendbuf;
//设置序列号
rec->rec_seq = ++seq;
//设置跳数
rec->rec_ttl = ttl;
//获取当前时间
Gettimeofday(&rec->rec_tv, NULL);
//设置发送套接字的端口
sock_set_port(pr->sasend, pr->salen, htons(dport + seq));
//发送数据
Sendto(sendfd, sendbuf, datalen, 0, pr->sasend, pr->salen);
if ( (code = (*pr->recv)(seq, &tvrecv)) == -3)
printf(" *"); /* timeout, no reply */
else {
char str[NI_MAXHOST];
//如果发送应答icmp的节点ip地址发生变化,显示应答发送主机的主机名和ip地址
if (sock_cmp_addr(pr->sarecv, pr->salast, pr->salen) != 0) {
if (getnameinfo(pr->sarecv, pr->salen, str, sizeof(str),
NULL, 0, 0) == 0)
printf(" %s (%s)", str,
Sock_ntop_host(pr->sarecv, pr->salen));
else
printf(" %s",
Sock_ntop_host(pr->sarecv, pr->salen));
memcpy(pr->salast, pr->sarecv, pr->salen);
}
//计算往返时间
tv_sub(&tvrecv, &rec->rec_tv);
rtt = tvrecv.tv_sec * 1000.0 + tvrecv.tv_usec / 1000.0;
printf(" %.3f ms", rtt);
//显示ICMP代码值
if (code == -1) /* port unreachable; at destination */
done++;
else if (code >= 0)
printf(" (ICMP %s)", (*pr->icmpcode)(code));
}
fflush(stdout);
}
printf("\n");
}
}
#include "trace.h"
extern int gotalarm;
/*
* Return: -3 on timeout
* -2 on ICMP time exceeded in transit (caller keeps going)
* -1 on ICMP port unreachable (caller is done)
* >= 0 return value is some other ICMP unreachable code
*/
int
recv_v4(int seq, struct timeval *tv)
{
int hlen1, hlen2, icmplen, ret;
socklen_t len;
ssize_t n;
struct ip *ip, *hip;
struct icmp *icmp;
struct udphdr *udp;
gotalarm = 0;
alarm(3);
for ( ; ; ) {
if (gotalarm)
return(-3); /* alarm expired */
len = pr->salen;
//接收数据
n = recvfrom(recvfd, recvbuf, sizeof(recvbuf), 0, pr->sarecv, &len);
if (n < 0) {
if (errno == EINTR)
continue;
else
err_sys("recvfrom error");
}
//获取ip的头
ip = (struct ip *) recvbuf; /* start of IP header */
//获取ip报文的长度
hlen1 = ip->ip_hl << 2; /* length of IP header */
//获取icmp报文的头部
icmp = (struct icmp *) (recvbuf + hlen1); /* start of ICMP header */
if ( (icmplen = n - hlen1) < 8)
continue; /* not enough to look at ICMP header */
if (icmp->icmp_type == ICMP_TIMXCEED &&
icmp->icmp_code == ICMP_TIMXCEED_INTRANS) {
//第一种情况处理ICMP传输中超时错误,“time exceeded in transmit”
if (icmplen < 8 + sizeof(struct ip))
continue; /* not enough data to look at inner IP */
//将hip指向在ICMP消息中返回的IPv4首部,它跟在8字节的ICMP首部之后
hip = (struct ip *) (recvbuf + hlen1 + 8);
hlen2 = hip->ip_hl << 2;
if (icmplen < 8 + hlen2 + 4)
continue; /* not enough data to look at UDP ports */
//udp指向跟在这个IPv4首部之后的UDP首部
udp = (struct udphdr *) (recvbuf + hlen1 + 8 + hlen2);
//如果ICMP返回的错误是由于某个UDP数据报引起的,并且UDP数据报的源端口和目的端口是本进程发送的值
//那么它是某个中间路由器的响应我们的探测分组的一个应答
if (hip->ip_p == IPPROTO_UDP &&
udp->source == htons(sport) &&
udp->dest == htons(dport + seq)) {
ret = -2; /* we hit an intermediate router */
//返回-2
break;
}
} else if (icmp->icmp_type == ICMP_UNREACH) {
//当返回ICMP为端口不可达时
if (icmplen < 8 + sizeof(struct ip))
continue; /* not enough data to look at inner IP */
//将hip指向在ICMP消息中返回的IPv4首部,它跟在8字节的ICMP首部之后
hip = (struct ip *) (recvbuf + hlen1 + 8);
hlen2 = hip->ip_hl << 2;
if (icmplen < 8 + hlen2 + 4)
continue; /* not enough data to look at UDP ports */
//udp指向跟在这个IPv4首部之后的UDP首部
udp = (struct udphdr *) (recvbuf + hlen1 + 8 + hlen2);
if (hip->ip_p == IPPROTO_UDP &&
//如果ICMP返回的错误是由于某个UDP数据报引起的,并且UDP数据报的源端口和目的端口是本进程发送的值
//那么它是某个中间路由器的响应我们的探测分组的一个应答
//
udp->source == htons(sport) &&
udp->dest == htons(dport + seq)) {
if (icmp->icmp_code == ICMP_UNREACH_PORT)
//如果ICMP的代码为"port unreachable"
//返回-1,因为其探测分组已经到达最终目的地
ret = -1; /* have reached destination */
else
//否则返回ICMP代码值
ret = icmp->icmp_code; /* 0, 1, 2, ... */
break;
}
}
if (verbose) {
printf(" (from %s: type = %d, code = %d)\n",
Sock_ntop_host(pr->sarecv, pr->salen),
icmp->icmp_type, icmp->icmp_code);
}
/* Some other ICMP error, recvfrom() again */
}
alarm(0); /* don't leave alarm running */
Gettimeofday(tv, NULL); /* get time of packet arrival */
return(ret);
}
sock_set_port.c:
#include "unp.h"
/*
 * Store a port number in a socket address structure.
 *
 * The value is copied as given; callers in this file run it through
 * htons() first.  salen is not used.  For AF_INET the port goes into
 * sin_port, and when IPV6 is defined AF_INET6 is handled through
 * sin6_port.  Any other address family leaves *sa untouched.
 */
void
sock_set_port(struct sockaddr *sa, socklen_t salen, int port)
{
    if (sa->sa_family == AF_INET) {
        ((struct sockaddr_in *) sa)->sin_port = port;
        return;
    }
#ifdef IPV6
    if (sa->sa_family == AF_INET6) {
        ((struct sockaddr_in6 *) sa)->sin6_port = port;
        return;
    }
#endif
}
/* include host_serv */
#include "unp.h"
/*
 * Resolve a host and/or service name with getaddrinfo(), always asking
 * for the canonical name.
 *
 * family and socktype are copied into the hints unchanged (0 leaves
 * either one unrestricted).  Returns the head of the result list, or
 * NULL when getaddrinfo() reports an error; the error code itself is
 * not passed back.
 */
struct addrinfo *
host_serv(const char *host, const char *serv, int family, int socktype)
{
    struct addrinfo criteria;
    struct addrinfo *list = NULL;

    bzero(&criteria, sizeof(criteria));
    criteria.ai_flags = AI_CANONNAME;   /* always return canonical name */
    criteria.ai_family = family;        /* AF_UNSPEC, AF_INET, AF_INET6, etc. */
    criteria.ai_socktype = socktype;    /* 0, SOCK_STREAM, SOCK_DGRAM, etc. */

    if (getaddrinfo(host, serv, &criteria, &list) == 0)
        return(list);   /* pointer to first entry on linked list */
    return(NULL);
}
/* end host_serv */
/*
* There is no easy way to pass back the integer return code from
* getaddrinfo() in the function above, short of adding another argument
* that is a pointer, so the easiest way to provide the wrapper function
* is just to duplicate the simple function as we do here.
*/
/*
 * Wrapper form of host_serv(): performs the same getaddrinfo() lookup
 * (canonical name always requested) but reports a failure through
 * err_quit(), including the text from gai_strerror(), instead of
 * returning NULL.
 */
struct addrinfo *
Host_serv(const char *host, const char *serv, int family, int socktype)
{
    struct addrinfo criteria, *list;
    int rc;

    bzero(&criteria, sizeof(criteria));
    criteria.ai_flags = AI_CANONNAME;   /* always return canonical name */
    criteria.ai_family = family;        /* 0, AF_INET, AF_INET6, etc. */
    criteria.ai_socktype = socktype;    /* 0, SOCK_STREAM, SOCK_DGRAM, etc. */

    rc = getaddrinfo(host, serv, &criteria, &list);
    if (rc != 0) {
        const char *hostname = (host != NULL) ? host : "(no hostname)";
        const char *service = (serv != NULL) ? serv : "(no service name)";

        err_quit("host_serv error for %s, %s: %s",
                 hostname, service, gai_strerror(rc));
    }
    return(list);       /* pointer to first entry on linked list */
}
#include "unp.h"
/*
 * Write port into the port field of the socket address at sa.
 *
 * Only AF_INET (and AF_INET6 when IPV6 is defined) are recognised; for
 * any other family the structure is not modified.  The value is stored
 * unchanged, and salen is ignored.
 */
void
sock_set_port(struct sockaddr *sa, socklen_t salen, int port)
{
    const int family = sa->sa_family;

    if (family == AF_INET) {
        struct sockaddr_in *v4 = (struct sockaddr_in *) sa;

        v4->sin_port = port;
    }
#ifdef IPV6
    else if (family == AF_INET6) {
        struct sockaddr_in6 *v6 = (struct sockaddr_in6 *) sa;

        v6->sin6_port = port;
    }
#endif
}
#include "unp.h"
#ifdef HAVE_SOCKADDR_DL_STRUCT
#include <net/if_dl.h>
#endif

/*
 * Convert a socket address into a printable string.
 *
 *   AF_INET   "a.b.c.d", with ":port" appended when the port is non-zero
 *   AF_INET6  "[addr]:port" when the port is non-zero, otherwise "addr"
 *   AF_UNIX   the bound pathname, or "(no pathname bound)"
 *   AF_LINK   interface name and index (only with HAVE_SOCKADDR_DL_STRUCT)
 *   other     a diagnostic string naming the unknown family
 *
 * Returns a pointer into a static buffer that the next call overwrites,
 * or NULL when inet_ntop() fails.
 */
char *
sock_ntop(const struct sockaddr *sa, socklen_t salen)
{
    char portstr[8];
    static char str[128];       /* Unix domain is largest */

    switch (sa->sa_family) {
    case AF_INET: {
        const struct sockaddr_in *sin = (const struct sockaddr_in *) sa;

        /* binary address -> dotted-decimal text */
        if (inet_ntop(AF_INET, &sin->sin_addr, str, sizeof(str)) == NULL)
            return(NULL);
        /* the port is stored in network byte order */
        if (ntohs(sin->sin_port) != 0) {
            snprintf(portstr, sizeof(portstr), ":%d", ntohs(sin->sin_port));
            strcat(str, portstr);
        }
        return(str);
    }

#ifdef IPV6
    case AF_INET6: {
        const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *) sa;

        /* str[0] is reserved for '[', so the address starts at str + 1 */
        str[0] = '[';
        if (inet_ntop(AF_INET6, &sin6->sin6_addr, str + 1, sizeof(str) - 1) == NULL)
            return(NULL);
        if (ntohs(sin6->sin6_port) != 0) {
            snprintf(portstr, sizeof(portstr), "]:%d", ntohs(sin6->sin6_port));
            strcat(str, portstr);
            return(str);
        }
        return (str + 1);       /* no port: skip the leading '[' */
    }
#endif

#ifdef AF_UNIX
    case AF_UNIX: {
        const struct sockaddr_un *unp = (const struct sockaddr_un *) sa;

        /* OK to have no pathname bound to the socket: happens on
           every connect() unless client calls bind() first. */
        if (unp->sun_path[0] == 0)
            strcpy(str, "(no pathname bound)");
        else
            snprintf(str, sizeof(str), "%s", unp->sun_path);
        return(str);
    }
#endif

#ifdef HAVE_SOCKADDR_DL_STRUCT
    case AF_LINK: {
        const struct sockaddr_dl *sdl = (const struct sockaddr_dl *) sa;

        /*
         * sdl_data holds sdl_nlen bytes of interface name and is not
         * NUL-terminated, so the length must be given as a precision
         * ("%.*s"); a field width ("%*s") would not bound the read.
         */
        if (sdl->sdl_nlen > 0)
            snprintf(str, sizeof(str), "%.*s (index %d)",
                     sdl->sdl_nlen, &sdl->sdl_data[0], sdl->sdl_index);
        else
            snprintf(str, sizeof(str), "AF_LINK, index=%d", sdl->sdl_index);
        return(str);
    }
#endif

    default:
        snprintf(str, sizeof(str), "sock_ntop: unknown AF_xxx: %d, len %d",
                 sa->sa_family, (int) salen);
        return(str);
    }
}
/*
 * Wrapper around sock_ntop(): same arguments and result, except that a
 * NULL return is reported through err_sys() instead of being handed
 * back to the caller unannounced.
 */
char *
Sock_ntop(const struct sockaddr *sa, socklen_t salen)
{
    char *text = sock_ntop(sa, salen);

    if (text == NULL)
        err_sys("sock_ntop error");     /* inet_ntop() sets errno */
    return(text);
}
#include "unp.h"
#ifdef HAVE_SOCKADDR_DL_STRUCT
#include
#endif
/*
 * Compare the address parts of two socket address structures, ignoring
 * ports.
 *
 * Returns -1 when the address families differ or the family has no
 * comparison defined here.  Otherwise returns the memcmp() result of
 * the two addresses (AF_INET, and AF_INET6 when IPV6 is defined) or the
 * strcmp() result of the two pathnames (AF_UNIX); 0 therefore means
 * "same address".  salen is not used.
 */
int
sock_cmp_addr(const struct sockaddr *sa1, const struct sockaddr *sa2,
              socklen_t salen)
{
    const int family = sa1->sa_family;

    if (family != sa2->sa_family)
        return(-1);

    if (family == AF_INET) {
        const struct sockaddr_in *a = (const struct sockaddr_in *) sa1;
        const struct sockaddr_in *b = (const struct sockaddr_in *) sa2;

        return(memcmp(&a->sin_addr, &b->sin_addr, sizeof(struct in_addr)));
    }
#ifdef IPV6
    if (family == AF_INET6) {
        const struct sockaddr_in6 *a = (const struct sockaddr_in6 *) sa1;
        const struct sockaddr_in6 *b = (const struct sockaddr_in6 *) sa2;

        return(memcmp(&a->sin6_addr, &b->sin6_addr, sizeof(struct in6_addr)));
    }
#endif
#ifdef AF_UNIX
    if (family == AF_UNIX) {
        const struct sockaddr_un *a = (const struct sockaddr_un *) sa1;
        const struct sockaddr_un *b = (const struct sockaddr_un *) sa2;

        return(strcmp(a->sun_path, b->sun_path));
    }
#endif
    /* link-level and all remaining families: nothing to compare */
    return(-1);
}
#include "trace.h"
/*
 * Timeout flag: set to 1 by sig_alrm().  recv_v4() clears it before
 * arming alarm() and tests it before each recvfrom().
 * NOTE(review): a flag written from a signal handler is normally
 * declared volatile sig_atomic_t; changing it here would also require
 * changing the matching extern declaration -- confirm and update both.
 */
int gotalarm;

/*
 * SIGALRM handler.  Only records that the alarm went off; the signal
 * delivery itself is what interrupts a blocked recvfrom().
 */
void
sig_alrm(int signo)
{
    (void) signo;       /* the signal number is not needed */
    gotalarm = 1;
}
#include "unp.h"
/*
 * Subtract one timeval from another in place: *out = *out - *in.
 *
 * When the microsecond difference is negative, one second is borrowed
 * so that tv_usec ends up non-negative for normalized inputs.  *in is
 * not modified.
 */
void
tv_sub(struct timeval *out, struct timeval *in)
{
    out->tv_sec -= in->tv_sec;
    out->tv_usec -= in->tv_usec;

    if (out->tv_usec < 0) {     /* borrow one second */
        out->tv_sec -= 1;
        out->tv_usec += 1000000;
    }
}
#include "trace.h"
/*
 * Map an ICMPv4 "destination unreachable" code to descriptive text.
 *
 * Codes 0 through 15 return a string literal.  Any other value
 * (including negative ones) is formatted as "[unknown code N]" into a
 * static buffer, which the next such call overwrites.
 */
const char *
icmpcode_v4(int code)
{
    static const char *const names[] = {
        "network unreachable",                                      /*  0 */
        "host unreachable",                                         /*  1 */
        "protocol unreachable",                                     /*  2 */
        "port unreachable",                                         /*  3 */
        "fragmentation required but DF bit set",                    /*  4 */
        "source route failed",                                      /*  5 */
        "destination network unknown",                              /*  6 */
        "destination host unknown",                                 /*  7 */
        "source host isolated (obsolete)",                          /*  8 */
        "destination network administratively prohibited",          /*  9 */
        "destination host administratively prohibited",             /* 10 */
        "network unreachable for TOS",                              /* 11 */
        "host unreachable for TOS",                                 /* 12 */
        "communication administratively prohibited by filtering",   /* 13 */
        "host precedence violation",                                /* 14 */
        "precedence cutoff in effect"                               /* 15 */
    };
    static char errbuf[100];

    if (code >= 0 && code < (int) (sizeof(names) / sizeof(names[0])))
        return(names[code]);

    /* bounded formatting: never writes past errbuf */
    snprintf(errbuf, sizeof(errbuf), "[unknown code %d]", code);
    return(errbuf);
}
上图分别展示了自己的traceroute程序和系统的traceroute程序的执行结果。