ping源码解析

ping的源码解析

1、下载Ubuntu的ping源码

  1. 查看ping的源码在哪个包下:dpkg -S $(which ping)(输出的包名为iputils-ping)

  2. 下载源码包:apt-get source iputils-ping
    • 如果出现了"You must put some 'source' URIs in your sources.list"错误,需要先在系统设置->Software&Updates里把Source code选项打开
    • 下载的源代码在当前的文件夹下

2、编译/执行ping

  1. 在源码文件夹iputils-20121221下执行make ping

    如果出现了"fatal error: sys/capability.h: No such file or directory
    compilation terminated"错误,则执行"apt-get install libcap-dev"

  2. 得到编译好的ping.o和ping的可执行文件。测试是否能用。

ping源码解析_第1张图片

3、源码分析

  1. 查看makefile可以看到,ping主要是由ping_common.h、ping_common.c、ping.c编译得来的。
  2. 查看ping.c里包含的头文件,包括netinet/ip.h、netinet/ip_icmp.h、ifaddrs.h。在/usr/include下把这些头文件也给找到。
    • find指令:find /usr/include -name ip.h
    • 整体结构图
      ping源码解析_第2张图片

4、main函数

先从最顶层开始看,且由于ping的选项很多,关注顶层时不要死抓住各种选项的设置不放,先通过研究ping的最基本用法【ping 地址】来理清主要框架。

// Global variables and structure declarations used by the excerpts below
struct sockaddr_in whereto; /* who to ping */
int optlen = 0;
int settos = 0;         /* Set TOS, Precedence or other QOS options */
int icmp_sock;          /* socket file descriptor */
u_char outpack[0x10000]; /* outgoing packet buffer; send_probe() builds the ICMP message here */
int maxpacket = sizeof(outpack); /* size of outpack in bytes */
struct sockaddr_in source; /* local source address; main() sets sin_family and may bind() to it */

// struct sockaddr_in is defined in <netinet/in.h>; it is reproduced here for reference only
struct sockaddr_in {
    sa_family_t    sin_family;         // address family
    uint16_t        sin_port;            // 16-bit TCP/UDP port number
    struct in_addr    sin_addr;     // 32-bit IPv4 address
    char             sin_zero[8];       // unused padding
};
/*
 * Entry point of ping (abridged excerpt: "..." marks code elided by the article).
 *
 * Visible flow: create the raw ICMP socket, parse command-line options,
 * resolve each remaining argument into `whereto`, size the socket buffers,
 * print the "PING ..." banner, then call setup() and main_loop().
 */
int main(int argc, char **argv)
{
    struct hostent *hp;
    int ch, hold, packlen;
    int socket_errno;
    u_char *packet;
    char *target;
// preprocessor-conditional sections are folded away in this excerpt
#ifdef USE_IDN...
#else...
#endif

    char rspace[3 + 4 * NROUTES + 1];   /* record route space */
    limit_capabilities();

#ifdef USE_IDN...
#endif
        enable_capability_raw();
    // create the raw ICMP socket; errno is saved so the failure can be reported later
    icmp_sock = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP);
    socket_errno = errno;
    disable_capability_raw();
    source.sin_family = AF_INET;
    preload = 1;

    // apply settings according to the command-line options
    while ((ch = getopt(argc, argv, COMMON_OPTSTR "bRT:")) != EOF) {
        switch(ch) {
        case 'b':...
        case 'Q':...
        case 'R':...
        case 'T':...
        case 'I':...
        case 'M':...
        case 'V':
            printf("ping utility, iputils-%s\n", SNAPSHOT);
            exit(0);
        // generic options are handed over to common_options()
        COMMON_OPTIONS
            common_options(ch);
            break;
        default:
            usage();
        }
    }
    argc -= optind; // options consumed: fewer arguments remain
    argv += optind; // advance the argument pointer to the first non-option argument
        // no arguments left: print the usage text
    if (argc == 0)
        usage();
    // more than one argument: option-dependent setup (elided)
    if (argc > 1) {...
    }
        // determine the target
    while (argc > 0) {
        // take the next target given on the command line
        target = *argv;
        memset((char *)&whereto, 0, sizeof(whereto));
        whereto.sin_family = AF_INET;
        if (inet_aton(target, &whereto.sin_addr) == 1) {
            hostname = target;
            if (argc == 1)
                options |= F_NUMERIC;
        } else {
            char *idn;
            // resolve the host name to an IPv4 address
            // NOTE(review): `idn` is never assigned in this excerpt before use, and the
            // resolved address is not copied into `whereto` here — presumably both
            // happen in code the article elided; confirm against the full source.
            hp = gethostbyname2(idn, AF_INET);
            if (!hp) {
                fprintf(stderr, "ping: unknown host %s\n", target);
                exit(2);
            }
                        hostname = hnamebuf;
        }
        if (argc > 1)
            route[nroute++] = whereto.sin_addr.s_addr;
        argc--;
        argv++;
    }
    // No source address set: open a UDP probe socket and close it again.
    // NOTE(review): whatever upstream does with probe_fd in between is not shown
    // in this excerpt. The trailing "while (0);" is an empty loop statement that
    // follows the if-block; it is not part of a do/while.
    if (source.sin_addr.s_addr == 0) {
    socklen_t alen;
    struct sockaddr_in dst = whereto;
        int probe_fd = socket(AF_INET, SOCK_DGRAM, 0);
    if (probe_fd < 0) {
        perror("socket");
        exit(2);
        }
        close(probe_fd);
    } while (0);

     // if the target address is 0, use the source address as the target
    if (whereto.sin_addr.s_addr == 0)
    whereto.sin_addr.s_addr = source.sin_addr.s_addr;
    // creating the ICMP socket failed earlier: report it with the saved errno
    if (icmp_sock < 0) {
        errno = socket_errno;
        perror("ping: icmp open socket");
        exit(2);
    }
    // -I option handling (elided)
    if (device) {....
    }
    // -b option handling (elided)
    if (broadcast_pings || IN_MULTICAST(ntohl(whereto.sin_addr.s_addr))) {...
    }
    // -M option handling (elided)
    if (pmtudisc >= 0) {...
    }
    // -I option handling (body elided)
    if ((options&F_STRICTSOURCE) && bind(icmp_sock, (struct sockaddr*)&source, sizeof(source)) == -1) {
    }
    // handling of further options (elided)
    ...
    
    // set the socket send/receive buffer sizes
    hold = datalen + 8;
    hold += ((hold+511)/512)*(optlen + 20 + 16 + 64 + 160);
    sock_setbufs(icmp_sock, hold);
    
    // From here on: the banner the user sees when ping starts,
    // e.g. "PING baidu.com (39.156.69.79)"
    printf("PING %s (%s) ", hostname, inet_ntoa(whereto.sin_addr));
    if (device || (options&F_STRICTSOURCE))
    printf("from %s %s: ", inet_ntoa(source.sin_addr), device ?: "");
    // e.g. "56(84) bytes of data."
    // first number is datalen; second is datalen + 8 + optlen + 20
    printf("%d(%d) bytes of data.\n", datalen, datalen+8+optlen+20);
    
    // key step 1: one-time initialisation
    setup(icmp_sock);
    // key step 2: the send/receive loop
    // NOTE(review): `packet` and `packlen` are not assigned anywhere in this
    // excerpt; presumably set in the elided code — confirm.
    main_loop(icmp_sock, packet, packlen);
}

5、setup函数

/*
 * One-time initialisation before the main loop (abridged excerpt).
 *
 * Visible work: derive the packet identifier from the process ID, install
 * signal handlers, clear the signal mask, record the start time, arm a
 * one-shot real-time timer when `deadline` is set, and read the terminal
 * width when stdout is a tty.
 */
void setup(int icmp_sock)
{
    ....
    // everything above is option handling (elided)
    // low 16 bits of the process ID, in network byte order; used to recognise our packets
    ident = htons(getpid() & 0xFFFF);
    
    set_signal(SIGINT, sigexit);
    set_signal(SIGALRM, sigexit);
    set_signal(SIGQUIT, sigstatus);

    // install an empty signal mask, i.e. unblock all signals
    sigemptyset(&sset);
    sigprocmask(SIG_SETMASK, &sset, NULL);

    gettimeofday(&start_time, NULL);

    // one-shot ITIMER_REAL that fires after `deadline` seconds (SIGALRM -> sigexit above)
    if (deadline) {
    struct itimerval it;

    it.it_interval.tv_sec = 0;
    it.it_interval.tv_usec = 0;
    it.it_value.tv_sec = deadline;
    it.it_value.tv_usec = 0;
    setitimer(ITIMER_REAL, &it, NULL);
    }

    // when writing to a terminal, remember its column count
    if (isatty(STDOUT_FILENO)) {
    struct winsize w;

    if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &w) != -1) {
        if (w.ws_col > 0)
            screen_width = w.ws_col;
    }
    }
}

6、main_loop函数

main_loop里面是个死循环,根据时间间隔发包->收包->解析包->发包...

/*
 * Send/receive loop of ping.
 *
 * Each outer iteration checks the exit conditions, sends every probe that
 * is due via pinger(), waits (poll/spin/blocking recvmsg) until the next
 * probe time, and then drains the socket, handing each received message to
 * parse_reply(). finish() is called once the loop ends.
 *
 * icmp_sock: socket to receive on.
 * packet:    receive buffer.
 * packlen:   size of the receive buffer in bytes.
 *
 * NOTE(review): in the article, the text between "next<" and
 * "c->cmsg_level" was lost during HTML extraction, which left the function
 * with unbalanced braces. The span from "polling = 0;" down to the
 * SO_TIMESTAMP scan is restored here from the upstream iputils
 * ping_common.c; verify it against the iputils-20121221 tarball.
 * The restored code needs <poll.h>, <sched.h> and <limits.h>.
 */
void main_loop(int icmp_sock, __u8 *packet, int packlen)
{
    char addrbuf[128];
    char ans_data[4096];
    struct iovec iov;
    struct msghdr msg;
    struct cmsghdr *c;
    int cc;
    int next;
    int polling;

    iov.iov_base = (char *)packet;

    for (;;) {
        /* Check exit conditions. */
        if (exiting)
            break;
        if (npackets && nreceived + nerrors >= npackets)
            break;
        if (deadline && nerrors)
            break;
        /* A status report was requested (status_snapshot is set outside
         * this function): print it. */
        if (status_snapshot)
            status();

        /* Send the probes scheduled for now. */
        do {
            next = pinger();
            next = schedule_exit(next);
        } while (next <= 0);

        /* "next" is the time until the next probe, if positive.
         * If next<=0, send now or as soon as possible. */

        /* Technical part. Looks wicked. Could be dropped,
         * if everyone used the newest kernel. :-)
         * Its purpose is:
         * 1. Provide intervals less than resolution of scheduler.
         *    Solution: spinning.
         * 2. Avoid use of poll(), when recvmsg() can provide
         *    timed waiting (SO_RCVTIMEO). */
        polling = 0;
        /* Control the pacing between sends. */
        if ((options & (F_ADAPTIVE|F_FLOOD_POLL)) || next<SCHINT(interval)) {
            int recv_expected = in_flight();

            /* If we are here, recvmsg() is unable to wait for
             * required timeout. */
            if (1000 % HZ == 0 ? next <= 1000 / HZ : (next < INT_MAX / HZ && next * HZ <= 1000)) {
                /* Very short timeout... So, if we wait for
                 * something, we sleep for MININTERVAL.
                 * Otherwise, spin! */
                if (recv_expected) {
                    next = MININTERVAL;
                } else {
                    next = 0;
                    /* When spinning, no reasons to poll.
                     * Use nonblocking recvmsg() instead. */
                    polling = MSG_DONTWAIT;
                    /* But yield yet. */
                    sched_yield();
                }
            }

            if (!polling &&
                ((options & (F_ADAPTIVE|F_FLOOD_POLL)) || interval)) {
                struct pollfd pset;
                pset.fd = icmp_sock;
                pset.events = POLLIN|POLLERR;
                pset.revents = 0;
                if (poll(&pset, 1, next) < 1 ||
                    !(pset.revents&(POLLIN|POLLERR)))
                    continue;
                polling = MSG_DONTWAIT;
            }
        }

        for (;;) {
            struct timeval *recv_timep = NULL;
            struct timeval recv_time;
            int not_ours = 0; /* Raw socket can receive messages
                               * destined to other running pings. */

            iov.iov_len = packlen;
            memset(&msg, 0, sizeof(msg));
            msg.msg_name = addrbuf;
            msg.msg_namelen = sizeof(addrbuf);
            msg.msg_iov = &iov;
            msg.msg_iovlen = 1;
            msg.msg_control = ans_data;
            msg.msg_controllen = sizeof(ans_data);

            cc = recvmsg(icmp_sock, &msg, polling);
            polling = MSG_DONTWAIT;

            if (cc < 0) {
                if (errno == EAGAIN || errno == EINTR)
                    break;
                if (!receive_error_msg()) {
                    if (errno) {
                        perror("ping: recvmsg");
                        break;
                    }
                    not_ours = 1;
                }
            } else {

#ifdef SO_TIMESTAMP
                /* Look for a receive timestamp among the control messages. */
                for (c = CMSG_FIRSTHDR(&msg); c; c = CMSG_NXTHDR(&msg, c)) {
                    if (c->cmsg_level != SOL_SOCKET ||
                        c->cmsg_type != SO_TIMESTAMP)
                        continue;
                    if (c->cmsg_len < CMSG_LEN(sizeof(struct timeval)))
                        continue;
                    recv_timep = (struct timeval*)CMSG_DATA(c);
                }
#endif

                /* No usable control-message timestamp (or F_LATENCY set):
                 * try SIOCGSTAMP, else fall back to the current time. */
                if ((options&F_LATENCY) || recv_timep == NULL) {
                    if ((options&F_LATENCY) ||
                        ioctl(icmp_sock, SIOCGSTAMP, &recv_time))
                        gettimeofday(&recv_time, NULL);
                    recv_timep = &recv_time;
                }
                /* Parse the received packet. */
                not_ours = parse_reply(&msg, cc, addrbuf, recv_timep);
            }

            /* See? ... someone runs another ping on this host. */
            if (not_ours)
                install_filter();

            /* If nothing is in flight, "break" returns us to pinger. */
            if (in_flight() == 0)
                break;

            /* Otherwise, try to recvmsg() again. recvmsg()
             * is nonblocking after the first iteration, so that
             * if nothing is queued, it will receive EAGAIN
             * and return to pinger. */
        }
    }
    finish();
}

6、pinger函数

组成并传送一个ICMP ECHO请求包。
ID是UNIX进程的ID,sequence number是一个递增的整数。
data段的开头存放发送时刻的UNIX时间戳(一个struct timeval,在32位系统上为8字节),用来计算往返时间。
具体装包发包是由send_probe函数实现的。

/*
 * Rate-limit and send one probe.
 *
 * A token counter (`tokens`, in milliseconds of elapsed time) decides
 * whether a probe may be sent now. If so, send_probe() is called and its
 * result is handled.
 *
 * Returns the value main_loop() stores in `next`: a positive value is the
 * wait before the next attempt, a value <= 0 means "send again now".
 */
int pinger(void)
{
    static int oom_count;
    static int tokens;
    int i;

    /* Already sent enough (or exiting): return an arbitrary positive value. */
    if (exiting || (npackets && ntransmitted >= npackets && !deadline))
        return 1000;

    /* Check that packets < rate*time + preload */
    if (cur_time.tv_sec == 0) {
        /* First call: start the clock and grant the preload allowance. */
        gettimeofday(&cur_time, NULL);
        tokens = interval*(preload-1);
    } else {
        long ntokens;
        struct timeval tv;

        /* Milliseconds elapsed since cur_time. */
        gettimeofday(&tv, NULL);
        ntokens = (tv.tv_sec - cur_time.tv_sec)*1000 +
            (tv.tv_usec-cur_time.tv_usec)/1000;
        if (!interval) {
            /* Case of unlimited flood is special;
             * if we see no reply, they are limited to 100pps */
            if (ntokens < MININTERVAL && in_flight() >= preload)
                return MININTERVAL-ntokens;
        }
        ntokens += tokens;
        /* Cap the accumulated allowance at interval*preload. */
        if (ntokens > interval*preload)
            ntokens = interval*preload;
        /* Not enough allowance for one probe yet: report how long to wait. */
        if (ntokens < interval)
            return interval - ntokens;

        cur_time = tv;
        tokens = ntokens - interval;
    }

    if (options & F_OUTSTANDING) {
        if (ntransmitted > 0 && !rcvd_test(ntransmitted)) {
            print_timestamp();
            printf("no answer yet for icmp_seq=%lu\n", (ntransmitted % MAX_DUP_CHK));
            fflush(stdout);
        }
    }

resend:
    i = send_probe();    // bookkeeping done: send the packet
        // sent successfully
    if (i == 0) {
        oom_count = 0;
        advance_ntransmitted();
        if (!(options & F_QUIET) && (options & F_FLOOD)) {
            /* Very silly, but without this output with
             * high preload or pipe size is very confusing. */
            if ((preload < screen_width && pipesize < screen_width) ||
                in_flight() < screen_width)
                write_stdout(".", 1);
        }
        return interval - tokens;
    }

    /* Sending failed: handle the different error cases. */
    if (i > 0) {
        /* Apparently, it is some fatal bug. */
        abort();
    } else if (errno == ENOBUFS || errno == ENOMEM) {
        int nores_interval;

        /* Device queue overflow or OOM. Packet is not sent. */
        tokens = 0;
        /* Slowdown. This works only in adaptive mode (option -A) */
        rtt_addend += (rtt < 8*50000 ? rtt/8 : 50000);
        if (options&F_ADAPTIVE)
            update_interval();
        nores_interval = SCHINT(interval/2);
        if (nores_interval > 500)
            nores_interval = 500;
        oom_count++;
        if (oom_count*nores_interval < lingertime)
            return nores_interval;
        i = 0;
        /* Fall to hard error. It is to avoid complete deadlock
         * on stuck output device even when dealine was not requested.
         * Expected timings are screwed up in any case, but we will
         * exit some day. :-) */
    } else if (errno == EAGAIN) {
        /* Socket buffer is full. */
        tokens += interval;
        return MININTERVAL;
    } else {
        if ((i=receive_error_msg()) > 0) {
            /* An ICMP error arrived. */
            tokens += interval;
            return MININTERVAL;
        }
        /* Compatibility with old linuces. */
        if (i == 0 && confirm_flag && errno == EINVAL) {
            confirm_flag = 0;
            errno = 0;
        }
        if (!errno)
            goto resend;
    }

    /* Hard local error. Pretend we sent packet. */
    advance_ntransmitted();

    if (i == 0 && !(options & F_QUIET)) {
        if (options & F_FLOOD)
            write_stdout("E", 1);
        else
            perror("ping: sendmsg");
    }
    tokens = 0;
    return SCHINT(interval);
}

7、send_probe函数

int send_probe()
{
        // ICMP报文头部
    struct icmphdr *icp;
    int cc;
    int i;

    icp = (struct icmphdr *)outpack;
    icp->type = ICMP_ECHO;    // 请求类型:request
    icp->code = 0;
    icp->checksum = 0;            //校验和
    icp->un.echo.sequence = htons(ntransmitted+1);  //
    icp->un.echo.id = ident;     // 进程ID

    rcvd_clear(ntransmitted+1);

    if (timing) {
        if (options&F_LATENCY) {
            struct timeval tmp_tv;
            gettimeofday(&tmp_tv, NULL);
            memcpy(icp+1, &tmp_tv, sizeof(tmp_tv));
        } else {
            memset(icp+1, 0, sizeof(struct timeval));
        }
    }

    cc = datalen + 8;           /* skips ICMP portion */

    /* compute ICMP checksum here */
    icp->checksum = in_cksum((u_short *)icp, cc, 0);

    if (timing && !(options&F_LATENCY)) {
        struct timeval tmp_tv;
        gettimeofday(&tmp_tv, NULL);
        memcpy(icp+1, &tmp_tv, sizeof(tmp_tv));
        icp->checksum = in_cksum((u_short *)&tmp_tv, sizeof(tmp_tv), ~icp->checksum);
    }
        
        // 发出去之前得把包封装成msghdr类型
    do {
        static struct iovec iov = {outpack, 0};
        static struct msghdr m = { &whereto, sizeof(whereto),
                           &iov, 1, &cmsg, 0, 0 };
        m.msg_controllen = cmsg_len;
        iov.iov_len = cc;

        i = sendmsg(icmp_sock, &m, confirm);  // 在这里发包
        confirm = 0;
    } while (0);

    return (cc == i ? 0 : i);
}

8、parse_reply函数

打印收到的ICMP包,就是一个拆包的过程

/*
 * Unpack one received message (abridged excerpt: "..." marks elided code,
 * and some closing braces of the else/switch are missing in the article).
 *
 * msg:  message as filled by recvmsg(); the packet is read from its first iovec.
 * cc:   number of bytes received.
 * addr: sender address, treated as a struct sockaddr_in.
 * tv:   receive timestamp, passed on to gather_statistics().
 *
 * Returns 1 when the packet is too short or is not recognised as belonging
 * to this ping (different id, or an incoming echo request); 0 otherwise.
 */
int
parse_reply(struct msghdr *msg, int cc, void *addr, struct timeval *tv)
{
    struct sockaddr_in *from = addr;
    __u8 *buf = msg->msg_iov->iov_base;
    struct icmphdr *icp;
    struct iphdr *ip;
    int hlen;
    int csfailed;

    /* Check the IP header. */
    ip = (struct iphdr *)buf;
    hlen = ip->ihl*4; // IP header length in bytes (ihl counts 32-bit words)
    if (cc < hlen + 8 || ip->ihl < 5) {
        if (options & F_VERBOSE)
            fprintf(stderr, "ping: packet too short (%d bytes) from %s\n", cc,
                pr_addr(from->sin_addr.s_addr));
        return 1;
    }

    /* Now the ICMP part. */
    cc -= hlen;
        // the ICMP message starts hlen bytes into the buffer
    icp = (struct icmphdr *)(buf + hlen);    
        // verify the checksum (the result is not used in the visible part of this excerpt)
    csfailed = in_cksum((u_short *)icp, cc, 0);
        // the received ICMP message is an echo reply
    if (icp->type == ICMP_ECHOREPLY) {
                // compare the identifier first to make sure the reply is ours
        if (icp->un.echo.id != ident)
            return 1;           /* 'Twas not our ECHO */
                // record statistics for this reply (round-trip time is presumably
                // computed inside gather_statistics — confirm)
        if (gather_statistics((__u8*)icp, sizeof(*icp), cc,
                      ntohs(icp->un.echo.sequence),
                      ip->ttl, 0, tv, pr_addr(from->sin_addr.s_addr),
                      pr_echo_reply)) {
            fflush(stdout);
            return 0;    // back to main_loop
        }
    } else {
        /* We fall here when a redirect or source quench arrived.
         * Also this branch processes icmp errors, when IP_RECVERR
         * is broken. */

        switch (icp->type) {
        case ICMP_ECHO:    // an ICMP echo request was received
            /* MUST NOT */
            return 1;
        case ICMP_SOURCE_QUENCH:    // source quench
        case ICMP_REDIRECT:                   // redirect
        case ICMP_DEST_UNREACH:        // destination unreachable
        case ICMP_TIME_EXCEEDED:       // time exceeded
        case ICMP_PARAMETERPROB:    // parameter problem
            {
                /* The error message embeds the IP header and the first
                 * bytes of the packet that triggered it. */
                struct iphdr * iph = (struct  iphdr *)(&icp[1]);
                struct icmphdr *icp1 = (struct icmphdr*)((unsigned char *)iph + iph->ihl*4);
                int error_pkt;
                if (cc < 8+sizeof(struct iphdr)+8 ||
                    cc < 8+iph->ihl*4+8)
                    return 1;
                /* Ignore it unless the embedded packet is our own echo
                 * request to the current target. */
                if (icp1->type != ICMP_ECHO ||
                    iph->daddr != whereto.sin_addr.s_addr ||
                    icp1->un.echo.id != ident)
                    return 1;
                error_pkt = (icp->type != ICMP_REDIRECT &&
                         icp->type != ICMP_SOURCE_QUENCH);
                if (error_pkt) {
                    acknowledge(ntohs(icp1->un.echo.sequence));
                    return 0;
                }
                nerrors+=error_pkt;
            // option-dependent handling (elided)
                ...        
                }
    return 0;
}

9、常用网络编程函数:

  1. socket函数:用来创建套接字
    • 函数原型 int socket(int domain, int type, int protocol);
    • domain表示套接字要使用的协议簇
      • AF_UNIX(本机通信)
      • AF_INET(TCP/IP – IPv4)
      • AF_INET6(TCP/IP – IPv6)
    • type表示套接字类型
      • SOCK_STREAM(TCP流)
      • SOCK_DGRAM(UDP数据报)
      • SOCK_RAW(原始套接字)
    • protocol用来确定协议种类,一般为0
  2. htons函数:将端口号由主机字节序转换为网络字节序的整数值
    • 如 mysock.sin_port = htons(80)
  3. inet_addr函数:将一个IP字符串转化为一个网络字节序的整数值
    • 如 mysock.sin_addr.s_addr = inet_addr("192.168.1.0")
  4. recvmsg sendmsg函数
    • ssize_t recvmsg(int sockfd, struct msghdr *msg, int flags);
    • ssize_t sendmsg(int sockfd, const struct msghdr *msg, int flags);
    • sockfd - 套接字描述符
    • msg - 消息头部
    • flags - 套接口设置标识
  5. msghdr结构体
struct msghdr  {
    void  * msg_name ;   / *  消息的协议地址  * /
    socklen_t msg_namelen ;   / *  地址的长度  * /
    struct iovec  * msg_iov ;   / *  多io缓冲区的地址  * /
    int  msg_iovlen ;   / *  缓冲区的个数  * /
    void  * msg_control ;   / *  辅助数据的地址  * /
    socklen_t msg_controllen ;   / *  辅助数据的长度  * /
    int  msg_flags ;   / *  接收消息的标识  * /
} ;

10、感想心得

  1. 每个命令都有很多可选参数,第一遍读源码时去深究每个参数的功能实现是很难的,很容易陷入层层递进的函数,最后放弃。应该先把实现框架搞懂。
  2. 主要功能的实现依靠icmp包的封装,icmp包的解析,sendmsg和recvmsg两个函数。复杂之处在于
    • 各个参数的不同设置
    • 发包时间间隔的设置,涉及进程信号处理
  3. 整理一下parse_reply即收到包的解析
    • 首先解析IP包的头部,解析完指针往前走
    • 读取ICMP包的type段、code段判断是否是reply包
    • 检查校验和
    • 比较标识符(进程ID)
    • 根据时间戳计算来回时间
  4. 今后还需要加深对UNIX网络编程的学习。由于很多定义和函数用法不熟悉,浪费了很多时间。

你可能感兴趣的:(ping源码解析)