原文地址:http://www.189works.com/article-49101-1.html
今天我们来看看一个小例子,利用前面所学到的WinPcap编程知识来实现一个简单的还原HTTP协议的程序。相信大家对于HTTP协议一定不会陌生,我这里只简单地说一下它的报文格式,即HTTP报文有两种:请求报文和响应报文。为了让大家对于这两种报文有更直观的认识,给大家看两个简单的例子:
下面是一个典型的HTTP请求报文:
GET /somedir/page.html HTTP/1.1
Host: www.someschool.edu
Connection: close
User-agent: Mozilla/4.0
Accept-language: fr
再看一个HTTP响应报文:
HTTP/1.1 200 OK
Connection: close
Date: Thu, 03 Jul 2003 12:00:15 GMT
Server: Apache/1.3.0 (Unix)
Last-Modified: Sun, 6 May 2007 09:23:24 GMT
Content-Length: 6821
Content-Type: text/html
(data data data data data ...)
我们注意到HTTP请求报文中的第一行是以GET打头的,没错,它实际上是HTTP请求的一种方法,类似的还有POST、HEAD等等。一般熟知的大概就是GET和POST了,像Servlet编程中就有doGet和doPost两种处理HTTP请求的方法。而对于HTTP响应报文而言,第一行开头是协议的版本号,如HTTP/1.1,现在普及的也是HTTP/1.1。利用这些特征,我们就可以判断TCP数据报文里保存的是否是HTTP数据。
本程序的实现思路有很多种,我采用的是一种最笨拙的方式,即按照 判断是否是IP数据包->判断是否是TCP分组->判断是否是HTTP报文 的逻辑,最后将HTTP报文的内容打印出来。程序开始前我们需要先定义一些重要协议的包格式,因为WinPcap并没有为我们定义这些东西。
View Code
/*
* define struct of ethernet header , ip address , ip header and tcp header
*/
/*
 * Ethernet frame header in wire order: destination address first, then
 * source address, then the type field.
 */
typedef struct ether_header {
    u_char ether_dhost[ETHER_ADDR_LEN]; /* destination ethernet address, ETHER_ADDR_LEN bytes */
    u_char ether_shost[ETHER_ADDR_LEN]; /* source ethernet address, ETHER_ADDR_LEN bytes */
    u_short ether_type;                 /* ethernet type, 2 bytes; compared against htons(ETHERTYPE_IP) */
} ether_header;
/*
 * IPv4 address kept as four individual octets; byte1 is the first
 * octet in memory and byte4 the last.
 */
struct ip_address {
    u_char byte1;
    u_char byte2;
    u_char byte3;
    u_char byte4;
};
typedef struct ip_address ip_address;
/*
 * IPv4 header layout. The capture code overlays this struct on the bytes
 * that follow the ethernet header and reads tlen through ntohs().
 */
struct ip_header {
    u_char     ver_ihl;         /* version and ip header length, packed in one byte */
    u_char     tos;             /* type of service */
    u_short    tlen;            /* total length (header plus body) */
    u_short    identification;  /* identification */
    u_short    flags_fo;        /* flags and fragment offset */
    u_char     ttl;             /* time to live */
    u_char     proto;           /* protocol carried in the body (single byte) */
    u_short    crc;             /* header checksum */
    ip_address saddr;           /* source address */
    ip_address daddr;           /* destination address */
    u_int      op_pad;          /* option and padding */
};
typedef struct ip_header ip_header;
/*
 * TCP header layout (fixed part, no options).
 */
struct tcp_header {
    u_short th_sport;          /* source port */
    u_short th_dport;          /* destination port */
    u_int   th_seq;            /* sequence number */
    u_int   th_ack;            /* acknowledgement number */
    u_short th_len_resv_code;  /* header length, reserved bits and code bits */
    u_short th_window;         /* window */
    u_short th_sum;            /* checksum */
    u_short th_urp;            /* urgent pointer */
};
typedef struct tcp_header tcp_header;
还有一些重要的识别协议的类型,我们需要自己在代码中进行定义。
/* Value of ether_header.ether_type (in host byte order) when the frame carries an IP datagram. */
#define ETHERTYPE_IP 0x0800 /* ip protocol */
/*
 * NOTE(review): 0x0600 byte-swaps to 0x0006, so htons(TCP_PROTOCAL) yields 6
 * only on hosts where htons() actually swaps bytes. ip_header.proto is a
 * single u_char, so it needs no byte-order conversion at all.
 * The name looks like a misspelling of "PROTOCOL" -- confirm before renaming.
 */
#define TCP_PROTOCAL 0x0600 /* tcp protocol, stored byte-swapped (see note above) */
接下来再看看刚才所说的程序的逻辑是如何实现的。
1. 判断是否是IP数据包。我们先回顾一下,在RFC 894中定义了以太网的封装格式,由目的地址(6字节)、源地址(6字节)、类型(2字节)、数据以及CRC(4字节)构成。我们只需要关注头部中类型这个字段,当它为0x0800时,表示数据保存的是IP数据报;当它为0x0806时,表示数据保存的是ARP请求/应答;当为0x8035时,数据保存的是RARP请求/应答。所以只要比较类型字段是否为0x0800,就可以达到目的。
2. 判断是否是TCP分组。跟上面类似,判断IP首部中的协议字段是否表示TCP即可。需要注意的是,协议字段只有1个字节,TCP对应的值是6(0x06),本身不需要做字节序转换;上面定义的0x0600只有在小端主机上经过htons()转换后才恰好等于6。
3. 判断是否是HTTP报文。根据上面所讲解的HTTP报文格式,我们只需要判断开头是否为"GET"、"POST"、"HTTP/1.1"就可以做到了。具体程序是如何来判断的我们来看看代码吧!
View Code
/*
 * Capture loop: for every captured IPv4/TCP packet, look for the first HTTP
 * start marker ("GET", "POST" or "HTTP/1.1") and print the packet bytes from
 * that marker to the end of the IP datagram.
 */
while ((res = pcap_next_ex(adhandle, &pheader, &pkt_data)) >= 0) {
    /* Fixed sizes/values of the headers inspected below. */
    enum {
        ETH_HEADER_BYTES = 14,      /* two addresses (6 bytes each) + 2-byte type */
        IPV4_MIN_HEADER_BYTES = 20, /* IPv4 header without options */
        IPV4_PROTO_TCP = 6          /* value of the one-byte protocol field for TCP */
    };
    /* Byte sequences that mark the start of an HTTP request or response. */
    static const char *const http_markers[] = { "GET", "POST", "HTTP/1.1" };
    const size_t marker_count = sizeof http_markers / sizeof http_markers[0];

    if (res == 0)
        continue; /* read time out */

    /* Too short to hold an ethernet header plus a minimal IP header. */
    if (pheader->caplen < ETH_HEADER_BYTES + IPV4_MIN_HEADER_BYTES)
        continue;

    const ether_header *eheader = (const ether_header *)pkt_data;
    if (eheader->ether_type != htons(ETHERTYPE_IP))
        continue; /* ip packet only */

    const ip_header *ih = (const ip_header *)(pkt_data + ETH_HEADER_BYTES);
    /* proto is a single byte, so it is compared directly (no htons). */
    if (ih->proto != IPV4_PROTO_TCP)
        continue; /* tcp packet only */

    const char *ip_pkt_data = (const char *)ih;
    int ip_len = ntohs(ih->tlen); /* ip length, header and body */
    int avail = (int)pheader->caplen - ETH_HEADER_BYTES;
    if (ip_len > avail)
        ip_len = avail; /* never read past the bytes that were actually captured */

    /* Find the offset of the first HTTP marker inside the datagram. */
    int start = -1;
    for (int n = 0; n < ip_len && start < 0; n++) {
        for (size_t k = 0; k < marker_count; k++) {
            size_t mlen = strlen(http_markers[k]);
            if ((size_t)(ip_len - n) >= mlen &&
                memcmp(ip_pkt_data + n, http_markers[k], mlen) == 0) {
                start = n;
                break;
            }
        }
    }

    /* print http content (printing stops at the first NUL byte, if any) */
    if (start >= 0) {
        printf("%.*s\n", ip_len - start, ip_pkt_data + start);
        printf("\n**********************************************\n\n");
    }
}
看一下运行后的截图:
这里需要注意的是,程序本质上还没有完全还原HTTP协议的功能,对于HTTP请求数据和响应数据进行解析,真正的应该可以通过Content-Type分析数据格式,并按照相应的解析方式进行解码,还有对于中文字符的处理等等~~最后将整个程序的源码贴出来,有任何意见或建议的可以随意吐槽,虚心接受。ps: 注释写得不全请见谅~~
pheader.h头文件
View Code
#ifndef PHEADER_H_INCLUDED
#define PHEADER_H_INCLUDED
/*
*
*/
#define ETHER_ADDR_LEN 6 /* length of an ethernet address in bytes */
#define ETHERTYPE_IP 0x0800 /* ether_type value (host byte order) for an IP datagram */
/*
 * NOTE(review): 0x0600 byte-swaps to 0x0006, so htons(TCP_PROTOCAL) yields 6
 * only on hosts where htons() actually swaps bytes. ip_header.proto is a
 * single u_char, so it needs no byte-order conversion at all.
 * The name looks like a misspelling of "PROTOCOL" -- confirm before renaming.
 */
#define TCP_PROTOCAL 0x0600 /* tcp protocol, stored byte-swapped (see note above) */
#define BUFFER_MAX_LENGTH 65536 /* size in bytes of the buffer that collects http data */
/* NOTE(review): these two macros would clash with <stdbool.h> if it were ever included. */
#define true 1 /* boolean true */
#define false 0 /* boolean false */
/*
* define struct of ethernet header , ip address , ip header and tcp header
*/
/*
 * Ethernet frame header in wire order: destination address first, then
 * source address, then the type field.
 */
typedef struct ether_header {
    u_char ether_dhost[ETHER_ADDR_LEN]; /* destination ethernet address, ETHER_ADDR_LEN bytes */
    u_char ether_shost[ETHER_ADDR_LEN]; /* source ethernet address, ETHER_ADDR_LEN bytes */
    u_short ether_type;                 /* ethernet type, 2 bytes; compared against htons(ETHERTYPE_IP) */
} ether_header;
/*
 * IPv4 address kept as four individual octets; byte1 is the first
 * octet in memory and byte4 the last.
 */
struct ip_address {
    u_char byte1;
    u_char byte2;
    u_char byte3;
    u_char byte4;
};
typedef struct ip_address ip_address;
/*
 * IPv4 header layout. The capture code overlays this struct on the bytes
 * that follow the ethernet header and reads tlen through ntohs().
 */
struct ip_header {
    u_char     ver_ihl;         /* version and ip header length, packed in one byte */
    u_char     tos;             /* type of service */
    u_short    tlen;            /* total length (header plus body) */
    u_short    identification;  /* identification */
    u_short    flags_fo;        /* flags and fragment offset */
    u_char     ttl;             /* time to live */
    u_char     proto;           /* protocol carried in the body (single byte) */
    u_short    crc;             /* header checksum */
    ip_address saddr;           /* source address */
    ip_address daddr;           /* destination address */
    u_int      op_pad;          /* option and padding */
};
typedef struct ip_header ip_header;
/*
 * TCP header layout (fixed part, no options).
 */
struct tcp_header {
    u_short th_sport;          /* source port */
    u_short th_dport;          /* destination port */
    u_int   th_seq;            /* sequence number */
    u_int   th_ack;            /* acknowledgement number */
    u_short th_len_resv_code;  /* header length, reserved bits and code bits */
    u_short th_window;         /* window */
    u_short th_sum;            /* checksum */
    u_short th_urp;            /* urgent pointer */
};
typedef struct tcp_header tcp_header;
#endif // PHEADER_H_INCLUDED
main.c文件
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define HAVE_REMOTE
#include <pcap.h>
#include "pheader.h"
/*
* function: a simple program to analyze http
* author: blacksword
* date: Wed March 21 2012
*/
int main()
{
pcap_if_t* alldevs; // list of all devices
pcap_if_t* d; // device you chose
pcap_t* adhandle;
char errbuf[PCAP_ERRBUF_SIZE]; //error buffer
int i=0;
int inum;
struct pcap_pkthdr *pheader; /* packet header */
const u_char * pkt_data; /* packet data */
int res;
/* pcap_findalldevs_ex got something wrong */
if (pcap_findalldevs_ex(PCAP_SRC_IF_STRING, NULL /* auth is not needed*/, &alldevs, errbuf) == -1)
{
fprintf(stderr, "Error in pcap_findalldevs_ex: %s\n", errbuf);
exit(1);
}
/* print the list of all devices */
for(d = alldevs; d != NULL; d = d->next)
{
printf("%d. %s", ++i, d->name); // print device name , which starts with "rpcap://"
if(d->description)
printf(" (%s)\n", d->description); // print device description
else
printf(" (No description available)\n");
}
/* no interface found */
if (i == 0)
{
printf("\nNo interface found! Make sure Winpcap is installed.\n");
return -1;
}
printf("Enter the interface number (1-%d):", i);
scanf("%d", &inum);
if(inum < 1 || inum > i)
{
printf("\nInterface number out of range.\n");
pcap_freealldevs(alldevs);
return -1;
}
for(d=alldevs, i=0; i < inum-1; d=d->next, i++); /* jump to the selected interface */
/* open the selected interface*/
if((adhandle = pcap_open(d->name, /* the interface name */
65536, /* length of packet that has to be retained */
PCAP_OPENFLAG_PROMISCUOUS, /* promiscuous mode */
1000, /* read time out */
NULL, /* auth */
errbuf /* error buffer */
)) == NULL)
{
fprintf(stderr, "\nUnable to open the adapter. %s is not supported by Winpcap\n",
d->description);
return -1;
}
printf("\nListening on %s...\n", d->description);
pcap_freealldevs(alldevs); // release device list
/* capture packet */
while((res = pcap_next_ex(adhandle, &pheader, &pkt_data)) >= 0) {
if(res == 0)
continue; /* read time out*/
ether_header * eheader = (ether_header*)pkt_data; /* transform packet data to ethernet header */
if(eheader->ether_type == htons(ETHERTYPE_IP)) { /* ip packet only */
ip_header * ih = (ip_header*)(pkt_data+14); /* get ip header */
if(ih->proto == htons(TCP_PROTOCAL)) { /* tcp packet only */
int ip_len = ntohs(ih->tlen); /* get ip length, it contains header and body */
int find_http = false;
char* ip_pkt_data = http://www.cnblogs.com/blacksword/archive/2012/03/22/(char*)ih;
int n = 0;
char buffer[BUFFER_MAX_LENGTH];
int bufsize = 0;
for(; n<ip_len; n++)
{
/* http get or post request */
if(!find_http && ((n+3<ip_len && strncmp(ip_pkt_data+n,"GET",strlen("GET")) ==0 )
|| (n+4<ip_len && strncmp(ip_pkt_data+n,"POST",strlen("POST")) == 0)) )
find_http = true;
/* http response */
if(!find_http && i+8<ip_len && strncmp(ip_pkt_data+i,"HTTP/1.1",strlen("HTTP/1.1"))==0)
find_http = true;
/* if http is found */
if(find_http)
{
buffer[bufsize] = ip_pkt_data[n]; /* copy http data to buffer */
bufsize ++;
}
}
/* print http content */
if(find_http) {
buffer[bufsize] = '\0';
printf("%s\n", buffer);
printf("\n**********************************************\n\n");
}
}
}
}
return 0;
}