[root@dbrg-2 ~]# curl -I www.baidu.com
HTTP/1.1 200 OK
Date: Wed, 29 Jul 2009 02:51:19 GMT
Server: BWS/1.0
Content-Length: 3509
Content-Type: text/html
Cache-Control: private
Expires: Wed, 29 Jul 2009 02:51:19 GMT
Set-Cookie: BAIDUID=0D5F54C0853B7C38D6BD3A3E5EA63C44:FG=1; expires=Wed, 29-Jul-39 02:51:19 GMT; path=/; domain=.baidu.com
P3P: CP=" OTI DSP COR IVA OUR IND COM "
[root@dbrg-2 ~]# wget -S --spider www.baidu.com
--10:51:34-- http://www.baidu.com/
=> `index.html'
现成的工具可以做到这一点,那么如何用 C 来实现呢?其实也很简单。我刚开始走了点弯路,想去查看 wget 或 curl 的源码,奈何那种代码相互之间依赖性太强,实在不易阅读,于是自己用 Wireshark 抓包,so easy!有兴趣的可以自己去抓一下。
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netdb.h>
#define HTTPPORT 80
/*
 * Raw HTTP request sent to the server: a HEAD request for "/", so only the
 * response headers come back.
 *
 * "Connection: close" asks the server to close the socket after replying.
 * A client that reads until end-of-stream would otherwise sit blocked on a
 * kept-alive connection until the server's idle timeout expires.
 */
char *head =
    "HEAD / HTTP/1.0\r\n"
    "Accept: */*\r\n"
    "User-Agent: Wget/1.10.2 (Red Hat modified)\r\n"
    "Host:127.0.0.1\r\n"
    "Connection: close\r\n\r\n";
/*
 * Resolve `domain` with gethostbyname() and open a TCP connection to the
 * first returned address on `port`.
 *
 * Returns the connected socket descriptor on success; the caller owns it
 * and must close() it.
 * Returns -2 if the name cannot be resolved to an IPv4 address.
 * Returns -1 if the socket cannot be created or the connection fails.
 */
int connect_URL(char *domain,int port)
{
    struct sockaddr_in server;
    struct hostent *host;
    int sock;

    host = gethostbyname(domain);
    if (host == NULL) {
        printf("gethostbyname error\n");
        return -2;
    }

    /* The address is copied into an IPv4 sin_addr; refuse anything that
     * would not fit exactly instead of trusting h_length blindly. */
    if (host->h_addrtype != AF_INET ||
        host->h_length != (int)sizeof server.sin_addr ||
        host->h_addr_list[0] == NULL) {
        printf("no IPv4 address for host\n");
        return -2;
    }

    sock = socket(AF_INET, SOCK_STREAM, 0);
    if (sock < 0) {
        printf("invalid socket\n");
        return -1;
    }

    memset(&server, 0, sizeof server);
    server.sin_family = AF_INET;
    server.sin_port = htons((unsigned short)port);
    memcpy(&server.sin_addr, host->h_addr_list[0], sizeof server.sin_addr);

    if (connect(sock, (struct sockaddr *)&server, sizeof server) < 0) {
        /* Do not leak the descriptor when the connection is refused. */
        close(sock);
        return -1;
    }
    return sock;
}
/*
 * Send all `len` bytes of `data` on `sock`, retrying after short writes.
 * Returns 0 on success, -1 if send() fails or makes no progress.
 */
static int send_all(int sock, const char *data, size_t len)
{
    size_t done = 0;

    while (done < len) {
        ssize_t n = send(sock, data + done, len - done, 0);
        if (n <= 0) {
            return -1;
        }
        done += (size_t)n;
    }
    return 0;
}

/*
 * Connect to the HTTP port of the hard-coded host, send the request held in
 * `head`, and copy everything the server sends back both to stdout and to
 * the file "test" in the current directory.
 *
 * Returns 0 when the response was read to end-of-stream and saved,
 * -1 on any failure.
 */
int main(void)
{
    char buf[100];
    char *domain = "127.0.0.1";
    int status = -1;
    int sock = -1;
    ssize_t ret;
    FILE *fp = fopen("test", "w+");

    if (NULL == fp) {
        printf("can't open output file \"test\"!\n");
        return -1;
    }

    sock = connect_URL(domain, HTTPPORT);
    if (sock < 0) {
        printf("connetc err\n");
        goto out;
    }

    if (send_all(sock, head, strlen(head)) < 0) {
        printf("send error\n");
        goto out;
    }

    /* Read until the peer closes the connection (recv returns 0).
     * The data is written by length, not as a C string, so an embedded
     * NUL byte in the response does not truncate the output. */
    while ((ret = recv(sock, buf, sizeof buf, 0)) > 0) {
        size_t got = (size_t)ret;

        fwrite(buf, 1, got, stdout);
        if (fwrite(buf, 1, got, fp) != got) { /* save http data */
            printf("write error\n");
            goto out;
        }
    }
    if (ret < 0) {
        printf("recv error\n");
        goto out;
    }
    status = 0;

out:
    if (sock >= 0) {
        close(sock);
    }
    /* fclose flushes the saved data; a failure here means it was lost. */
    if (fclose(fp) != 0 && status == 0) {
        printf("write error\n");
        status = -1;
    }
    return status;
}
from:
http://linux.chinaitlab.com/server/800420_2.html