HTTP消息头的抓取

先上一个抓取百度首页的代码:

#include <stdio.h>
#include <winsock2.h>
#include <string.h>
#pragma comment(lib, "ws2_32.lib")


int main(){
    FILE *fp;
    fp = fopen("e://test.txt", "w+");
    WORD wVersionRequested;
    WSADATA wsaData;
    int err;
    wVersionRequested = MAKEWORD(2, 0);
    err = WSAStartup(wVersionRequested, &wsaData );
    if(0 != err) {
        printf("Socket2.0初始化失败,Exit!");
        return 0;
    }
    int soc;
    soc = socket(AF_INET, SOCK_STREAM, 0);
    struct sockaddr_in srv_addr;
    srv_addr.sin_port=htons(80);
    srv_addr.sin_family=AF_INET;
    srv_addr.sin_addr.s_addr=inet_addr("61.135.169.125");  // 百度的首页
    connect(soc, (LPSOCKADDR)&srv_addr, sizeof(srv_addr));
    char sz[] = "GET / HTTP/1.1\r\nConnection:keep-alive\r\n\r\n";
    send(soc, sz, strlen(sz), 0);
    static char webcon[BUFSIZ];
    while(recv(soc, webcon, BUFSIZ, 0) > 0) {
        printf("%s", webcon);
        fputs(webcon, fp);
        memset((void*)webcon, 0, BUFSIZ);
    }
    fclose(fp);
    return 0;
}


下面是通用版本:输入任意网址(不含 http:// 前缀),程序会拆分出主机名和路径,解析主机地址后发送 GET 请求,并打印服务器返回的全部内容(消息头和正文)。

#include <stdio.h> 
#include <winsock.h> 
#include <string.h> 
#pragma comment(lib, "ws2_32.lib") 

void geturl(char *url) 
{ 
	WSADATA WSAData={0}; 
	SOCKET sockfd; 
	struct sockaddr_in addr; 
	struct hostent *pURL; 
	char myurl[BUFSIZ]; 
	char *pHost = 0, *pGET = 0; 
	char host[BUFSIZ], GET[BUFSIZ]; 
	char header[BUFSIZ] = ""; 
	static char text[BUFSIZ]; 
	int i; 

	/* 
	* windows下使用socket必须用WSAStartup初始化,否则不能调用 
	*/ 
	if(WSAStartup(MAKEWORD(2,2), &WSAData)) 
	{ 
		printf("WSA failed\n"); 
		return; 
	} 

	/* 
	* 分离url中的主机地址和相对路径 
	*/ 
	strcpy(myurl, url); 
	for (pHost = myurl; *pHost != '/' && *pHost != '\0'; ++pHost); 
	if ( (int)(pHost - myurl) == strlen(myurl) ) 
		strcpy(GET, "/"); 
	else 
		strcpy(GET, pHost); 
	*pHost = '\0'; 
	strcpy(host, myurl); 
	printf("%s\n%s\n", host, GET); 

	/* 
	* 设定socket参数,并未真正初始化 
	*/ 
	sockfd = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); 
	pURL = gethostbyname(host); 
	addr.sin_family = AF_INET; 
	addr.sin_addr.s_addr = *((unsigned long*)pURL->h_addr); 
	addr.sin_port = htons(80); 

	/* 
	* 组织发送到web服务器的信息 
	* 为何要发送下面的信息请参考HTTP协议的约定 
	*/ 
	strcat(header, "GET "); 
	strcat(header, GET); 
	strcat(header, " HTTP/1.1\r\n"); 
	strcat(header, "HOST: "); 
	strcat(header, host); 
	strcat(header, "\r\nConnection:Close\r\n\r\n"); 

	printf("|||||%s|||||||\n", header);
	/* 
	* 连接到服务器,发送请求header,并接受反馈(即网页源代码) 
	*/ 
	connect(sockfd,(SOCKADDR *)&addr,sizeof(addr)); 

	send(sockfd, header, strlen(header), 0); 

	while ( recv(sockfd, text, BUFSIZ, 0) > 0) 
	{ 
		printf("%s", text); 
		strnset(text, '\0', BUFSIZ); 
	} 

	closesocket(sockfd); 

	WSACleanup(); 
} 

/*
 * Prompt for a URL (the "http://" prefix is printed as part of the prompt,
 * so the user types only "host[/path]") and fetch it with geturl.
 *
 * Returns 0 on success and 1 when no URL could be read.
 */
int main() 
{ 
	char url[256]; 

	printf("http://"); 
	/* The field width keeps scanf from writing past the end of url. */
	if (scanf("%255s", url) != 1) 
	{ 
		fprintf(stderr, "no url entered\n"); 
		return 1; 
	} 
	geturl(url); 
	return 0; 
}


HTTP消息头的理解:http://www.cnblogs.com/jacktu/archive/2008/01/16/1041710.html

你可能感兴趣的:(HTTP消息头的抓取)