先上一个抓取百度首页的代码:
#include <stdio.h> #include <winsock2.h> #include <string.h> #pragma comment(lib, "ws2_32.lib") int main(){ FILE *fp; fp = fopen("e://test.txt", "w+"); WORD wVersionRequested; WSADATA wsaData; int err; wVersionRequested = MAKEWORD(2, 0); err = WSAStartup(wVersionRequested, &wsaData ); if(0 != err) { printf("Socket2.0初始化失败,Exit!"); return 0; } int soc; soc = socket(AF_INET, SOCK_STREAM, 0); struct sockaddr_in srv_addr; srv_addr.sin_port=htons(80); srv_addr.sin_family=AF_INET; srv_addr.sin_addr.s_addr=inet_addr("61.135.169.125"); // 百度的首页 connect(soc, (LPSOCKADDR)&srv_addr, sizeof(srv_addr)); char sz[] = "GET / HTTP/1.1\r\nConnection:keep-alive\r\n\r\n"; send(soc, sz, strlen(sz), 0); static char webcon[BUFSIZ]; while(recv(soc, webcon, BUFSIZ, 0) > 0) { printf("%s", webcon); fputs(webcon, fp); memset((void*)webcon, 0, BUFSIZ); } fclose(fp); return 0; }
通用代码,自己理解吧,很简单
#include <stdio.h> #include <winsock.h> #include <string.h> #pragma comment(lib, "ws2_32.lib") void geturl(char *url) { WSADATA WSAData={0}; SOCKET sockfd; struct sockaddr_in addr; struct hostent *pURL; char myurl[BUFSIZ]; char *pHost = 0, *pGET = 0; char host[BUFSIZ], GET[BUFSIZ]; char header[BUFSIZ] = ""; static char text[BUFSIZ]; int i; /* * windows下使用socket必须用WSAStartup初始化,否则不能调用 */ if(WSAStartup(MAKEWORD(2,2), &WSAData)) { printf("WSA failed\n"); return; } /* * 分离url中的主机地址和相对路径 */ strcpy(myurl, url); for (pHost = myurl; *pHost != '/' && *pHost != '\0'; ++pHost); if ( (int)(pHost - myurl) == strlen(myurl) ) strcpy(GET, "/"); else strcpy(GET, pHost); *pHost = '\0'; strcpy(host, myurl); printf("%s\n%s\n", host, GET); /* * 设定socket参数,并未真正初始化 */ sockfd = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); pURL = gethostbyname(host); addr.sin_family = AF_INET; addr.sin_addr.s_addr = *((unsigned long*)pURL->h_addr); addr.sin_port = htons(80); /* * 组织发送到web服务器的信息 * 为何要发送下面的信息请参考HTTP协议的约定 */ strcat(header, "GET "); strcat(header, GET); strcat(header, " HTTP/1.1\r\n"); strcat(header, "HOST: "); strcat(header, host); strcat(header, "\r\nConnection:Close\r\n\r\n"); printf("|||||%s|||||||\n", header); /* * 连接到服务器,发送请求header,并接受反馈(即网页源代码) */ connect(sockfd,(SOCKADDR *)&addr,sizeof(addr)); send(sockfd, header, strlen(header), 0); while ( recv(sockfd, text, BUFSIZ, 0) > 0) { printf("%s", text); strnset(text, '\0', BUFSIZ); } closesocket(sockfd); WSACleanup(); } int main() { char url[256]; printf("http://"); scanf("%s", url); geturl(url); return 0; }
HTTP消息头的理解:http://www.cnblogs.com/jacktu/archive/2008/01/16/1041710.html