用C语言爬网页代码

今天下午把爬虫代码看了一下,也动手敲了一遍,只不过我爬下来的内容总是乱码。现在大多数爬虫都是用Python和Java写的,很少有人用C语言写爬虫了。

 

 

 

#include <stdio.h>
#include <winsock2.h>
#include<string.h>
#pragma comment(lib, "ws2_32.lib")

int main()
{
	WSADATA  wd; // web API
	SOCKET c;
	char sendBuf[1000]="",recvBuf[1000]="";  // 创建字符数组
	 SOCKADDR_IN saddr;
	if(WSAStartup(MAKEWORD(2,2),&wd)!=0)
	{
		printf("初始化失败\n");
		return 0;
	}
	if(HIBYTE(wd.wVersion)!=2||LOBYTE(wd.wVersion)!=2)
	{
        printf("初始化失败\n");
		WSACleanup();
		return 1;
	}

	c=socket(AF_INET,SOCK_STREAM,0);  //创建客户端

    saddr.sin_addr.S_un.S_addr = inet_addr("119.75.217.56");//百度ip
    saddr.sin_family=AF_INET;
	saddr.sin_port=htons(80);
    
   /* 连接服务器*/
	if(connect(c, (SOCKADDR*)&saddr, sizeof(SOCKADDR))!=0)
	{
		printf("connect failed\n");
		return 1;
	}

     strnset(sendBuf,0,1024);
     //请求百度网页
	 strcat(sendBuf,"GET /index.php HTTP/1.0\r\n");
	 strcat(sendBuf,"host:www.baidu.com\r\n\r\n");
	 send(c,sendBuf,strlen(sendBuf)+1,0);
	 
	 strnset(recvBuf,0,1024);
	 while(recv(c,recvBuf,1024,0))
	 {
		 puts(recvBuf);
		 strnset(recvBuf,0,1024);
	 }

      closesocket(c);
	 return 0;
}

  

       


用C语言爬网页代码_第1张图片

 

你可能感兴趣的:(爬虫,C语言)