// C++解析http爬取图片
//
// C++解析http爬取图片_第1张图片


/*
 0921
 
 */
// winsock2.h must come before windows.h so the legacy winsock.h is not pulled in.
#include <winsock2.h>
#include <windows.h>

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
#pragma comment(lib,"ws2_32.lib")
using namespace std;

// Small HTTP client built directly on Winsock: it requests a page over a
// plain TCP connection, keeps the page source in memory and downloads the
// .jpg images whose URLs appear in that source.
// NOTE(review): the class holds a raw socket handle yet is still copyable;
// a copy would share the same handle - confirm that no caller copies it.
class Chttp{

private:
	SOCKET m_socket = INVALID_SOCKET; // socket handle; INVALID_SOCKET until a socket has been created
	string host;   // host name extracted from url
	string object; // resource path requested from the host
	string url;    // URL currently being processed
	string html;   // page source collected by download_html()
public:

	// Stores murl as the URL to fetch.
	Chttp(string murl);
	// Closes the socket handle.
	~Chttp();
	// Splits url into host and object.
	bool analyurl();
	// Starts Winsock, creates the socket, resolves the host and connects to it.
	bool init();
	// Connects through init(), sends the GET request and reads the response headers.
	bool Send_Get_head();
	// Declared only; no definition is visible in this file.
	bool Connect();

	// Requests url and stores the response body in html.
	void download_html();

	// Scans html for image URLs and downloads each of them.
	void get_img();

	// Requests jpgurl and writes the response body to the file named filename.
	bool download_jpg(string jpgurl,string filename);

};

// Remembers the URL to fetch; no network work is done at construction time.
Chttp::Chttp(string murl)
	: url(murl)
{
}

// Releases the socket handle held by this object, if any.
Chttp::~Chttp(){
	// INVALID_SOCKET means there is nothing to close; skipping the call
	// avoids a closesocket() that could only fail.
	if (m_socket != INVALID_SOCKET)
	{
		closesocket(m_socket);
		m_socket = INVALID_SOCKET;
	}
}

//初始化网络
bool Chttp::init(){
	WORD ver = MAKEWORD(2,2);//版本
	WSADATA dat;
	WSAStartup(ver, &dat);
	//1.创建套接字
	m_socket = socket(AF_INET,SOCK_STREAM,IPPROTO_TCP);
	if (m_socket == -1)
	{
		return false;
	}
	//cout<<"socket success\n";

	analyurl();
	
	//域名地址
	hostent *p=gethostbyname(host.c_str());
	if (!p)
	{
		return false;
	}

	//网址主机服务器信息
	sockaddr_in addr;
	memcpy(&addr.sin_addr,p->h_addr,4);
	addr.sin_family=AF_INET;
	addr.sin_port=htons(80);//HTTP默认端口80
	//连接
	int r = connect(m_socket,(sockaddr*)&addr,sizeof(addr));
	if (r==SOCKET_ERROR)
	{	
		//cout<<"connect ERROR\n";
		return false;
	}
	//cout<<"connect success\n";
	return true;
}
//解析url
bool Chttp::analyurl(){

	int n=url.find('/');
	n+=2;
	int pos=url.find('/',n);

	host=url.substr(n,pos-n);
	cout<<host<<endl;
	object=url.substr(pos);
	//object=url;
	cout<<object<<endl;

	return true;
}
//发送,接收头部
bool Chttp::Send_Get_head(){
	if (!init())
	{
		cout<<"init ERROR\n";
		return false;
	}
	;
	//请求头
	string headerinfo;
	headerinfo="GET "+object+" HTTP/1.1\r\n"
		+"Host: "+host+"\r\n"
		+"User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 SE 2.X MetaSr 1.0"
		+"\nConnection: Close\r\n\r\n";
	//cout<<"\nsend GET----------------\n";
	//cout<
	//发送请求头
	int r=send(m_socket,headerinfo.c_str(),headerinfo.length(),0);
	if (r==SOCKET_ERROR)
	{	 
		//cout<<"send false"<
		return false;
	}
	//cout<<"send success\n\n";
	

	//接收头信息
	char ch;
	int nrecv=0;
	cout<<"---------------------以下为头信息\n";
	while (nrecv = recv(m_socket,&ch,1,0) > 0)
	{
		cout<<ch;
		if (ch=='\r')
		{
			nrecv = recv(m_socket,&ch,1,0);cout<<ch;
			if (nrecv>0 && ch=='\n')
			{
				nrecv = recv(m_socket,&ch,1,0);	cout<<ch;
				if (nrecv>0 && ch=='\r')
				{
					nrecv = recv(m_socket,&ch,1,0);cout<<ch;
					if (nrecv>0 && ch=='\n')
					{
						break;
					}
				}
			}
		}
	}
	return true;
}

//输出网页源代码
void Chttp::download_html(){	

	Send_Get_head();
	;
	cout<<"---------------------以下为网页源代码\n";

	int len;
	char buf[1024]={0};
	while ((len=recv(m_socket,buf,1023,0))> 0)
	{	 
		html+=buf;
		memset(buf,0,1024);
	}
	//cout<
	//re(getrecv);
}

// Downloads the resource at purl and writes the response body to filename.
//
// purl replaces the object's current url. The file is opened in binary mode
// and is only created once the response headers have been received.
// Returns true when the whole body was received and written; false when the
// request fails, the file cannot be opened, a receive error occurs or a
// write comes up short. The socket is closed before returning.
bool Chttp::download_jpg(string purl,string filename){
	url=purl;

	bool ok=false;
	if (Send_Get_head())
	{
		FILE *fp=fopen(filename.c_str(),"wb"); // binary mode
		if (fp)
		{
			cout<<"开始下载 "<<filename<<endl;

			char buf[4096];
			int len;
			ok=true;
			// Copy the body to the file as it arrives.
			while ((len=recv(m_socket,buf,static_cast<int>(sizeof(buf)),0))> 0)
			{
				// fwrite returns the number of 1-byte items actually written.
				if (fwrite(buf,1,len,fp)!=static_cast<size_t>(len))
				{
					ok=false;
					break;
				}
			}
			if (len<0)
			{
				ok=false; // recv reported an error rather than end of stream
			}
			if (fclose(fp)!=0)
			{
				ok=false; // buffered data could not be flushed
			}
			if (ok)
			{
				cout<<"已经下载 "<<filename<<endl;
			}
		}
	}

	if (m_socket != INVALID_SOCKET)
	{
		closesocket(m_socket);
		m_socket = INVALID_SOCKET;
	}
	return ok;
}

/*
size_t fwrite(const void* buffer, size_t size, size_t count, FILE* stream);
注意:这个函数以二进制形式对文件进行操作,不局限于文本文件
fwrite
返回值:返回实际写入的数据块数目
(1)buffer:是一个指针,对fwrite来说,是要获取数据的地址;
(2)size:要写入内容的单字节数;
(3)count:要进行写入size字节的数据项的个数;
(4)stream:目标文件指针;
(5)返回实际写入的数据项个数count。
 */



void Chttp::get_img()
{
	int i=0;
	while (html.length()!=0)
	{
		string t="https://t1.hddhhn.com/uploads/tu/";
		int start=html.find(t);
		if (start==-1)
		{
			break;
		}
		
		int end=html.find(".jpg");
		cout<<start<<" "<<end<<endl;

		string tmp=html.substr(start,end+4-start);
		if (tmp.length()>70)
		{
			html=html.substr(end+4);
			continue;
		}
		cout<<tmp<<endl; //目标图片路径

		string filename="./img5/"+to_string(i)+".jpg";
		i++;
		download_jpg(tmp,filename);

		html=html.substr(end+4);

	}
}

void main(){

	string url="https://www.2717.com/beautiful/";
	Chttp t(url);	   

	//创建文件夹
	CreateDirectory("./img5",NULL);
	t.download_html();
	t.get_img();
	

	system("pause");
}





// 你可能感兴趣的:(C++)