使用curl获取web页面

一、前言

看到公司项目中有个功能是要从web服务器上获取html页面,然后分析里面的文章抽取出可用的信息,然后看了下前人写的实现代码,除了通讯还有http协议的解析一大堆代码,看了比较烦,想到过去有看过curl这个东西,于是乎想用curl代替公司的这套东西。

curl支持很多协议:FTP, FTPS, HTTP, HTTPS, GOPHER, TELNET, DICT, FILE 以及 LDAP。curl同样支持HTTPS认证, HTTP POST方法, HTTP PUT方法, FTP上传, kerberos认证, HTTP上传, 代理服务器, cookies, 用户名/密码认证, 下载文件断点续传, 上传文件断点续传, http代理服务器管道(proxy tunneling), 甚至它还支持IPv6, socks5代理服务器, 通过http代理服务器上传文件到FTP服务器等等,功能十分强大。

而我只是想用它来实现http协议的客户端。


二、下载和安装

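环境:linux(注意:下文示例代码通过 LoadLibrary/GetProcAddress 加载 libcurl.dll,这是 Windows 平台的写法;在 linux 下需要改用 dlopen/dlsym 加载 libcurl.so,或者直接链接 libcurl)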

下载地址:http://curl.haxx.se/download.html


安装步骤


三、实现

因为curl库我是编译成dll动态库的,因此先要实现加载类,将curl几个常用的函数给钩出来。

curl_easy_init 用于初始化

curl_easy_setopt 用于设置配置

curl_easy_perform 用于发送请求

curl_easy_cleanup 用于结束会话并释放 curl_easy_init 创建的句柄

curl_easy_strerror 用于根据错误码获取对应的错误描述字符串

//CurlLoad.h

#pragma once
// curl/curl.h comes first: on Windows it pulls in winsock2.h, which has to be
// seen before windows.h.
#include <curl/curl.h>
#include <windows.h>
#include <iostream>
#include <stdexcept>
#include <string>

namespace firebird{
	typedef CURL*(*curl_easy_init_t)(void);
	typedef CURLcode (*curl_easy_setopt_t)(CURL *curl, CURLoption option, ...);
	typedef CURLcode (*curl_easy_perform_t)(CURL *curl);
	typedef void (*curl_easy_cleanup_t)(CURL *curl);
	typedef const char* (*curl_easy_strerror_t)(CURLcode);

	// Holds pointers to the libcurl "easy" API functions together with the
	// handle of the library module they come from. The constructor is only
	// declared here; its definition lives in the implementation file.
	class CurlLoad{
	public:
		CurlLoad();

		// Each member is a function pointer named after the libcurl function it
		// stands for, so callers write e.g. loader.curl_easy_init().
		curl_easy_init_t curl_easy_init;
		curl_easy_setopt_t curl_easy_setopt;
		curl_easy_perform_t curl_easy_perform;
		curl_easy_cleanup_t curl_easy_cleanup;
		curl_easy_strerror_t curl_easy_strerror;

	private:
		// Module handle of the loaded library. NOTE(review): this class declares
		// no destructor, so the handle is presumably kept for the whole process
		// lifetime - confirm that is intended.
		HINSTANCE hDll;
	};

	// Singleton accessor: hands out a reference to one function-local static
	// CurlLoad object, so the process only ever instantiates a single CurlLoad
	// through this class.
	class single_curl_load{
	public:
		// Returns the shared CurlLoad object, constructing it on the first call.
		static CurlLoad& instance(){
			static CurlLoad loader;
			return loader;
		}
	};
}

//CurlLoad.cpp

#include "CurlLoad.h"

namespace firebird{
	CurlLoad::CurlLoad()
	{
		hDll = ::LoadLibrary("libcurl.dll");

		if (hDll == NULL) {
			throw std::runtime_error("cann't load libcurl.dll.");
			return;
		}

		curl_easy_init = (curl_easy_init_t)::GetProcAddress(hDll, "curl_easy_init");
		curl_easy_setopt = (curl_easy_setopt_t)::GetProcAddress(hDll, "curl_easy_setopt");
		curl_easy_perform = (curl_easy_perform_t)::GetProcAddress(hDll, "curl_easy_perform");
		curl_easy_cleanup = (curl_easy_cleanup_t)::GetProcAddress(hDll, "curl_easy_cleanup");
		curl_easy_strerror = (curl_easy_strerror_t)::GetProcAddress(hDll, "curl_easy_cleanup");
	}
}

//CurlClient.h http客户端

#pragma once
#include <iostream>
#include "CurlLoad.h"

namespace firebird{
	// Request methods supported by the client: GET and POST.
	enum method
	{
		GET = 0,
		POST = 1
	};

	/**
	 * Small HTTP client that issues GET/POST requests through the libcurl
	 * functions exposed by CurlLoad and keeps the response body in a string.
	 *
	 * NOTE(review): the class stores a raw CURL* and declares no copy
	 * constructor or assignment operator, so copying an instance would leave
	 * two objects holding the same handle - avoid copying instances.
	 */
	class CurlClient
	{
	public:
		CurlClient();
		~CurlClient();

		// Returns the stored CURLcode (m_code).
		CURLcode getCURLcode();

		// Returns libcurl's textual description of the stored CURLcode.
		const char* getCURLStrError();

		// Performs a request with method m against url and returns a reference to
		// the internal response buffer; post_fields is the request body that is
		// sent when m is POST.
		std::string& getCURLContent(method m, const char* url, const std::string& post_fields);

		// Returns a reference to the internal response buffer without issuing a
		// new request.
		std::string& getCURLContent();
	private:
		CURL* m_curl;            // libcurl easy handle
		CURLcode m_code;         // result code of the last request
		std::string m_sBuf;      // response body collected by doWrite
		CurlLoad& m_curl_load;   // resolved libcurl entry points

		/**
		 * libcurl write callback: appends the received chunk to the std::string
		 * passed as stream (the response buffer).
		 *
		 * @param ptr    start of the received data (not NUL-terminated)
		 * @param size   size of one data item
		 * @param nmemb  number of items; the chunk is size * nmemb bytes long
		 * @param stream user pointer registered for the callback; must point to
		 *               a std::string
		 * @return the number of bytes consumed. When the append fails, 0 is
		 *         returned instead of the chunk size; libcurl treats a value
		 *         different from the chunk size as a write error and aborts the
		 *         transfer, rather than silently delivering a truncated body.
		 */
		static size_t doWrite(void *ptr, size_t size, size_t nmemb, void *stream)
		{
			const size_t byteCount = size * nmemb;
			try{
				std::string* buf = static_cast<std::string*>(stream);

				buf->append(static_cast<const char*>(ptr), byteCount);
			}
			catch(const std::exception& e)
			{
				std::cout << "in doWrite exception:[" << e.what() << "]" << std::endl;
				return 0;
			}
			catch(...)
			{
				std::cout << "in doWrite unknown exception" << std::endl;
				return 0;
			}
			return byteCount;
		}
	};
}

//CurlClient.cpp

#include <firebird/curl/CurlClient.h>

namespace firebird{
	/**
	 * Binds the client to the process-wide CurlLoad instance and puts the
	 * members into a defined state.
	 *
	 * m_curl starts as NULL so that the destructor, which cleans up any non-NULL
	 * handle, does not read an indeterminate pointer when the object is
	 * destroyed before a request was made. m_code starts as CURLE_OK so that
	 * querying the code before any request reads a defined value.
	 */
	CurlClient::CurlClient()
		:m_curl(NULL)
		,m_code(CURLE_OK)
		,m_curl_load(single_curl_load::instance())
	{
	}

	// Hands the easy handle back to libcurl if one is held, then clears the
	// pointer.
	CurlClient::~CurlClient()
	{
		if (m_curl == NULL)
		{
			return;
		}

		m_curl_load.curl_easy_cleanup(m_curl);
		m_curl = NULL;
	}

	// Accessor for the stored CURLcode (m_code).
	CURLcode CurlClient::getCURLcode()
	{
		return this->m_code;
	}

	// Asks libcurl for the message that belongs to the stored CURLcode and
	// returns the pointer it hands back.
	const char* CurlClient::getCURLStrError()
	{
		const char* message = m_curl_load.curl_easy_strerror(m_code);
		return message;
	}

	/**
	 * Performs one HTTP request and returns the response body.
	 *
	 * @param m           GET or POST
	 * @param url         target URL, handed to libcurl as CURLOPT_URL
	 * @param post_fields request body; only used when m is POST
	 * @return reference to the internal buffer holding the response body. The
	 *         buffer is cleared at the start of every call. Check getCURLcode()
	 *         to find out whether the transfer succeeded.
	 *
	 * The easy handle is created for this call and released before returning,
	 * so repeated calls no longer leak the handle of the previous request.
	 */
	std::string& CurlClient::getCURLContent(method m, const char* url, const std::string& post_fields)
	{
		m_sBuf = "";
		// Pessimistic default: unless curl_easy_perform runs and overwrites it,
		// the call is reported as failed instead of leaving the result code of
		// an earlier request in place.
		m_code = CURLE_FAILED_INIT;
		// No handle is held between calls (it is released below), so the member
		// can be reset without cleaning anything up.
		m_curl = NULL;

		try{
			m_curl = m_curl_load.curl_easy_init();
			if (m_curl == NULL)
			{
				return m_sBuf;
			}

			// curl_easy_setopt is variadic and reads integer options as long, so
			// the values are passed as long explicitly; a plain int or size_t
			// argument has the wrong width on some platforms.
			m_curl_load.curl_easy_setopt(m_curl, CURLOPT_HEADER, 0L);
			m_curl_load.curl_easy_setopt(m_curl, CURLOPT_VERBOSE, 0L);
			m_curl_load.curl_easy_setopt(m_curl, CURLOPT_FOLLOWLOCATION, 1L);
			m_curl_load.curl_easy_setopt(m_curl, CURLOPT_TIMEOUT, 6L);
			m_curl_load.curl_easy_setopt(m_curl, CURLOPT_URL, url);
			m_curl_load.curl_easy_setopt(m_curl, CURLOPT_WRITEFUNCTION, &CurlClient::doWrite);
			m_curl_load.curl_easy_setopt(m_curl, CURLOPT_WRITEDATA, &m_sBuf);

			if (m == POST)
			{
				// libcurl does not copy the CURLOPT_POSTFIELDS data; post_fields
				// stays alive until curl_easy_perform below has returned.
				m_curl_load.curl_easy_setopt(m_curl, CURLOPT_POSTFIELDS, post_fields.c_str());
				m_curl_load.curl_easy_setopt(m_curl, CURLOPT_POSTFIELDSIZE, static_cast<long>(post_fields.size()));
				m_curl_load.curl_easy_setopt(m_curl, CURLOPT_POST, 1L);
				//m_curl_load.curl_easy_setopt(m_curl, CURLOPT_COOKIEFILE, "/Users/zhu/CProjects/curlposttest.cookie");
			}

			m_code = m_curl_load.curl_easy_perform(m_curl);
		}
		catch(const std::exception& e)
		{
			std::cout << "in getCURLContent exception:[" << e.what() << "]" << std::endl;
		}
		catch(...)
		{
			std::cout << "in getCURLContent unknown exception" << std::endl;
		}

		// Release the handle on every path that created one.
		if (m_curl != NULL)
		{
			m_curl_load.curl_easy_cleanup(m_curl);
			m_curl = NULL;
		}

		return m_sBuf;
	}

	std::string& CurlClient::getCURLContent()
	{
		return m_sBuf;
	}
}



你可能感兴趣的:(curl,curl,curl)