开发环境:Windows 10 64 位
- 首先需要安装 `vcpkg` 工具,可参考文档《开始使用 vcpkg - 创建项目》。
- 打开 CMD,安装 `curl` 和 `libxml2` 库:
vcpkg install curl:x64-windows
vcpkg install libxml2:x64-windows
- 安装完成后执行 `vcpkg integrate install`,让 VS2022 可以识别这些 lib 库。
- 先填一个坑:使用 VS2022 打开
X:\vcpkg\vcpkg\installed\x64-windows\include\iconv.h
文件,然后 Ctrl+A 全选代码,依次选择菜单「文件」->「iconv.h 另存为」->「选择编码保存」->「Unicode - 代码页 1200」,确定保存。
- Demo:`main.cpp` 代码,参考地址 https://curl.se/libcurl/c/htmltitle.html
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <string>
#include <curl/curl.h>
#include <libxml/HTMLparser.h>
// Silence MSVC warning C4819, which is reported when a file contains
// characters that cannot be represented in the current code page.
#pragma warning(disable : 4819)

//
// COMPARE(a, b): true when the two C strings are equal, ignoring case.
// MSVC spells the case-insensitive comparison _stricmp; other compilers
// use strcasecmp.
//
#if defined(_MSC_VER)
#define COMPARE(a, b) (_stricmp((a), (b)) == 0)
#else
#define COMPARE(a, b) (strcasecmp((a), (b)) == 0)
#endif
//
// State shared between the libxml SAX callbacks while a document is parsed
//
struct Context
{
  // True while the parser is inside a TITLE element; character data is
  // collected only while this flag is set.
  bool addTitle;

  // Text gathered for the title so far.
  std::string title;

  // Start outside any TITLE element with an empty title.
  Context()
    : addTitle(false),
      title()
  {
  }
};
//
// libcurl variables for error strings and returned data
//
// errorBuffer: handed to libcurl through CURLOPT_ERRORBUFFER in init() and
// printed in this program's failure messages.
// buffer: handed to libcurl through CURLOPT_WRITEDATA in init(); main()
// passes it to parseHtml() after the transfer.
//
static char errorBuffer[CURL_ERROR_SIZE];
static std::string buffer;
//
// libcurl write callback function
//
// Appends the size * nmemb bytes starting at data to *writerData and
// returns the number of bytes consumed. Returns 0 when writerData is NULL.
//
// The return type is size_t, as the CURLOPT_WRITEFUNCTION prototype
// requires; returning int narrowed the byte count and did not match the
// signature libcurl calls through.
//
static size_t writer(char* data, size_t size, size_t nmemb,
                     std::string* writerData)
{
  if (writerData == NULL)
    return 0;

  const size_t bytes = size * nmemb;
  writerData->append(data, bytes);
  return bytes;
}
//
// libcurl connection initialization
//
// Creates an easy handle in conn and configures it: error buffer, target
// URL, redirect following, and the write callback with its destination
// string. Returns true when every option was accepted and false (after
// printing a message to stderr) when one was rejected. The process exits
// if the handle itself cannot be created.
//
static bool init(CURL*& conn, char* url)
{
  conn = curl_easy_init();
  if (!conn) {
    fprintf(stderr, "Failed to create CURL connection\n");
    exit(EXIT_FAILURE);
  }

  // The error buffer is not usable until this call succeeds, so its
  // failure is reported with the numeric code instead of the buffer text.
  const CURLcode bufferResult =
    curl_easy_setopt(conn, CURLOPT_ERRORBUFFER, errorBuffer);
  if (bufferResult != CURLE_OK) {
    fprintf(stderr, "Failed to set error buffer [%d]\n", bufferResult);
    return false;
  }

  if (curl_easy_setopt(conn, CURLOPT_URL, url) != CURLE_OK) {
    fprintf(stderr, "Failed to set URL [%s]\n", errorBuffer);
    return false;
  }

  if (curl_easy_setopt(conn, CURLOPT_FOLLOWLOCATION, 1L) != CURLE_OK) {
    fprintf(stderr, "Failed to set redirect option [%s]\n", errorBuffer);
    return false;
  }

  if (curl_easy_setopt(conn, CURLOPT_WRITEFUNCTION, writer) != CURLE_OK) {
    fprintf(stderr, "Failed to set writer [%s]\n", errorBuffer);
    return false;
  }

  if (curl_easy_setopt(conn, CURLOPT_WRITEDATA, &buffer) != CURLE_OK) {
    fprintf(stderr, "Failed to set write data [%s]\n", errorBuffer);
    return false;
  }

  return true;
}
//
// libxml start element callback function
//
static void StartElement(void* voidContext,
const xmlChar* name,
const xmlChar** attributes)
{
Context* context = static_cast(voidContext);
if (COMPARE(reinterpret_cast(name), "TITLE")) {
context->title = "";
context->addTitle = true;
}
(void)attributes;
}
//
// libxml end element callback function
//
static void EndElement(void* voidContext,
const xmlChar* name)
{
Context* context = static_cast(voidContext);
if (COMPARE(reinterpret_cast(name), "TITLE"))
context->addTitle = false;
}
//
// Text handling helper function
//
// Appends the length bytes at chars to context->title while title
// collection is switched on; otherwise the text is ignored. A length that
// is not positive is skipped so that a negative value is never converted
// to a huge unsigned size.
//
static void handleCharacters(Context* context,
                             const xmlChar* chars,
                             int length)
{
  if (context->addTitle && length > 0)
    context->title.append(reinterpret_cast<const char*>(chars),
                          static_cast<std::string::size_type>(length));
}
//
// libxml PCDATA callback function
//
static void Characters(void* voidContext,
const xmlChar* chars,
int length)
{
Context* context = static_cast(voidContext);
handleCharacters(context, chars, length);
}
//
// libxml CDATA callback function
//
static void cdata(void* voidContext,
const xmlChar* chars,
int length)
{
Context* context = static_cast(voidContext);
handleCharacters(context, chars, length);
}
//
// libxml SAX callback structure
//
// The initializers are positional: each entry fills the next member of
// htmlSAXHandler in declaration order, so entries must not be reordered,
// inserted or removed. Only four callbacks are installed; every other
// listed slot is NULL.
// NOTE(review): the slot positions depend on the libxml2 header in use;
// confirm them against the installed version before editing this table.
//
static htmlSAXHandler saxHandler =
{
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
StartElement, // start element callback
EndElement, // end element callback
NULL,
Characters, // PCDATA callback
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
cdata, // CDATA callback
NULL
};
//
// Parse given (assumed to be) HTML text and return the title
//
// html is the document text; title receives the text collected from its
// TITLE element and is left empty when none was collected or when the
// parser context could not be created.
//
static void parseHtml(const std::string& html,
                      std::string& title)
{
  Context context;

  htmlParserCtxtPtr ctxt =
    htmlCreatePushParserCtxt(&saxHandler, &context, "", 0, "",
                             XML_CHAR_ENCODING_NONE);
  if (ctxt == NULL) {
    // No parser available: report an empty title instead of driving the
    // parsing calls with a null context.
    title.clear();
    return;
  }

  // htmlParseChunk() takes the chunk size as an int, so feed the text in
  // pieces of at most INT_MAX bytes rather than narrowing html.size().
  // The loop runs at least once so an empty document still gets one call.
  const std::string::size_type maxChunk =
    static_cast<std::string::size_type>(INT_MAX);
  const char* cursor = html.c_str();
  std::string::size_type remaining = html.size();
  do {
    const int chunk =
      static_cast<int>(remaining < maxChunk ? remaining : maxChunk);
    htmlParseChunk(ctxt, cursor, chunk, 0);
    cursor += chunk;
    remaining -= static_cast<std::string::size_type>(chunk);
  } while (remaining > 0);

  // An empty final chunk with the terminate flag set ends the document.
  htmlParseChunk(ctxt, "", 0, 1);

  htmlFreeParserCtxt(ctxt);

  title = context.title;
}
int main(int argc, char* argv[])
{
CURL* conn = NULL;
CURLcode code;
std::string title;
// Ensure one argument is given
if (argc != 2) {
fprintf(stderr, "Usage: %s \n", argv[0]);
exit(EXIT_FAILURE);
}
curl_global_init(CURL_GLOBAL_DEFAULT);
// Initialize CURL connection
if (!init(conn, argv[1])) {
fprintf(stderr, "Connection initializion failed\n");
exit(EXIT_FAILURE);
}
// Retrieve content for the URL
code = curl_easy_perform(conn);
curl_easy_cleanup(conn);
if (code != CURLE_OK) {
fprintf(stderr, "Failed to get '%s' [%s]\n", argv[1], errorBuffer);
exit(EXIT_FAILURE);
}
// Parse the (assumed) HTML code
parseHtml(buffer, title);
// Display the extracted title
printf("Title: %s\n", title.c_str());
return EXIT_SUCCESS;
}
- 项目 -> 属性 -> 按下图设置,关闭所有警告。
- 测试结果