VC获取网页标题,解决乱码问题

//效果截图如下(文章后面附有VS2008本工程下载地址):

VC获取网页标题,解决乱码问题_第1张图片

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

VC获取网页标题,解决乱码问题_第2张图片

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

VC获取网页标题,解决乱码问题_第3张图片

--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

主要代码如下:

//取网页标题
void CGetWebTitleDlg::OnBnClickedBtnGetTitle()
{
	m_HtmlCode.SetWindowText(_T(""));//clear

	CInternetSession mySession(NULL,0);
	CHttpFile* htmlFile=NULL;
	CString strLine,url,strHtml;
	TCHAR sRecv[1024];
	UINT CodePage=65001;//CP_UTF8:65001 CP_ACP:0
	m_Url.GetWindowText(url);

	TRY 
	{
		htmlFile=(CHttpFile*)mySession.OpenURL(url);//打开连接
		
		//获取网页编码
		while(htmlFile->ReadString(sRecv,1024))
		{
			//先用UTF8来进行转换,如果html页面编码是gbk或gb2312,转换后中文字符为
			//乱码,但英文字符显示正常,我们判断html页码编码,通过寻找英文就可以了
			int nBufferSize = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)sRecv, -1, NULL, 0);
			wchar_t *pBuffer = new wchar_t[nBufferSize+1];
			memset(pBuffer,0,(nBufferSize+1)*sizeof(wchar_t)); 
			MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)sRecv, -1 , pBuffer, nBufferSize*sizeof(wchar_t)); 
			strHtml=pBuffer;
			if (-1!=strHtml.Find(_T("charset=gbk")))
			{
				CodePage=0;
				delete pBuffer;
				break;
			}
			if (-1!=strHtml.Find(_T("charset=GBK")))//http://www.sohu.com
			{
				CodePage=0;
				delete pBuffer;
				break;
			}
			if (-1!=strHtml.Find(_T("charset=gb2312")))
			{
				CodePage=0;
				delete pBuffer;
				break;
			}
			if (-1!=strHtml.Find(_T("charset=GB2312")))
			{
				CodePage=0;
				delete pBuffer;
				break;
			}
			if (-1!=strHtml.Find(_T("charset=utf-8")))
			{
				CodePage=65001;
				delete pBuffer;
				break;
			}
			if (-1!=strHtml.Find(_T("charset=UTF-8")))
			{
				CodePage=65001;
				delete pBuffer;
				break;
			}
			delete pBuffer;
		}
		strHtml=_T("");
		
		//获取网页源码
		htmlFile=(CHttpFile*)mySession.OpenURL(url);//重新打开连接
		while(htmlFile->ReadString(sRecv,1024))
		{

			// 编码转换,可解决中文乱码问题
			//gb2312转为unicode,则用CP_ACP
			//gbk转为unicode,也用CP_ACP
			//utf-8转为unicode,则用CP_UTF8
			int nBufferSize = MultiByteToWideChar(CodePage, 0, (LPCSTR)sRecv, -1, NULL, 0);
		
			wchar_t *pBuffer = new wchar_t[nBufferSize+1];
			memset(pBuffer,0,(nBufferSize+1)*sizeof(wchar_t)); 
	
			//gb2312转为unicode,则用CP_ACP
			//gbk转为unicode,也用CP_ACP
			//utf-8转为unicode,则用CP_UTF8
			MultiByteToWideChar(CodePage, 0, (LPCSTR)sRecv, -1 , pBuffer, nBufferSize*sizeof(wchar_t)); 

			strHtml+=pBuffer;
			strHtml+="\r\n";
			delete pBuffer;
		}
		htmlFile->Close();
		mySession.Close() ;
		delete htmlFile;

		m_HtmlCode.SetWindowText(strHtml);//显示网页源码

		//获取网页标题 
		CString szTitle=strHtml.GetString();
		int nStart=szTitle.Find(_T(""));
		int nEnd=szTitle.Find(_T(""));
		szTitle=szTitle.Mid(nStart+7,nEnd-nStart-7);
		this->SetWindowText(_T("获取到的网页标题为【")+szTitle+_T("】   By︶风不冷丶"));

	}
	CATCH (CException, e)
	{
		TCHAR err[1024];
		e->GetErrorMessage(err,1024);
		m_HtmlCode.SetWindowText(err);
	}
	END_CATCH
}

--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

本例子,VS2008工程下载地址:

http://download.csdn.net/detail/friendan/6288523

友情提醒:直接在VS2008中运行程序时,会获取不到网页标题,错误信息为:无法解析服务器的名称或地址

这个我现在也不知道是什么原因,知道的望告知一二,不胜感激。不过你去生成程序的目录,直接运行程序,就不会有以上错误了。

在VS2008直接运行工程获取网页标题,错误截图如下:

VC获取网页标题,解决乱码问题_第4张图片

-------------------------------------------------------------------------------------------------------------


您的十分满意是我追求的宗旨。

您的一点建议是我后续的动力。







你可能感兴趣的:(MFC,VC,VS2008,C++)