C++ 获取网页源代码,仅供参考!!

/**
 * Decodes one three-byte UTF-8 sequence into a single UTF-16 code unit.
 *
 * Only the 1110xxxx 10xxxxxx 10xxxxxx form is handled; the caller must make
 * sure pText points at three readable bytes of that form. The marker bits are
 * masked off without being validated.
 *
 * @param pOut  Receives the decoded code unit.
 * @param pText Pointer to the first (lead) byte of the sequence.
 */
void UTF_8ToUnicode(wchar_t* pOut,char *pText)
{
	// Work on unsigned values so that bytes >= 0x80 are not sign-extended.
	const unsigned int lead  = static_cast<unsigned char>(pText[0]) & 0x0Fu; // 4 payload bits
	const unsigned int mid   = static_cast<unsigned char>(pText[1]) & 0x3Fu; // 6 payload bits
	const unsigned int trail = static_cast<unsigned char>(pText[2]) & 0x3Fu; // 6 payload bits

	// Assign the whole wchar_t instead of poking two bytes through a char
	// alias: the result no longer depends on byte order, and no part of *pOut
	// is left unwritten when wchar_t is wider than 16 bits.
	*pOut = static_cast<wchar_t>((lead << 12) | (mid << 6) | trail);
}
/**
 * Converts one double-byte character to a single wide character using the
 * Win32 MultiByteToWideChar API.
 *
 * The conversion uses CP_ACP (the system's active ANSI code page), so the
 * input is interpreted as GB2312 only when that is the active code page.
 *
 * @param pOut     Receives the converted wide character.
 * @param gbBuffer Pointer to the two source bytes.
 */
void Gb2312ToUnicode(wchar_t* pOut,char *gbBuffer)
{
	const int sourceByteCount = 2; // one double-byte character in
	const int destCharCount   = 1; // one wide character out

	::MultiByteToWideChar(CP_ACP,
	                      MB_PRECOMPOSED,
	                      gbBuffer,
	                      sourceByteCount,
	                      pOut,
	                      destCharCount);
}
/**
 * Converts a UTF-8 byte buffer into a double-byte (GB2312) C string.
 *
 * Bytes with the high bit clear are copied through unchanged. Every other
 * byte is treated as the lead byte of a three-byte UTF-8 sequence: it is
 * decoded with UTF_8ToUnicode and re-encoded as two bytes by UnicodeToGB2312.
 * Two-byte and four-byte UTF-8 sequences are not recognised.
 *
 * @param pOut  Destination buffer. The output is never longer than the input,
 *              so pLen + 1 bytes (including the terminator) are sufficient.
 * @param pText Source bytes; does not need to be NUL-terminated.
 * @param pLen  Number of bytes available at pText.
 */
void UTF_8ToGB2312(char*pOut, char *pText, int pLen)
{
	char Ctemp[4] = {0};
	int i = 0, j = 0;
	while(i < pLen)
	{
		// Test the high bit explicitly: comparing a plain char with 0 only
		// detects ASCII on platforms where char is signed.
		if((static_cast<unsigned char>(pText[i]) & 0x80) == 0)
		{
			pOut[j++] = pText[i++];
		}
		else
		{
			// A sequence cut off by the end of the buffer cannot be decoded;
			// stop here rather than read past pText + pLen.
			if(i + 3 > pLen)
				break;

			WCHAR Wtemp;
			UTF_8ToUnicode(&Wtemp,pText + i);
			UnicodeToGB2312(Ctemp,Wtemp);
			pOut[j] = Ctemp[0];
			pOut[j + 1] = Ctemp[1];
			i += 3;
			j += 2;
		}
	}
	// Terminate with NUL (the original wrote '\n', leaving the result
	// unterminated unless the caller had pre-zeroed the buffer).
	pOut[j] = '\0';
}

以上是将网页内容从 UTF-8 转换为 GB2312 的代码。

 

 

int main(int argc, char* argv[])
{
	SOCKET hsocket;
	SOCKADDR_IN saServer;
	WSADATA wsadata;
	LPHOSTENT lphostent;
	int nRet;
	char Dest[3000];  
	char* host_name="blog.sina.com.cn";
	char* req="GET /s/blog_44acab2f01016gz3.html HTTP/1.1\r\n"
		"User-Agent: Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727; .NET4.0C; .NET4.0E; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729)\r\n"
		"Host:blog.sina.com.cn\r\n\r\n";


	// 初始化套接字  
	if(WSAStartup(MAKEWORD(2,2),&wsadata))
		printf("初始化SOCKET出错!");
	lphostent=gethostbyname(host_name);   
	if(lphostent==NULL)   
		printf("lphostent为空!");   
	hsocket = socket(AF_INET,SOCK_STREAM,IPPROTO_TCP);   
	saServer.sin_family = AF_INET;   
	saServer.sin_port = htons(80);   
	saServer.sin_addr =*((LPIN_ADDR)*lphostent->h_addr_list);   
	// 利用SOCKET连接   
	nRet = connect(hsocket,(LPSOCKADDR)&saServer,sizeof(SOCKADDR_IN));   
	if(nRet == SOCKET_ERROR)   
	{
		printf("建立连接时出错!");   
		closesocket(hsocket);
		return 0;
	}
	// 利用SOCKET发送   

	nRet = send(hsocket,req,strlen(req),0);   
	if(nRet==SOCKET_ERROR)   
	{   
		printf("发送数据包时出错!");   
		closesocket(hsocket);   
	}   
	nRet=1;   
	while(nRet>0)   
	{   
		// 接收返回数据包   
		nRet=recv(hsocket,(LPSTR)Dest,sizeof(Dest),0);   
		if(nRet>0)
			Dest[nRet]=0;
		else   
			Dest[0]=0;  
		char sDest[3000] = {0};
		UTF8_2_GB2312(sDest,nRet,Dest,nRet);
		// 显示返回数据包的大小、内容  
		//printf("\nReceived bytes:%d\n",nRet);   
		printf("Result:\n%s",sDest);   
	}
}

 

以上代码仅供参考。。。



 

你可能感兴趣的:(C++,.net,windows,socket,Stream,list)