[C/C++]_[Unicode转Utf8,Ansi转Unicode,Ansi文件转Utf8文件]

http://blog.csdn.net/infoworld/article/details/15337665

场景:

1. 只有 Windows 才这么麻烦,还要多出一个 ANSI 编码;看看 Mac OS X,只需要 UTF-8 一种编码就行。

2. 有时需要把 ANSI 文件的内容转换为 UTF-8 编码:每读取一行,先把 ANSI 字符串转换为 UTF-8,再写入文件。


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <windows.h>
#include <assert.h>

/**
 * Converts a NUL-terminated wide-character string to a newly allocated
 * UTF-8 string.
 *
 * @param unicode  Pointer to NUL-terminated wchar_t data, passed as char*
 *                 (this is the form Ansi2Unicode returns). It is
 *                 reinterpreted as const wchar_t* without copying.
 * @return A heap-allocated, NUL-terminated UTF-8 string that the caller
 *         must release with free(). If the conversion fails (for example
 *         because unicode is NULL) an empty string is returned. NULL is
 *         returned only when memory allocation fails.
 */
char* Unicode2Utf8(const char* unicode)
{
	const wchar_t *wide = (const wchar_t*)unicode;
	char *szUtf8;
	int len;

	/* Sizing call: with cchWideChar == -1 the result is the required byte
	 * count including the terminating NUL, or 0 on failure. */
	len = WideCharToMultiByte(CP_UTF8, 0, wide, -1, NULL, 0, NULL, NULL);

	/* calloc zero-fills, so the buffer is a valid empty string even when
	 * nothing gets written into it. */
	szUtf8 = (char*)calloc((size_t)len + 1, 1);
	if (szUtf8 == NULL)
	{
		return NULL;
	}

	if (len > 0 &&
		WideCharToMultiByte(CP_UTF8, 0, wide, -1, szUtf8, len, NULL, NULL) == 0)
	{
		/* The real conversion failed: do not hand back partial output. */
		szUtf8[0] = '\0';
	}
	return szUtf8;
}

/**
 * Converts a NUL-terminated string encoded in the system ANSI code page
 * (CP_ACP) to a newly allocated wide-character string.
 *
 * @param str  NUL-terminated ANSI string.
 * @return A heap-allocated buffer holding NUL-terminated wchar_t data,
 *         returned as char* (cast back to wchar_t* to read it). The caller
 *         must release it with free(). If the conversion fails (for example
 *         because str is NULL) an empty wide string is returned, so callers
 *         that treat the result as wchar_t data never read narrow bytes as
 *         wide characters. NULL is returned only when memory allocation
 *         fails.
 */
char* Ansi2Unicode(const char* str)
{
	wchar_t *pwText;

	/* Sizing call: with cbMultiByte == -1 the result is the required number
	 * of wide characters including the terminating NUL, or 0 on failure. */
	int dwUnicodeLen = MultiByteToWideChar(CP_ACP,0,str,-1,NULL,0);
	if(dwUnicodeLen <= 0)
	{
		/* Return a single zeroed wchar_t (an empty wide string) rather than
		 * a copy of the narrow input, which is not valid wide data. */
		return (char*)calloc(1, sizeof(wchar_t));
	}

	/* calloc zero-fills and guards the count * size multiplication. */
	pwText = (wchar_t*)calloc((size_t)dwUnicodeLen, sizeof(wchar_t));
	if(pwText == NULL)
	{
		return NULL;
	}

	if(MultiByteToWideChar(CP_ACP,0,str,-1,pwText,dwUnicodeLen) == 0)
	{
		/* The real conversion failed: do not hand back partial output. */
		pwText[0] = L'\0';
	}
	return (char*)pwText;
}

/**
 * Converts an ANSI string to UTF-8 by going through an intermediate
 * wide-character buffer: Ansi2Unicode first, then Unicode2Utf8.
 *
 * @param str  NUL-terminated ANSI string, forwarded to Ansi2Unicode.
 * @return Whatever Unicode2Utf8 returns for the intermediate buffer; the
 *         caller owns it and releases it with free().
 */
char* ConvertAnsiToUtf8(const char* str)
{
	char* result;
	char* wideBuffer;

	wideBuffer = Ansi2Unicode(str);
	result = Unicode2Utf8(wideBuffer);

	/* The intermediate buffer is only needed for the second step. */
	free(wideBuffer);

	return result;
}

/**
 * Self-check: converts the ANSI bytes of "中文abc" to UTF-8 and asserts that
 * the result matches the expected UTF-8 byte sequence.
 *
 *   ansi: D6 D0 CE C4 61 62 63
 *   utf8: E4 B8 AD E6 96 87 61 62 63
 *
 * NOTE(review): the ANSI bytes look like GBK/CP936 encoding, so the
 * assertion presumably only holds when the system ANSI code page is 936 —
 * confirm before running on other locales.
 *
 * @return 0 on success, EXIT_FAILURE if the conversion returned NULL.
 */
int main(int argc, char *argv[])
{
	/* Hex-escaped string literals avoid storing values above CHAR_MAX through
	 * a brace initializer (a narrowing error in C++11). Each literal is split
	 * before "abc" because a \x escape would otherwise swallow the following
	 * hex digits 'a', 'b' and 'c'. */
	const char ansi[] = "\xD6\xD0\xCE\xC4" "abc";
	const char utf8[] = "\xE4\xB8\xAD\xE6\x96\x87" "abc";
	char* str;

	(void)argc;
	(void)argv;

	printf("Hello, world\n");

	str = ConvertAnsiToUtf8(ansi);
	if (str == NULL)
	{
		/* strcmp on NULL is undefined; report and bail out instead. */
		fprintf(stderr, "ConvertAnsiToUtf8 returned NULL\n");
		return EXIT_FAILURE;
	}

	assert(strcmp(str, utf8) == 0);
	free(str);
	return 0;
}


你可能感兴趣的:([C/C++]_[Unicode转Utf8,Ansi转Unicode,Ansi文件转Utf8文件])