ASCII相关文章汇总如下:
ANSI是一种字符代码,为使计算机支持更多语言,通常使用 0x00~0x7f 范围的1 个字节来表示 1 个英文字符。超出此范围的使用0x80~0xFFFF来编码,即扩展的ASCII编码。
在简体中文Windows操作系统中,ANSI 编码代表 GBK 编码;在繁体中文Windows操作系统中,ANSI编码代表Big5;在日文Windows操作系统中,ANSI 编码代表 Shift_JIS 编码。
在ANSI编码中,包括ASCII在内的单字节字符共128个,对应char型的非负值(0x00~0x7F);汉字占2个字节,其首字节在0x80以上,即(有符号)char型的负值。ANSI文件开头没有任何标志(BOM),直接就是内容;读取时,计算机会按本地编码(如GBK)进行解码和显示。
对于英文来讲,ASCII码就足以编码所有字符,但对于中文,则必须使用两个字节来代表一个汉字,这种表示汉字的方式习惯上称为双字节。虽然双字节可以解决中英文字符混合使用的情况,但对于不同字符系统而言,就要经过字符码转换,非常麻烦,如中英、中日、日韩混合的情况。为解决这一问题,很多公司联合起来制定了一套可以适用于全世界所有国家的字符码,不管是东方文字还是西方文字,一律用两个字节来表示,这就是UNICODE。
Unicode编码系统可分为编码方式和实现方式两个层次。
UTF-8以字节为单位对Unicode进行编码。
UTF-8是变长编码:英文字符(ASCII)占1个字节,其他各国字符占2~4个字节,其中常用汉字占3个字节。
UTF8编码的分为带BOM和不带BOM的,BOM(Byte Order Mark)就是文件开头的标志了。
“汉字”的UTF-8编码需要6个字节。“汉字”的UTF-16编码需要两个WORD,大小是4个字节。“汉字”的UTF-32编码需要两个DWORD,大小是8个字节。根据字节序的不同,UTF-16可以被实现为UTF-16LE或UTF-16BE,UTF-32可以被实现为UTF-32LE或UTF-32BE。
以汉字严为例,Unicode 码是4E25,需要用两个字节存储,一个字节是4E,另一个字节是25。存储的时候,4E在前,25在后,这就是 Big endian 方式;25在前,4E在后,这是 Little endian 方式。
第一个字节在前,就是"大头方式"(Big endian),第二个字节在前就是"小头方式"(Little endian)。
MultiByteToWideChar:是一种windows API 函数,该函数映射一个字符串到一个宽字符(unicode)的字符串。由该函数映射的字符串没必要是多字节字符组。
int MultiByteToWideChar(
[in] UINT CodePage,
[in] DWORD dwFlags,
[in] _In_NLS_string_(cbMultiByte)LPCCH lpMultiByteStr,
[in] int cbMultiByte,
[out, optional] LPWSTR lpWideCharStr,
[in] int cchWideChar
);
catch (std::exception e)
{
// Save in-memory logging buffer to a log file on error.
::std::wstring wideWhat;
if (e.what() != nullptr)
{
int convertResult = MultiByteToWideChar(CP_UTF8, 0, e.what(), (int)strlen(e.what()), NULL, 0);
if (convertResult <= 0)
{
wideWhat = L"Exception occurred: Failure to convert its message text using MultiByteToWideChar: convertResult=";
wideWhat += convertResult.ToString()->Data();
wideWhat += L" GetLastError()=";
wideWhat += GetLastError().ToString()->Data();
}
else
{
wideWhat.resize(convertResult + 10);
convertResult = MultiByteToWideChar(CP_UTF8, 0, e.what(), (int)strlen(e.what()), &wideWhat[0], (int)wideWhat.size());
if (convertResult <= 0)
{
wideWhat = L"Exception occurred: Failure to convert its message text using MultiByteToWideChar: convertResult=";
wideWhat += convertResult.ToString()->Data();
wideWhat += L" GetLastError()=";
wideWhat += GetLastError().ToString()->Data();
}
else
{
wideWhat.insert(0, L"Exception occurred: ");
}
}
}
else
{
wideWhat = L"Exception occurred: Unknown.";
}
Platform::String^ errorMessage = ref new Platform::String(wideWhat.c_str());
// The session added the channel at level Warning. Log the message at
// level Error which is above (more critical than) Warning, which
// means it will actually get logged.
_channel->LogMessage(errorMessage, LoggingLevel::Error);
SaveLogInMemoryToFileAsync().then([=](StorageFile^ logFile) {
_logFileGeneratedCount++;
StatusChanged(this, ref new LoggingScenarioEventArgs(LoggingScenarioEventType::LogFileGenerated, logFile->Path->Data()));
}).wait();
}
WideCharToMultiByte:是一种windows API 函数,该函数把一个unicode(宽字符)字符串映射为一个多字节字符串;调用时可以指定执行转换所用的代码页、接收转换结果的缓冲区,以及用于额外控制的标志等。
int WideCharToMultiByte(
[in] UINT CodePage,
[in] DWORD dwFlags,
[in] _In_NLS_string_(cchWideChar)LPCWCH lpWideCharStr,
[in] int cchWideChar,
[out, optional] LPSTR lpMultiByteStr,
[in] int cbMultiByte,
[in, optional] LPCCH lpDefaultChar,
[out, optional] LPBOOL lpUsedDefaultChar
);
ISDSC_STATUS DiscpUnicodeToAnsiSize(
    IN __in PWCHAR UnicodeString,
    OUT ULONG *AnsiSizeInBytes
    )
/*++
Routine Description:
    Reports how many bytes are needed to hold the ANSI (CP_ACP) form of a
    unicode string.
Arguments:
    UnicodeString   - the unicode string to measure
    AnsiSizeInBytes - receives the value returned by WideCharToMultiByte
                      when it is called without a destination buffer
Return Value:
    ERROR_SUCCESS when a non-zero size was obtained, ERROR_NOACCESS when the
    call raised a structured exception, otherwise GetLastError()
--*/
{
    _try
    {
        // No destination buffer is supplied, so this call only measures.
        *AnsiSizeInBytes = WideCharToMultiByte(CP_ACP, 0, UnicodeString, -1,
                                               NULL, 0, NULL, NULL);
    }
    _except(EXCEPTION_EXECUTE_HANDLER)
    {
        return(ERROR_NOACCESS);
    }

    if (*AnsiSizeInBytes != 0)
    {
        return(ERROR_SUCCESS);
    }
    return(GetLastError());
}
wcsrtombs :将宽字符字符串转换为多字节字符串表示形式。 此函数有一个更安全的版本;请参阅 wcsrtombs_s。
size_t wcsrtombs(
char *mbstr,
const wchar_t **wcstr,
size_t count,
mbstate_t *mbstate
);
template <size_t size>
size_t wcsrtombs(
char (&mbstr)[size],
const wchar_t **wcstr,
size_t count,
mbstate_t *mbstate
); // C++ only
// crt_wcsrtombs.cpp
// compile with: /W3
// This code example converts a wide
// character string into a multibyte
// character string.
#include <stdio.h>
#include <string.h>
#include <wchar.h>
#include <errno.h>
#define MB_BUFFER_SIZE 100
// Converts a fixed wide-character string to a multibyte string with
// wcsrtombs and reports whether the conversion hit an encoding error.
int main()
{
    const wchar_t wcString[] =
        {L"Every good boy does fine."};
    const wchar_t *wcsIndirectString = wcString;
    char mbString[MB_BUFFER_SIZE];
    mbstate_t mbstate;
    // Reset to initial shift state
    ::memset((void*)&mbstate, 0, sizeof(mbstate));
    // errno may still hold a value from an earlier call; clear it so that a
    // stale EILSEQ cannot be mistaken for a failure of this conversion.
    errno = 0;
    const size_t countConverted = wcsrtombs(mbString, &wcsIndirectString,
                                            MB_BUFFER_SIZE, &mbstate); // C4996
    // Note: wcsrtombs is deprecated; consider using wcsrtombs_s
    // The return value is the authoritative failure signal: wcsrtombs
    // returns (size_t)-1 when a wide character cannot be converted.
    if (countConverted == static_cast<size_t>(-1) || errno == EILSEQ)
    {
        printf( "An encoding error was detected in the string.\n" );
    }
    else
    {
        printf( "The string was successfuly converted.\n" );
    }
}
mbsrtowcs:将当前区域设置中的多字节字符字符串转换为相应的宽字符字符串,并且能够从多字节字符的中间位置重新开始转换(可重启)。 提供此函数的一个更安全的版本;请参阅 mbsrtowcs_s。
size_t mbsrtowcs(
wchar_t *wcstr,
const char **mbstr,
size_t count,
mbstate_t *mbstate
);
template <size_t size>
size_t mbsrtowcs(
wchar_t (&wcstr)[size],
const char **mbstr,
size_t count,
mbstate_t *mbstate
); // C++ only
https://docs.microsoft.com/zh-cn/cpp/standard-library/codecvt-class?view=msvc-170
codecvt类:描述可用作区域设置方面的对象的类模板。 它可以控制用于在程序中对字符进行编码的值序列和用于对程序外部字符进行编码的值序列之间的转换。
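std::codecvt 的标准特化包括:std::codecvt<char, char, std::mbstate_t>、std::codecvt<wchar_t, char, std::mbstate_t>、std::codecvt<char16_t, char, std::mbstate_t>、std::codecvt<char32_t, char, std::mbstate_t>。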
template <class CharType, class Byte, class StateType>
class codecvt : public locale::facet, codecvt_base;
#include <iostream>
#include <string>
#include <locale>
#include <codecvt>
// Demonstrates converting one UTF-8 string to UTF-16 and to UCS2, printing
// the resulting code units; the UCS2 conversion is expected to fail on the
// character outside the BMP.
int main()
{
    // Writes each code unit on its own line as a 0x-prefixed hex number.
    const auto dumpCodeUnits = [](const std::u16string& units) {
        for (char16_t unit : units)
            std::cout << std::hex << std::showbase << unit << '\n';
    };

    // UTF-8 data. The character U+1d10b, musical sign segno, does not fit in UCS2
    const std::string utf8 = u8"z\u6c34\U0001d10b";

    // the UTF-8 / UTF-16 standard conversion facet
    std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> utf16conv;
    const std::u16string utf16 = utf16conv.from_bytes(utf8);
    std::cout << "UTF16 conversion produced " << utf16.size() << " code units:\n";
    dumpCodeUnits(utf16);

    // the UTF-8 / UCS2 standard conversion facet
    std::wstring_convert<std::codecvt_utf8<char16_t>, char16_t> ucs2conv;
    try
    {
        const std::u16string ucs2 = ucs2conv.from_bytes(utf8);
    }
    catch (const std::range_error& e)
    {
        // Convert again, this time only the prefix that was consumed
        // before the failure.
        const std::string convertedPrefix = utf8.substr(0, ucs2conv.converted());
        const std::u16string ucs2 = ucs2conv.from_bytes(convertedPrefix);
        std::cout << "UCS2 failed after producing " << std::dec << ucs2.size()<<" characters:\n";
        dumpCodeUnits(ucs2);
    }
}
// codecvt_out.cpp
// compile with: /EHsc
#define _INTL
#include <locale>
#include <iostream>
#include <wchar.h>
using namespace std;
#define LEN 90
// Converts a wide string to a narrow string with the codecvt facet of the
// "C" locale and prints the result.
int main( )
{
    const wchar_t* pwszInt = L"This is the wchar_t string to be converted.";
    const size_t cchInt = wcslen(pwszInt);

    // Zero-filled so the buffer is always a valid C string.
    char pszExt[LEN + 1] = {};
    char* pszNext = nullptr;
    const wchar_t* pwszNext = nullptr;

    // The conversion state must start out as the initial shift state;
    // handing an uninitialized mbstate_t to out() is undefined behaviour.
    std::mbstate_t state = {};

    std::locale loc("C");//English_Britain");//German_Germany
    const int res = std::use_facet<std::codecvt<wchar_t, char, std::mbstate_t>>
        (loc).out(state,
            pwszInt, pwszInt + cchInt, pwszNext,
            pszExt, pszExt + cchInt, pszNext);
    pszExt[cchInt] = 0;
    std::cout << (res != std::codecvt_base::error ? "It worked: " : "It didn't work: ")
        << "The converted string is:\n ["
        << &pszExt[0]
        << "]" << '\n';
}
C++17中codecvt的几个派生类弃用了,但std::codecvt 本身还可以用。
// C++11 conversions between ANSI (locale-encoded), wide (Unicode) and UTF-8 strings.
namespace StrConvert {
    namespace detail {
        // std::codecvt_byname has a protected destructor, but
        // std::wstring_convert deletes the facet it owns. This thin wrapper
        // exposes a public destructor so the facet can be owned portably.
        template <class Facet>
        struct DeletableFacet : Facet
        {
            explicit DeletableFacet(const std::string& name) : Facet(name) {}
            ~DeletableFacet() {}
        };

        typedef DeletableFacet<std::codecvt_byname<wchar_t, char, std::mbstate_t> > NamedFacet;
    }

    // Use these two when the narrow string is UTF-8 encoded.

    // Encodes a wide string as UTF-8.
    // Throws std::range_error if the conversion fails.
    std::string wstring2utf8string(const std::wstring& str)
    {
        // A fresh converter per call: a shared static converter carries
        // mutable state and is not safe to use from several threads.
        std::wstring_convert<std::codecvt_utf8<wchar_t> > strCnv;
        return strCnv.to_bytes(str);
    }

    // Decodes a UTF-8 string into a wide string.
    // Throws std::range_error if the input is not valid UTF-8.
    std::wstring utf8string2wstring(const std::string& str)
    {
        std::wstring_convert<std::codecvt_utf8<wchar_t> > strCnv;
        return strCnv.from_bytes(str);
    }

    // Use these two when the narrow string uses a non-UTF-8 encoding;
    // locale can be e.g. "chs", "CHS", "zh-cn" or ".936".

    // Encodes a wide string using the named locale's narrow encoding.
    // Throws std::runtime_error if the locale name is unknown and
    // std::range_error if the conversion fails.
    std::string wstring2string(const std::wstring& str, const std::string& locale)
    {
        // The converter is built on every call so the requested locale is
        // always honoured; a function-local static would be initialised
        // once and silently ignore the locale passed to later calls.
        std::wstring_convert<detail::NamedFacet> strCnv(new detail::NamedFacet(locale));
        return strCnv.to_bytes(str);
    }

    // Decodes a string in the named locale's narrow encoding into a wide string.
    // Throws std::runtime_error if the locale name is unknown and
    // std::range_error if the conversion fails.
    std::wstring string2wstring(const std::string& str, const std::string& locale)
    {
        std::wstring_convert<detail::NamedFacet> strCnv(new detail::NamedFacet(locale));
        return strCnv.from_bytes(str);
    }
}
// Re-encodes a GBK string as UTF-8 by going through std::filesystem::path:
// the path constructor decodes the narrow string with the given locale,
// and u8string() returns the UTF-8 form.
// Constructing the locale throws std::runtime_error when "zh_CN.gbk" is not
// available on the system.
namespace fs = std::filesystem;
std::string gbk_str;
fs::path path{gbk_str, std::locale("zh_CN.gbk")};
// auto: u8string() returns std::string in C++17 and std::u8string in C++20.
auto utf_str = path.u8string();
/***************************************************************
* @file FxEncodeUtil.h
* @brief C++实现GB2312(ANSI)、UTF8、Unicode字符编码互转
* @author 爱看书的小沐
* @version 1.0
* @date 2022-5-18
* @platform Visual Studio 2017 / Win10 win64
* @languages C++
**************************************************************/
#pragma once
#include
#include
#include
#include
// Static helpers converting text between ANSI (narrow, locale-encoded),
// UTF-8 and wide ("Unicode") strings. Each conversion exists twice: the
// plain variants use the C++ standard library, the variants ending in "2"
// use the Win32 MultiByteToWideChar / WideCharToMultiByte API.
class FxEncodeUtil
{
public:
// --- Standard-library implementations ---
// Wide -> UTF-8 via std::wstring_convert; returns an empty string and logs
// to std::cerr when the conversion throws.
static std::string UnicodeToUTF8(const std::wstring & wstr);
// UTF-8 -> wide via std::wstring_convert; returns an empty string and logs
// to std::cerr when the conversion throws.
static std::wstring UTF8ToUnicode(const std::string & str);
// Wide -> ANSI via wcsrtombs while the C locale is temporarily set to "chs".
static std::string UnicodeToANSI(const std::wstring & wstr);
// ANSI -> wide via mbsrtowcs while the C locale is temporarily set to "chs".
static std::wstring ANSIToUnicode(const std::string & str);
// UTF-8 -> ANSI, composed as UnicodeToANSI(UTF8ToUnicode(str)).
static std::string UTF8ToANSI(const std::string & str);
// ANSI -> UTF-8, composed as UnicodeToUTF8(ANSIToUnicode(str)).
static std::string ANSIToUTF8(const std::string & str);
// --- Win32 implementations ---
// ANSI -> wide via MultiByteToWideChar(CP_ACP).
static std::wstring ANSIToUnicode2(const std::string & str);
// Wide -> ANSI via WideCharToMultiByte(CP_ACP).
static std::string UnicodeToANSI2(const std::wstring & str);
// UTF-8 -> wide via MultiByteToWideChar(CP_UTF8).
static std::wstring UTF8ToUnicode2(const std::string & str);
// Wide -> UTF-8 via WideCharToMultiByte(CP_UTF8).
static std::string UnicodeToUTF82(const std::wstring & str);
// UTF-8 -> ANSI, composed as UnicodeToANSI2(UTF8ToUnicode2(str)).
static std::string UTF8ToANSI2(const std::string & str);
// ANSI -> UTF-8, composed as UnicodeToUTF82(ANSIToUnicode2(str)).
static std::string ANSIToUTF82(const std::string & str);
};
/***************************************************************
* @file FxEncodeUtil.cpp
* @brief C++实现GB2312(ANSI)、UTF8、Unicode字符编码互转
* @author 爱看书的小沐
* @version 1.0
* @date 2022-5-18
* @platform Visual Studio 2017 / Win10 win64
* @languages C++
**************************************************************/
#include "FxEncodeUtil.h"
#include "gtest/gtest.h"
#include "FxUnicode.h"
#define _AMD64_
#include
// Encodes a wide string as UTF-8 with std::wstring_convert.
// Conversion errors are not propagated: the exception text is written to
// std::cerr and an empty string is returned.
std::string FxEncodeUtil::UnicodeToUTF8(const std::wstring &wstr)
{
    try
    {
        std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
        return converter.to_bytes(wstr);
    }
    catch (const std::exception &error)
    {
        std::cerr << error.what() << std::endl;
    }
    return std::string();
}
// Decodes a UTF-8 string into a wide string with std::wstring_convert.
// Conversion errors are not propagated: the exception text is written to
// std::cerr and an empty string is returned.
std::wstring FxEncodeUtil::UTF8ToUnicode(const std::string &str)
{
    try
    {
        std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
        return converter.from_bytes(str);
    }
    catch (const std::exception &error)
    {
        std::cerr << error.what() << std::endl;
    }
    return std::wstring();
}
// Converts a wide string to the narrow encoding of the "chs" C locale using
// wcsrtombs. The process-wide C locale is switched for the duration of the
// call and restored afterwards. Returns an empty string when the text
// cannot be converted. Conversion stops at the first embedded L'\0'.
std::string FxEncodeUtil::UnicodeToANSI(const std::wstring &wstr)
{
    // Keep a private copy of the current locale name: the pointer returned
    // by setlocale may be invalidated by the next setlocale call, so it
    // cannot be used later to restore the locale.
    const char *activeLocale = setlocale(LC_ALL, NULL);
    const std::string savedLocale = (activeLocale != NULL) ? activeLocale : "";
    setlocale(LC_ALL, "chs");

    std::string ret;
    std::mbstate_t state = {};
    const wchar_t *src = wstr.c_str();
    // First pass (no destination): number of bytes needed, terminator excluded.
    const size_t needed = std::wcsrtombs(nullptr, &src, 0, &state);
    if (needed != static_cast<size_t>(-1))
    {
        // std::string owns the scratch buffer; one extra byte for the terminator.
        std::string buffer(needed + 1, '\0');
        const size_t written = std::wcsrtombs(&buffer[0], &src, buffer.size(), &state);
        if (written != static_cast<size_t>(-1))
        {
            buffer.resize(written);
            ret.swap(buffer);
        }
    }

    if (!savedLocale.empty())
    {
        setlocale(LC_ALL, savedLocale.c_str());
    }
    return ret;
}
// Converts a narrow string in the encoding of the "chs" C locale to a wide
// string using mbsrtowcs. The process-wide C locale is switched for the
// duration of the call and restored afterwards. Returns an empty string
// when the text cannot be converted. Conversion stops at the first
// embedded '\0'.
std::wstring FxEncodeUtil::ANSIToUnicode(const std::string &str)
{
    // Keep a private copy of the current locale name: the pointer returned
    // by setlocale may be invalidated by the next setlocale call, so it
    // cannot be used later to restore the locale.
    const char *activeLocale = setlocale(LC_ALL, NULL);
    const std::string savedLocale = (activeLocale != NULL) ? activeLocale : "";
    setlocale(LC_ALL, "chs");

    std::wstring ret;
    std::mbstate_t state = {};
    const char *src = str.c_str();
    // First pass (no destination): number of wide characters needed,
    // terminator excluded.
    const size_t needed = std::mbsrtowcs(nullptr, &src, 0, &state);
    if (needed != static_cast<size_t>(-1))
    {
        // std::wstring owns the scratch buffer; one extra slot for the terminator.
        std::wstring buffer(needed + 1, L'\0');
        const size_t written = std::mbsrtowcs(&buffer[0], &src, buffer.size(), &state);
        if (written != static_cast<size_t>(-1))
        {
            buffer.resize(written);
            ret.swap(buffer);
        }
    }

    if (!savedLocale.empty())
    {
        setlocale(LC_ALL, savedLocale.c_str());
    }
    return ret;
}
// UTF-8 -> ANSI in two steps, with the wide string as the intermediate form.
std::string FxEncodeUtil::UTF8ToANSI(const std::string &str)
{
    const std::wstring wide = UTF8ToUnicode(str);
    return UnicodeToANSI(wide);
}
// ANSI -> UTF-8 in two steps, with the wide string as the intermediate form.
std::string FxEncodeUtil::ANSIToUTF8(const std::string &str)
{
    const std::wstring wide = ANSIToUnicode(str);
    return UnicodeToUTF8(wide);
}
///
///
///
// Converts a string in the system ANSI code page (CP_ACP) to a wide string
// with MultiByteToWideChar. Returns an empty string when the conversion
// fails. Conversion stops at the first embedded '\0'.
std::wstring FxEncodeUtil::ANSIToUnicode2(const std::string &str)
{
    // With a source length of -1 the reported size includes the terminator.
    const int required = ::MultiByteToWideChar(CP_ACP, 0, str.c_str(), -1, nullptr, 0);
    if (required <= 0)
    {
        return std::wstring();
    }

    // std::wstring owns the buffer, so nothing has to be released by hand
    // (the previous code freed a new[] allocation with scalar delete).
    std::wstring buffer(static_cast<size_t>(required), L'\0');
    const int written = ::MultiByteToWideChar(CP_ACP, 0, str.c_str(), -1, &buffer[0], required);
    if (written <= 0)
    {
        return std::wstring();
    }

    // Drop the terminator that was counted in 'written'.
    buffer.resize(static_cast<size_t>(written) - 1);
    return buffer;
}
// Converts a wide string to the system ANSI code page (CP_ACP) with
// WideCharToMultiByte. Returns an empty string when the conversion fails.
// Conversion stops at the first embedded L'\0'.
std::string FxEncodeUtil::UnicodeToANSI2(const std::wstring &str)
{
    // With a source length of -1 the reported size includes the terminator.
    const int required = ::WideCharToMultiByte(CP_ACP, 0, str.c_str(), -1, nullptr, 0, nullptr, nullptr);
    if (required <= 0)
    {
        return std::string();
    }

    // std::string owns the buffer, so it cannot leak if an allocation throws.
    std::string buffer(static_cast<size_t>(required), '\0');
    const int written = ::WideCharToMultiByte(CP_ACP, 0, str.c_str(), -1, &buffer[0], required, nullptr, nullptr);
    if (written <= 0)
    {
        return std::string();
    }

    // Drop the terminator that was counted in 'written'.
    buffer.resize(static_cast<size_t>(written) - 1);
    return buffer;
}
// Converts a UTF-8 string to a wide string with MultiByteToWideChar(CP_UTF8).
// Returns an empty string when the conversion fails. Conversion stops at
// the first embedded '\0'.
std::wstring FxEncodeUtil::UTF8ToUnicode2(const std::string &str)
{
    // With a source length of -1 the reported size includes the terminator.
    const int required = ::MultiByteToWideChar(CP_UTF8, 0, str.c_str(), -1, nullptr, 0);
    if (required <= 0)
    {
        return std::wstring();
    }

    // std::wstring owns the buffer, so it cannot leak if an allocation throws.
    std::wstring buffer(static_cast<size_t>(required), L'\0');
    const int written = ::MultiByteToWideChar(CP_UTF8, 0, str.c_str(), -1, &buffer[0], required);
    if (written <= 0)
    {
        return std::wstring();
    }

    // Drop the terminator that was counted in 'written'.
    buffer.resize(static_cast<size_t>(written) - 1);
    return buffer;
}
// Converts a wide string to UTF-8 with WideCharToMultiByte(CP_UTF8).
// Returns an empty string when the conversion fails. Conversion stops at
// the first embedded L'\0'.
std::string FxEncodeUtil::UnicodeToUTF82(const std::wstring &str)
{
    // With a source length of -1 the reported size includes the terminator.
    const int required = ::WideCharToMultiByte(CP_UTF8, 0, str.c_str(), -1, nullptr, 0, nullptr, nullptr);
    if (required <= 0)
    {
        return std::string();
    }

    // std::string owns the buffer, so it cannot leak if an allocation throws.
    std::string buffer(static_cast<size_t>(required), '\0');
    const int written = ::WideCharToMultiByte(CP_UTF8, 0, str.c_str(), -1, &buffer[0], required, nullptr, nullptr);
    if (written <= 0)
    {
        return std::string();
    }

    // Drop the terminator that was counted in 'written'.
    buffer.resize(static_cast<size_t>(written) - 1);
    return buffer;
}
// UTF-8 -> ANSI through the Win32-based helpers, with the wide string as
// the intermediate form.
std::string FxEncodeUtil::UTF8ToANSI2(const std::string &str)
{
    const std::wstring wide = UTF8ToUnicode2(str);
    return UnicodeToANSI2(wide);
}
// ANSI -> UTF-8 through the Win32-based helpers, with the wide string as
// the intermediate form.
std::string FxEncodeUtil::ANSIToUTF82(const std::string &str)
{
    const std::wstring wide = ANSIToUnicode2(str);
    return UnicodeToUTF82(wide);
}
///
///
///
// Checks that both ANSI -> UTF-8 implementations produce the expected UTF-8
// bytes for a single Chinese character.
// NOTE(review): the narrow literals below are passed in as "ANSI" text, so
// the test presumably relies on this file being compiled with a GBK
// execution character set - confirm against the build settings.
TEST(FxEncodeUtilTest, StringComparison)
{
/*LCID lciid = GetUserDefaultLCID();
wchar_t szLocName[255] = L"\0";
int len = 255;
GetUserDefaultLocaleName(szLocName, len);
char *curLocale = setlocale(LC_ALL, NULL);
// setlocale(LC_ALL, NULL);
// setlocale( LC_ALL, "en-US" );
// setlocale(LC_ALL, "chs");
setlocale(LC_ALL, "zh-CN");*/
char strZhong[] = {-28, -72, -83, 0}; // bytes E4 B8 AD: the UTF-8 encoding of U+4E2D
ASSERT_STREQ(strZhong, FxEncodeUtil::ANSIToUTF8("中").c_str());
ASSERT_STREQ(strZhong, FxEncodeUtil::ANSIToUTF82("中").c_str());
// NOTE(review): the two expectations below compare a call with an identical
// call, so they can only fail if the conversion is non-deterministic; they
// do not check the converted bytes.
EXPECT_EQ(FxEncodeUtil::ANSIToUTF82("中国"), FxEncodeUtil::ANSIToUTF82("中国"));
EXPECT_EQ(FxEncodeUtil::ANSIToUTF82("你好世界,2022"), FxEncodeUtil::ANSIToUTF82("你好世界,2022"));
}
http://gnuwin32.sourceforge.net/packages/libiconv.htm
iconv是linux下的编码转换的工具,它提供命令行的使用和函数接口支持。
-f, --from-code=名称 原始文本编码
-t, --to-code=名称 输出编码
-l, --list 列举所有已知的字符集
-c 从输出中忽略无效的字符
-o, --output=FILE 输出文件
-s, --silent 关闭警告
--verbose 打印进度信息
iconv -f encoding -t encoding inputfile
iconv -f utf-8 -t unicode utf8file.txt> unicodefile.txt
iconv -f utf-8 -t gb2312 /server_test/reports/t1.txt > /server_test/reports/t2.txt
iconv -f utf8 -t gb18030 -oresult.xls result_tmp.xls
iconv -f utf8 -t gb18030 result_tmp.xls> result.xls
iconv -f utf8 -t gb2312 result_tmp.xls> result.xls
iconv -f latin1 -t ascii//TRANSLIT file
iconv -f UTF-8 -t ascii//TRANSLIT file
iconv -f "ISO_8859-1" -t "GBK" ./test
cmake -G "Visual Studio 15 2017 Win64" ..
https://www.gnu.org/software/libiconv/
//***********************************************************************
// Purpose: 基于libiconv库的C++实现字符编码互转(测试用例1)
// Author: 爱看书的小沐
// Date: 2022-5-19
// Languages: C++
// Platform: Visual Studio 2017
// OS: Win10 win64
// ***********************************************************************
#include
#include "libiconv/iconv.h"
#pragma comment(lib, "libiconv.lib")
// Converts a fixed GBK sample string to UTF-8 with libiconv, then prints the
// buffers, the remaining byte counts and a hex dump of the output.
// Returns 0 on success and -1 when iconv_open or iconv fails.
// Side effect: sets the process C locale to "chs".
int test_libiconv1()
{
    setlocale(LC_ALL, "chs");

    /* Source encoding */
    const char *encFrom = "GBK";
    /* Target encoding. TRANSLIT: replace an unconvertible character with a
     * similar-looking one. IGNORE: skip unconvertible characters. */
    //const char *encTo = "UNICODE//TRANSLIT";
    //const char *encTo = "GBK//TRANSLIT";
    //const char *encTo = "latin1//TRANSLIT";
    //const char *encTo = "ISO_8859-1//TRANSLIT";
    //const char *encTo = "UNICODE//IGNORE";
    const char *encTo = "UTF-8//TRANSLIT";

    /* Obtain the conversion descriptor (target encoding first). */
    iconv_t cd = iconv_open(encTo, encFrom);
    if (cd == (iconv_t)-1)
    {
        perror("iconv_open");
        return -1;
    }

    /* Text to convert */
    char inbuf[1024] = "螺丝中123abc";
    size_t inlen = strlen(inbuf);
    /* Receives the converted text */
    char outbuf[1024] = {};
    size_t outlen = sizeof(outbuf);

    /* iconv() advances the pointers it is given, so work on copies. */
    char *srcstart = inbuf;
    char *tempoutbuf = outbuf;
    const size_t ret = iconv(cd, (const char **)&srcstart, &inlen, &tempoutbuf, &outlen);
    if (ret == (size_t)-1)
    {
        perror("iconv");
        /* Release the descriptor on the error path as well. */
        iconv_close(cd);
        return -1;
    }

    /* size_t must be printed with %zu; %d is undefined on 64-bit targets. */
    printf("inbuf=%s, inlen=%zu, outbuf=%s, outlen=%zu\n", inbuf, inlen, outbuf, outlen);

    /* Dump each output byte as hex. The cast to unsigned char keeps bytes
     * >= 0x80 from being sign-extended (e.g. to ffffffe8). */
    for (const char *p = outbuf; *p != '\0'; ++p)
    {
        printf("%x\n", static_cast<unsigned int>(static_cast<unsigned char>(*p)));
    }

    /* Close the descriptor */
    iconv_close(cd);
    return 0;
}
//***********************************************************************
// Purpose: 基于libiconv库的C++实现字符编码互转(测试用例2)
// Author: 爱看书的小沐
// Date: 2022-5-19
// Languages: C++
// Platform: Visual Studio 2017
// OS: Win10 win64
// ***********************************************************************
#include
#include "libiconv/iconv.h"
#pragma comment(lib, "libiconv.lib")
// Converts the NUL-terminated string inStr from encoding encFrom to encoding
// encTo with libiconv.
//
// Returns a pointer to the converted, NUL-terminated text, or NULL when
// inStr is NULL, the conversion descriptor cannot be opened, or the
// conversion fails (for example when the result exceeds 1024 bytes).
//
// The result lives in a buffer with static storage duration: it stays valid
// after the call returns but is overwritten by the next call, and the
// function is therefore not safe to call from several threads at once.
// Side effect: sets the process C locale to "chs".
char * test_libiconv2(const char *encFrom, const char *encTo, const char * inStr)
{
    // static: the buffer is handed back to the caller, so it must outlive
    // this call (returning the address of an automatic array is a dangling
    // pointer).
    static char buf_out[1024];
    buf_out[0] = '\0';

    if (inStr == NULL)
    {
        return NULL;
    }

    setlocale(LC_ALL, "chs");

    iconv_t c_pt = iconv_open(encTo, encFrom);
    if (c_pt == (iconv_t)-1)
    {
        printf("iconv_open false: %s ==> %s\n", encFrom, encTo);
        return NULL;
    }
    // Reset the descriptor to its initial conversion state.
    iconv(c_pt, NULL, NULL, NULL, NULL);

    // iconv takes size_t counters; int variables cast to size_t* would be
    // read and written past their end on 64-bit targets.
    size_t len_in = strlen(inStr) + 1;   // +1 so the terminator is converted too
    size_t len_out = sizeof(buf_out);
    char *str_in = (char *)inStr;
    char *str_out = buf_out;

    const size_t ret = iconv(c_pt, (const char**)&str_in, &len_in, &str_out, &len_out);
    if (ret == (size_t)-1)
    {
        perror("iconv");
        // Release the descriptor on the error path as well.
        iconv_close(c_pt);
        return NULL;
    }

    iconv_close(c_pt);
    return buf_out;
}
// Runs test_libiconv2 over a few GBK sample sentences and prints every
// result that was produced.
int main()
{
    const char *const samples[] = {
        "螺丝中123abc+-=,.?*$%#@",
        "开发者还提供了一张GeForce RTX 3090电脑的截图,",
        "该电脑在与 英特尔 Core i9-12900K的组合下,跑出了35fps的帧速。",
    };

    for (const char *sample : samples)
    {
        char *converted = test_libiconv2("GBK", "UTF-8//TRANSLIT", sample);
        if (converted)
        {
            std::cout << converted << std::endl;
        }
    }
}
//***********************************************************************
// Purpose: 基于libiconv库的C++实现字符编码互转(测试用例3)
// Author: 爱看书的小沐
// Date: 2022-5-19
// Languages: C++
// Platform: Visual Studio 2017
// OS: Win10 win64
// ***********************************************************************
#include
#include "libiconv/iconv.h"
#pragma comment(lib, "libiconv.lib")
// Converts *inlen bytes of inbuf from encoding encFrom to encoding encTo
// with libiconv, writing the result to outbuf and logging the input, the
// output and their sizes to stdout.
//
// @param encFrom source encoding name
// @param encTo   target encoding name; may carry a //TRANSLIT or //IGNORE suffix
// @param inbuf   bytes to convert
// @param inlen   in: number of input bytes; out: input bytes left unconverted
// @param outbuf  destination buffer
// @param outlen  in: capacity of outbuf in bytes; out: number of bytes written
// @return true when the conversion succeeded and produced at least one
//         byte; false when the descriptor could not be opened, iconv
//         reported an error, or nothing was written
bool test_libiconv3(const char *encFrom, const char *encTo, const char *inbuf, size_t *inlen, char *outbuf, size_t *outlen)
{
    /* Obtain the conversion descriptor (target encoding first). */
    iconv_t cd = iconv_open(encTo, encFrom);
    if (cd == (iconv_t)-1)
    {
        perror("iconv_open");
        /* An invalid descriptor must not be passed to iconv()/iconv_close();
         * report "nothing written" and stop here. */
        *outlen = 0;
        return false;
    }

    /* Text to convert and its length (size_t requires %zu, not %d). */
    printf("in_buf=%s\n", inbuf);
    printf("in_len=%zu\n", *inlen);

    /* iconv() advances the pointers it is given, so work on copies. */
    char *tmpin = (char *)inbuf;
    char *tmpout = outbuf;
    const size_t outsize = *outlen;

    const size_t ret = iconv(cd, &tmpin, inlen, &tmpout, outlen);
    const bool converted = (ret != (size_t)-1);
    if (!converted)
    {
        perror("iconv");
    }

    /* Converted text */
    printf("out_buf=%s\n", outbuf);

    /* iconv() decreased *outlen by the number of bytes it wrote. */
    const size_t outlen_real = outsize - (*outlen);
    *outlen = outlen_real;
    printf("out_len=%zu\n", outlen_real);

    /* Close the descriptor */
    iconv_close(cd);
    return converted && outlen_real > 0;
}
// "unicode" (UCS-2 little-endian) -> UTF-8; unconvertible characters are skipped.
bool unicode_to_utf8(const char *inbuf, size_t *inlen, char *outbuf, size_t *outlen)
{
    const char *const sourceEncoding = "UCS-2LE";
    const char *const targetEncoding = "UTF-8//IGNORE";
    return test_libiconv3(sourceEncoding, targetEncoding, inbuf, inlen, outbuf, outlen);
}
// UTF-8 -> "unicode" (UCS-2 little-endian); unconvertible characters are skipped.
bool utf8_to_unicode(const char *inbuf, size_t *inlen, char *outbuf, size_t *outlen)
{
    const char *const sourceEncoding = "UTF-8";
    const char *const targetEncoding = "UCS-2LE//IGNORE";
    return test_libiconv3(sourceEncoding, targetEncoding, inbuf, inlen, outbuf, outlen);
}
// GBK -> "unicode"; "UCS-2LE" selects little-endian UCS-2. Unconvertible
// characters are skipped.
// The source encoding is "GBK", matching the function name and gbk_to_utf8:
// "gb2312" is only a subset of GBK, so GBK-only characters could not be
// decoded with it.
bool gbk_to_unicode(const char *inbuf, size_t *inlen, char *outbuf, size_t *outlen)
{
    return test_libiconv3("GBK", "UCS-2LE//IGNORE", inbuf, inlen, outbuf, outlen);
}
// "unicode" (UCS-2 little-endian) -> GBK; unconvertible characters are skipped.
// The target encoding is "GBK", matching the function name and utf8_to_gbk:
// "gb2312" is only a subset of GBK, so GBK-only characters would have been
// dropped by //IGNORE.
bool unicode_to_gbk(const char *inbuf, size_t *inlen, char *outbuf, size_t *outlen)
{
    return test_libiconv3("UCS-2LE", "GBK//IGNORE", inbuf, inlen, outbuf, outlen);
}
// GBK -> UTF-8; unconvertible characters are skipped.
bool gbk_to_utf8(const char *inbuf, size_t *inlen, char *outbuf, size_t *outlen)
{
    const char *const sourceEncoding = "GBK";
    const char *const targetEncoding = "UTF-8//IGNORE";
    return test_libiconv3(sourceEncoding, targetEncoding, inbuf, inlen, outbuf, outlen);
}
// UTF-8 -> GBK; unconvertible characters are skipped.
bool utf8_to_gbk(const char *inbuf, size_t *inlen, char *outbuf, size_t *outlen)
{
    const char *const sourceEncoding = "UTF-8";
    const char *const targetEncoding = "GBK//IGNORE";
    return test_libiconv3(sourceEncoding, targetEncoding, inbuf, inlen, outbuf, outlen);
}
// Prints the first len bytes of buffer to stdout as comma-terminated
// hexadecimal values, followed by a newline.
void printChars(const char *buffer, int len)
{
    for (int i = 0; i < len; ++i)
    {
        // Go through unsigned char first: where char is signed, a byte
        // >= 0x80 would otherwise be sign-extended and print as ffffffXX.
        printf("%0x,", static_cast<unsigned int>(static_cast<unsigned char>(buffer[i])));
    }
    printf("\n");
}
测试代码如下:
// Drives a chain of conversions that starts from one GBK sample string:
// GBK -> UTF-8 -> unicode -> UTF-8 -> GBK -> unicode -> GBK. The output of
// each stage becomes the input of the next; after every stage the result
// is printed as text and as a hex dump.
int main()
{
    typedef bool (*Converter)(const char *, size_t *, char *, size_t *);
    struct Stage
    {
        const char *banner;   // heading printed before the stage runs
        Converter convert;    // conversion applied by the stage
    };
    const Stage stages[] = {
        {"\n******【gbk_to_utf8】*******\n", gbk_to_utf8},
        {"\n*******【utf8_to_unicode】*******\n", utf8_to_unicode},
        {"\n******【unicode_to_utf8】*******\n", unicode_to_utf8},
        {"\n******【utf8_to_gbk】*******\n", utf8_to_gbk},
        {"\n*******【gbk_to_unicode】*******\n", gbk_to_unicode},
        {"\n********【unicode_to_gbk】********\n", unicode_to_gbk},
    };

    char inbuf[1024] = "111螺丝中=+-()abc222";
    size_t inlen = strlen(inbuf);
    char outbuf[1024] = {};
    size_t outlen = sizeof(outbuf);

    bool firstStage = true;
    for (const Stage &stage : stages)
    {
        printf("%s", stage.banner);
        if (!firstStage)
        {
            // Feed the previous stage's output back in as this stage's input.
            inlen = outlen;
            outlen = sizeof(outbuf);
            memcpy(inbuf, outbuf, inlen);
            memset(outbuf, 0, outlen);
        }
        firstStage = false;

        stage.convert(inbuf, &inlen, outbuf, &outlen);
        printf("result:\n%s\n", outbuf);
        printChars(outbuf, outlen);
    }
}
libiconv的相关测试源码见如下链接:
https://download.csdn.net/download/hhy321/85419981
如果您觉得该方法或代码有一点点用处,可以给作者点个赞,或打赏杯咖啡;╮( ̄▽ ̄)╭
如果您感觉方法或代码不咋地//(ㄒoㄒ)//,就在评论处留言,作者继续改进。o_O???
如果您需要相关功能的代码定制化开发,可以留言私聊作者。(✿◡‿◡)
感谢各位童鞋们的支持!( ´ ▽´ )ノ ( ´ ▽´)っ!!!