为了让netsurf有复制粘贴功能, 使用CF_TEXT对英文没问题,对汉字就乱码了,因此一定要使用CF_UNICODETEXT。
这里就引出几个问题:
1. netsurf内部编码是utf-8的,如何换成utf-16给windows?
2. 操作剪贴板的例程?
3. 宽字符串的操作,比如取长度?
前两点容易搜索到,第3点有几个坑需要注意:
1. printf的参数是char, wprintf的参数是wchar_t,因此使用宽字符函数时一定要检查全部入口参数;
2. sprintf()没有maxlen, swprintf有:
int swprintf(wchar_t *wcs, size_t maxlen, const wchar_t *format, ...);
这个可把我折腾了一阵子。
贴上测试代码:
/*
 * Wide-character string experiments.
 *
 *   wchar_t *wcs = L"xxx";
 *
 * On an x86 machine the compiler automatically converts L"xxx" from
 * UTF-8 (the source encoding) to UCS-2LE.
 *
 * Different names for the same things: narrow `char` strings are called
 * ANSI / MBCS on Windows; wide strings are `wchar_t` in standard C and
 * `WCHAR` on Windows.
 *
 * sizeof(wchar_t) is 2 on Windows and 4 with glibc on Unix.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#include <locale.h>

#ifdef _WIN32
#include <windows.h>
#define LC_STR "chs"
#else
#define LC_STR "zh_CN.UTF-8"
#endif

/**
 * Print a prefix followed by a hex dump of a memory region, then a newline.
 *
 * \param prefix  text printed before the bytes
 * \param data    start of the region to dump
 * \param size    number of bytes to dump
 */
void dump_bytes(char *prefix, void *data, int size)
{
	int i;
	unsigned char *ptr = data;

	printf("%s", prefix);
	for (i = 0; i < size; ++i) {
		printf(" %02x", ptr[i]);
	}
	printf("\n");
}

/**
 * Convert a UTF-8 string to a newly allocated, NUL-terminated wide string.
 *
 * \param mbcs    UTF-8 input
 * \param length  number of bytes of input to convert
 * \param len     if non-NULL, receives the number of wide characters
 *                produced (0 on failure)
 * \return wide string to be released with free(), or NULL on failure.
 *         The non-Windows branch is not implemented and always returns NULL.
 */
wchar_t *char2wchar(char *mbcs, int length, int *len)
{
#ifdef _WIN32
	int wlen = 0;
	wchar_t *wstring;

	if (len)
		*len = 0;

	/* MultiByteToWideChar() rejects a zero-length input, so only call it
	 * when there is something to convert. */
	if (length != 0) {
		wlen = MultiByteToWideChar(CP_UTF8, 0, mbcs, length, NULL, 0);
		if (wlen <= 0)
			return NULL;
	}

	wstring = malloc(sizeof(*wstring) * ((size_t)wlen + 1));
	if (wstring == NULL)
		return NULL;

	if (wlen > 0 &&
	    MultiByteToWideChar(CP_UTF8, 0, mbcs, length, wstring, wlen) != wlen) {
		free(wstring);
		return NULL;
	}

	/* With an explicit length the API does not write a terminator. */
	wstring[wlen] = L'\0';

	if (len)
		*len = wlen;
	return wstring;
#else
	(void)mbcs;
	(void)length;
	if (len)
		*len = 0;
	return NULL; /*todo*/
#endif
}

/*
 * Exercise narrow/wide formatting and UTF-8 -> wide conversion, dumping the
 * resulting bytes.
 *
 * NOTE(review): printf() and wprintf() are mixed on stdout. Standard C fixes
 * a stream's orientation at its first I/O operation, so on some C libraries
 * the wprintf() calls may produce no output; kept as in the original
 * experiment.
 */
int main(int argc, char **argv)
{
	wchar_t *w2, *w1 = L"百度一下,你就知道";
	char *mbcs = "百度一下,你就知道";
	char buf[1024] = "";
	wchar_t wbuf[1024] = L"";
	int l1, l2 = 0;

	(void)argc;
	(void)argv;

	setlocale(LC_ALL, LC_STR);

	printf("sizeof(wchar_t) %d\n", (int)sizeof(wchar_t));

	l1 = (int)wcslen(w1);
	wprintf(L"wcs %ls len %d\n", w1, l1);
	snprintf(buf, sizeof(buf), "%ls %d %ls %s\n", w1, 123, w1, "end");
	printf("%s", buf);
	dump_bytes("L", w1, l1 * (int)sizeof(wchar_t));

	w2 = char2wchar(mbcs, (int)strlen(mbcs), &l2);
	if (w2 != NULL) {
		printf("%d %ls %s\n", 123, w2, "end");
		dump_bytes("C", w2, l2 * (int)sizeof(wchar_t));
		free(w2);
	}

	wchar_t w3[] = {0x0057, 0x0069, 0x006b, 0x0069, 0}; /* L"WiKi" */

	memset(buf, 0, sizeof(buf));
	snprintf(buf, sizeof(buf), "%ls==%ls", w3, w3);
	printf("%s\n", buf);
	dump_bytes("@", buf, (int)strlen(buf));

	memset(wbuf, 0, sizeof(wbuf));
	swprintf(wbuf, sizeof(wbuf) / sizeof(wbuf[0]), L"%ls==%ls", w3, w3);
	wprintf(L"%ls\n", wbuf);
	dump_bytes("#", wbuf, (int)(wcslen(wbuf) * sizeof(wchar_t)));

	return 0;
}
/*set fileencoding=utf-8*/
最后贴上我对netsurf的修改:
void gui_get_clipboard(char **buffer, size_t *length) { HANDLE clipboard_handle; char *out = NULL; wchar_t *content = NULL; int wlen, ret = -1; ret = OpenClipboard(input_window->main); clipboard_handle = GetClipboardData(CF_UNICODETEXT); if (clipboard_handle != NULL) { content = GlobalLock(clipboard_handle); wlen = wcslen(content); ret = utf8_convert((char*)content, wlen*sizeof(wchar_t), "UCS-2LE", "UTF-8", &out); if(UTF8_CONVERT_OK == ret){ *buffer = out; *length = strlen(out); }else{ *buffer = NULL; *length = 0; } GlobalUnlock(clipboard_handle); } CloseClipboard(); } void gui_set_clipboard(const char *buffer, size_t length, nsclipboard_styles styles[], int n_styles) { HANDLE h, hnew; wchar_t *orig, *new, *wbuf = NULL; int ret1 = -1, ret2 = -1; size_t len2, len; if(!OpenClipboard(input_window->main))return; h = GetClipboardData(CF_UNICODETEXT); orig = (!h) ? L"" : GlobalLock(h); len = wcslen(orig); ret2 = utf8_convert(buffer, length, "UTF-8", "UCS-2LE", (char**)&wbuf); len2 = wcslen(wbuf); hnew = GlobalAlloc(GHND, (len + len2 + 1)*sizeof(wchar_t)); new = (wchar_t *)GlobalLock(hnew); swprintf(new, (len + len2 + 1), L"%ls%ls", orig, wbuf); if(h){ GlobalUnlock(h); EmptyClipboard(); } GlobalUnlock(hnew); SetClipboardData(CF_UNICODETEXT, hnew); free(wbuf); CloseClipboard(); }