宽字符和netsurf 中剪贴板的实现

为了让netsurf有复制粘贴功能, 使用CF_TEXT对英文没问题,对汉字就乱码了,因此一定要使用CF_UNICODETEXT。

这里就引出几个问题:

1. netsurf内部编码是utf-8的,如何换成utf-16给windows?

2. 操作剪贴板的例程?

3. 宽字符串的操作,比如取长度?

 

前两点容易搜索到,第3点有几个坑需要注意:

1. printf的格式串参数是char *,wprintf的格式串参数是wchar_t *,因此使用宽字符函数时一定要逐个检查全部入口参数的类型;

2. sprintf()没有maxlen, swprintf有:
int swprintf(wchar_t *wcs, size_t maxlen,   const wchar_t *format, ...);
这个可把我折腾了一阵子。

 

贴上测试代码:

/*
wchar_t *wcs = L"xxx";
编译器会自动把L"xxx"从源文件编码(这里是UTF-8)转换为宽字符编码:在Windows上是UTF-16LE(BMP内的字符与UCS-2LE相同),在Linux glibc上是UTF-32。

不同称呼:

STD    C        Win
char   ansi     mbcs
wchar  wchar_t  WCHAR

sizeof(wchar_t) 在win上是2,在unix glibc为4.



*/
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>

#ifdef _WIN32
#include <windows.h>
#define LC_STR "chs"
#else
#define LC_STR "zh_CN.UTF-8"
#endif

/*
 * Write one line to stdout: `prefix`, then each of the first `size`
 * bytes at `data` as a space-separated two-digit lowercase hex value,
 * then a newline.  A `size` of zero or less prints only the prefix.
 */
void dump_bytes(char *prefix,  void *data, int size)
{
	unsigned char *bytes = data;
	int pos = 0;

	printf("%s", prefix);
	while (pos < size) {
		printf(" %02x", bytes[pos]);
		pos++;
	}
	putchar('\n');
}

/*
 * Convert a UTF-8 byte string into a newly allocated wide-character string.
 *
 * mbcs    UTF-8 input bytes.
 * length  number of bytes of `mbcs` to convert; handed to
 *         MultiByteToWideChar as its byte count.
 * len     optional out-parameter (may be NULL).  Receives the number of
 *         wide characters produced, or 0 on any failure.
 *
 * Returns a NUL-terminated buffer that the caller must free(), or NULL on
 * failure.  On non-Windows builds the conversion is not implemented and
 * the function always returns NULL.
 */
wchar_t* char2wchar(char *mbcs, int length, int *len)
{
	/* Give the caller a defined count on every failure path. */
	if (len) {
		*len = 0;
	}

	#ifdef _WIN32
	wchar_t *wstring;
	int wlen = 0;

	/*
	 * MultiByteToWideChar fails when the byte count is 0, so an empty
	 * input is treated as an empty result instead of an error.
	 */
	if (length != 0) {
		wlen = MultiByteToWideChar(CP_UTF8, 0, mbcs, length, NULL, 0);
		if (wlen <= 0) {
			return NULL;
		}
	}

	wstring = malloc(sizeof(*wstring) * ((size_t)wlen + 1));
	if (wstring == NULL) {
		return NULL;
	}

	if (wlen > 0 &&
	    MultiByteToWideChar(CP_UTF8, 0, mbcs, length, wstring, wlen) != wlen) {
		free(wstring);
		return NULL;
	}

	/*
	 * With an explicit byte count the API does not append a terminator;
	 * the extra element allocated above is for this one.
	 */
	wstring[wlen] = L'\0';

	if (len) {
		*len = wlen;
	}
	return wstring;
	#else
	(void)mbcs;
	(void)length;
	return NULL; /* TODO: conversion is not implemented off Windows */
	#endif
}


/*
 * Demo driver: prints the same text through the narrow and wide
 * formatting functions and hex-dumps the in-memory wide representation
 * so the encodings can be compared.
 *
 * All output goes through printf (using %ls for wide strings).  A stream
 * that has been used for byte output must not then be used for wide
 * output, so mixing printf and wprintf on stdout is not portable.
 */
int main(int argc, char **argv)
{
	wchar_t *w2, *w1 = L"百度一下,你就知道";
	char *mbcs = "百度一下,你就知道";
	char buf[1024] = "";
	wchar_t wbuf[1024] = L"";
	wchar_t w3[] = {0x0057, 0x0069, 0x006b, 0x0069, 0}; /* L"Wiki" */
	int l1;
	int l2 = 0; /* stays 0 if char2wchar does not fill it in */

	(void)argc;
	(void)argv;

	/* %ls conversions depend on the locale; warn instead of failing silently. */
	if (setlocale(LC_ALL, LC_STR) == NULL) {
		fprintf(stderr, "warning: locale \"%s\" is not available\n", LC_STR);
	}

	/* sizeof yields size_t; cast so the argument matches %d. */
	printf("sizeof(wchar_t) %d\n", (int)sizeof(wchar_t));

	l1 = (int)wcslen(w1);

	printf("wcs %ls len %d\n", w1, l1);

	snprintf(buf, sizeof(buf), "%ls %d %ls %s\n", w1, 123, w1, "end");
	printf("%s", buf);
	/* Byte count follows the platform's wchar_t size, not a fixed 2. */
	dump_bytes("L", w1, l1 * (int)sizeof(wchar_t));

	w2 = char2wchar(mbcs, (int)strlen(mbcs), &l2);
	if (w2 != NULL) {
		printf("%d %ls %s\n", 123, w2, "end");
		dump_bytes("C", w2, l2 * (int)sizeof(wchar_t));
		free(w2);
	} else {
		printf("%d %s %s\n", 123, "(char2wchar failed)", "end");
	}

	memset(buf, 0, sizeof(buf));
	snprintf(buf, sizeof(buf), "%ls==%ls", w3, w3);
	printf("%s\n", buf);
	dump_bytes("@", buf, (int)strlen(buf));

	/* swprintf, unlike sprintf, takes the destination capacity in elements. */
	memset(wbuf, 0, sizeof(wbuf));
	swprintf(wbuf, sizeof(wbuf)/sizeof(wbuf[0]), L"%ls==%ls", w3, w3);
	printf("%ls\n", wbuf);
	dump_bytes("#", wbuf, (int)(wcslen(wbuf) * sizeof(wchar_t)));

	return 0;
}
/*set fileencoding=utf-8*/


最后贴上我对netsurf的修改:

/*
 * Read the clipboard's Unicode text and hand it back as UTF-8.
 *
 * buffer  out: the string produced by utf8_convert, or NULL when the
 *         clipboard cannot be opened, holds no Unicode text, or the
 *         conversion fails.
 *         NOTE(review): presumably the caller releases it with free();
 *         confirm against utf8_convert's contract.
 * length  out: strlen() of *buffer, or 0 when *buffer is NULL.
 *
 * CF_UNICODETEXT data is UTF-16, so the source encoding is named as
 * "UTF-16LE"; "UCS-2LE" cannot represent surrogate pairs.
 */
void gui_get_clipboard(char **buffer, size_t *length)
{
	HANDLE clipboard_handle;
	char *out = NULL;
	wchar_t *content = NULL;

	/* Defined results for every early exit below. */
	*buffer = NULL;
	*length = 0;

	if (!OpenClipboard(input_window->main)) {
		return;
	}

	clipboard_handle = GetClipboardData(CF_UNICODETEXT);
	if (clipboard_handle != NULL) {
		content = GlobalLock(clipboard_handle);

		if (content != NULL) {
			size_t nbytes = wcslen(content) * sizeof(wchar_t);

			if (utf8_convert((char*)content, nbytes,
					"UTF-16LE", "UTF-8", &out) == UTF8_CONVERT_OK &&
					out != NULL) {
				*buffer = out;
				*length = strlen(out);
			}

			/* Unlock only what was actually locked. */
			GlobalUnlock(clipboard_handle);
		}
	}

	CloseClipboard();
}


/*
 * Append UTF-8 text to the Unicode text currently on the clipboard.
 *
 * buffer    UTF-8 text to append.
 * length    number of bytes in `buffer`.
 * styles    unused.
 * n_styles  unused.
 *
 * The existing CF_UNICODETEXT content (if any) and the converted text are
 * concatenated into a fresh global memory block, which then replaces the
 * clipboard's Unicode text.  On any failure the clipboard is left as it
 * was and the function returns silently.
 *
 * CF_UNICODETEXT data is UTF-16, so the target encoding is named as
 * "UTF-16LE"; "UCS-2LE" cannot represent surrogate pairs.
 */
void gui_set_clipboard(const char *buffer, size_t length,
		nsclipboard_styles styles[], int n_styles)
{
	HANDLE h;
	HANDLE hnew;
	wchar_t *orig = NULL;    /* locked view of the existing text, if any */
	wchar_t *dest;
	wchar_t *wbuf = NULL;    /* converted text to append */
	char *converted = NULL;
	size_t len = 0, len2;

	(void)styles;
	(void)n_styles;

	if (!OpenClipboard(input_window->main)) {
		return;
	}

	/* Convert into a char * and cast afterwards, not via (char **)&wbuf. */
	if (utf8_convert(buffer, length, "UTF-8", "UTF-16LE",
			&converted) != UTF8_CONVERT_OK || converted == NULL) {
		goto out;
	}
	/*
	 * NOTE(review): wcslen() below assumes utf8_convert ends its output
	 * with at least a two-byte NUL - confirm against its implementation.
	 */
	wbuf = (wchar_t *)converted;
	len2 = wcslen(wbuf);

	h = GetClipboardData(CF_UNICODETEXT);
	if (h != NULL) {
		orig = GlobalLock(h);
		if (orig != NULL) {
			len = wcslen(orig);
		}
	}

	hnew = GlobalAlloc(GHND, (len + len2 + 1) * sizeof(wchar_t));
	if (hnew == NULL) {
		goto out;
	}

	dest = (wchar_t *)GlobalLock(hnew);
	if (dest == NULL) {
		GlobalFree(hnew);
		goto out;
	}

	/* Plain concatenation: existing text, appended text, terminator. */
	if (len > 0) {
		wmemcpy(dest, orig, len);
	}
	wmemcpy(dest + len, wbuf, len2);
	dest[len + len2] = L'\0';
	GlobalUnlock(hnew);

	/* The old text must be copied out before EmptyClipboard drops it. */
	if (orig != NULL) {
		GlobalUnlock(h);
		orig = NULL;
	}
	if (h != NULL) {
		EmptyClipboard();
	}

	/* The system owns hnew only on success; otherwise release it here. */
	if (SetClipboardData(CF_UNICODETEXT, hnew) == NULL) {
		GlobalFree(hnew);
	}

out:
	if (orig != NULL) {
		GlobalUnlock(h);
	}
	free(converted);
	CloseClipboard();
}


 

 

你可能感兴趣的:(宽字符)