处理无效的 UTF-16 编码

const replacement = '\ufffd';
// https://gist.github.com/mathiasbynens/bbe7f870208abcfec860
const loneSurrogates = /[\uD800-\uDBFF](?![\uDC00-\uDFFF])|([^\uD800-\uDBFF]|^)[\uDC00-\uDFFF]/g;

// string sanitizes the given str by replacing invalid UTF-16 code unit
// sequences with the unicode replacement character. Returns a new string.
//用 \ufffd 取代无效的增补字节的,无效的高位或者低位
exports.string = function(str) {
    return str.replace(loneSurrogates, '$1' + replacement);
}



原模块: https://github.com/felixge/node-unicode-sanitize/blob/master/index.js

你可能感兴趣的:(处理 无效的 utf-16 编码)