PHPCMS V9 的str_cut函数相比substr可以截断UTF-8而不出现乱码,但它会将<和>都转义,从而让最终的html标记直接显示了出来,而如果用 strip_tags 预处理,也只是去掉了所有的html标记。我希望又能输出html源代码,又能正常截断。于是将str_cut修改了一下。
直接上代码,主要思路是将输入中已转义的实体 &lt; 和 &gt; 临时替换为占位符 <. 和 .>,截断完成后再还原为实体,这样可以避免与真正的 HTML 标记(尖括号)冲突。
/**
 * Truncate a string (UTF-8 or double-byte charset such as GBK) while leaving
 * raw HTML markup unescaped.
 *
 * Common HTML entities in the input are decoded before measuring, and are
 * re-encoded after the cut. Literal "<" and ">" are never re-encoded, so HTML
 * tags survive. To tell real tag brackets apart from the entities "&lt;" and
 * "&gt;", those two entities are temporarily turned into the two-byte
 * placeholders "<." and ".>" and turned back into entities afterwards.
 *
 * Limitations: the cut is width based and not tag-aware, so it may land inside
 * a tag or leave tags unclosed. Text that naturally contains "<." or ".>" is
 * indistinguishable from the placeholders and will be emitted as an entity.
 *
 * Relies on the global CHARSET constant to select the UTF-8 or the
 * double-byte code path.
 *
 * @param string $string Text (may contain HTML) to truncate.
 * @param int    $length Maximum length budget; strings whose byte length is
 *                       not greater than this are returned unchanged.
 * @param string $dot    Suffix appended to a truncated result.
 * @return string The original string, or the truncated string followed by $dot.
 */
function html_cut($string, $length, $dot = '...')
{
    if (strlen($string) <= $length) {
        return $string;
    }

    // Decode entities so each counts as one character. Plain spaces become the
    // marker "∵" (restored later); "&lt;" / "&gt;" become "<." / ".>" so they
    // do not collide with the brackets of real HTML tags.
    $string = str_replace(
        array(' ', '&nbsp;', '&amp;', '&quot;', '&#039;', '&ldquo;', '&rdquo;', '&mdash;', '&lt;', '&gt;', '&middot;', '&hellip;'),
        array('∵', ' ', '&', '"', "'", '“', '”', '—', '<.', '.>', '·', '…'),
        $string
    );
    $strcut = '';

    if (strtolower(CHARSET) === 'utf-8') {
        // Budget in display columns: reserve room for the suffix and shrink by a third.
        $length = intval($length - strlen($dot) - $length / 3);
        $total = strlen($string);
        // $n: byte offset, $tn: byte width of the last character, $noc: columns used.
        $n = $tn = $noc = 0;
        while ($n < $total) {
            $t = ord($string[$n]);
            if ($t == 9 || $t == 10 || (32 <= $t && $t <= 126)) {
                $tn = 1;
                $n++;
                $noc++;
            } elseif (194 <= $t && $t <= 223) {
                $tn = 2;
                $n += 2;
                $noc += 2;
            } elseif (224 <= $t && $t <= 239) {
                $tn = 3;
                $n += 3;
                $noc += 2;
            } elseif (240 <= $t && $t <= 247) {
                $tn = 4;
                $n += 4;
                $noc += 2;
            } elseif (248 <= $t && $t <= 251) {
                $tn = 5;
                $n += 5;
                $noc += 2;
            } elseif ($t == 252 || $t == 253) {
                $tn = 6;
                $n += 6;
                $noc += 2;
            } else {
                // Unexpected byte: skip it without counting a column.
                $n++;
            }
            if ($noc >= $length) {
                break;
            }
        }
        // The last character overshot the budget: drop it.
        if ($noc > $length) {
            $n -= $tn;
        }
        // If the cut falls between the two bytes of a placeholder, extend by
        // one byte so the truncated text contains the complete "<." or ".>".
        if ($n > 0 && $n < $total) {
            $cross_word = substr($string, $n - 1, 2);
            if ($cross_word == '<.' || $cross_word == '.>') {
                $n += 1;
            }
        }
        $strcut = substr($string, 0, $n);
        // Re-encode entities; raw "<" and ">" are intentionally left alone.
        $strcut = str_replace(
            array('∵', '&', '"', "'", '“', '”', '—', '<.', '.>', '·', '…'),
            array(' ', '&amp;', '&quot;', '&#039;', '&ldquo;', '&rdquo;', '&mdash;', '&lt;', '&gt;', '&middot;', '&hellip;'),
            $strcut
        );
    } else {
        // Double-byte path: any byte above 127 is treated as the lead byte of
        // a two-byte character.
        // NOTE(review): the multi-byte literals below only match if this file
        // is saved in the same charset as the data - confirm.
        $dotlen = strlen($dot);
        $maxi = $length - $dotlen - 1;
        $total = strlen($string);
        $search_arr = array('&', ' ', '"', "'", '“', '”', '—', '<.', '.>', '·', '…', '∵');
        $replace_arr = array('&amp;', '&nbsp;', '&quot;', '&#039;', '&ldquo;', '&rdquo;', '&mdash;', '&lt;', '&gt;', '&middot;', '&hellip;', ' ');
        $search_flip = array_flip($search_arr);
        // Decoding may have shortened the string below $maxi, so also stop at its end.
        for ($i = 0; $i < $maxi && $i < $total; $i++) {
            if (ord($string[$i]) > 127) {
                // substr() stays in bounds even when the trail byte is missing.
                $current_str = substr($string, $i, 2);
                $i++;
            } else {
                $current_str = $string[$i];
            }
            if ($i + 1 < $total) {
                $cross_word = substr($string, $i, 2);
                if ($cross_word == '<.' || $cross_word == '.>') {
                    // Consume the second byte so the placeholder is kept whole.
                    $current_str .= $string[++$i];
                }
            }
            if (isset($search_flip[$current_str])) {
                $current_str = $replace_arr[$search_flip[$current_str]];
            }
            $strcut .= $current_str;
        }
    }

    return $strcut . $dot;
}