问题:
由字符串 s1 通过下列三种操作
1、插入一个字符;
2、删除一个字符;
3、改变一个字符
变换到字符串 s2 所需要的最少操作次数(亦即最短编辑距离问题)
php代码实现如下:
<?php

/**
 * Computes the Levenshtein (shortest edit) distance between two GBK-encoded
 * strings, counting insertions, deletions and substitutions of whole
 * characters rather than of individual bytes.
 */
class levenshtein_distance
{
    /**
     * Matches one GBK character: a double-byte sequence when a valid lead
     * byte is followed by a valid trail byte, otherwise a single byte.
     * Every byte of the input is consumed by exactly one match.
     */
    const GBK_CHAR_PATTERN = '/[\x81-\xfe][\x40-\xfe]|./s';

    private $source;
    private $target;

    /**
     * @param string $source GBK-encoded source string.
     * @param string $target GBK-encoded target string.
     */
    public function __construct($source, $target)
    {
        $this->source = (string) $source;
        $this->target = (string) $target;
    }

    /**
     * Returns the minimum number of single-character edits needed to turn
     * the source string into the target string.
     *
     * @return int
     */
    public function get_ld()
    {
        // Compare characters, not bytes: a double-byte GBK character must
        // count as one unit both for the lengths and for the equality test.
        $source = $this->split_chars($this->source);
        $target = $this->split_chars($this->target);

        $columns = count($source);
        $rows = count($target);

        if ($columns === 0) {
            return $rows;
        }
        if ($rows === 0) {
            return $columns;
        }

        // Only the previous row of the DP table is ever read, so keep two
        // rows instead of the whole ($rows + 1) x ($columns + 1) matrix.
        $previous = range(0, $columns);

        for ($i = 1; $i <= $rows; $i++) {
            $targetChar = $target[$i - 1];
            $current = array($i);

            for ($j = 1; $j <= $columns; $j++) {
                $cost = ($source[$j - 1] === $targetChar) ? 0 : 1;

                $current[$j] = min(
                    $previous[$j - 1] + $cost, // substitute (or keep)
                    $current[$j - 1] + 1,      // edit along the source axis
                    $previous[$j] + 1          // edit along the target axis
                );
            }

            $previous = $current;
        }

        return $previous[$columns];
    }

    /**
     * Splits a GBK-encoded string into a list of characters.
     *
     * Implemented with PCRE only, so it does not depend on the mbstring
     * extension being loaded.
     *
     * @param string $str
     * @return array List of one- or two-byte strings.
     */
    private function split_chars($str)
    {
        if ($str === '') {
            return array();
        }

        if (preg_match_all(self::GBK_CHAR_PATTERN, $str, $matches) === false) {
            // PCRE failure: degrade to byte-wise comparison.
            return str_split($str);
        }

        return $matches[0];
    }
}

if (!function_exists('mb_strlen')) {
    /**
     * Minimal stand-in for mb_strlen() when the mbstring extension is not
     * loaded. Counts the characters of $str for a small set of encodings.
     *
     * @param string $str
     * @param string $charset One of utf-8, utf8, gb2312, gbk, big5
     *                        (case-insensitive). Any other value falls back
     *                        to the byte length.
     * @return int
     */
    function mb_strlen($str, $charset = 'utf-8')
    {
        $charsets = array();
        $charsets['utf-8'] = $charsets['utf8'] = '/[\x01-\x7f]|[\xc2-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xff][\x80-\xbf]{3}/';
        $charsets['gb2312'] = '/[\x01-\x7f]|[\xb0-\xf7][\xa0-\xfe]/';
        $charsets['gbk'] = '/[\x01-\x7f]|[\x81-\xfe][\x40-\xfe]/';
        // The second trail-byte range needs its own character class.
        $charsets['big5'] = '/[\x01-\x7f]|[\x81-\xfe]([\x40-\x7e]|[\xa1-\xfe])/';

        $key = strtolower((string) $charset);
        if (!isset($charsets[$key])) {
            return strlen($str);
        }

        // Split the string into an array of characters according to the
        // given encoding; the number of matches is the character count.
        $count = preg_match_all($charsets[$key], $str, $matches);

        return $count === false ? strlen($str) : count($matches[0]);
    }
}

/**
 * Reads one line from standard input with surrounding whitespace removed.
 *
 * @return string The line, or an empty string at end of input.
 */
function read()
{
    // fgets keeps inner spaces; fscanf("%s") would stop at the first one.
    $input = fgets(STDIN);
    if ($input === false) {
        return '';
    }

    return trim($input);
}

/**
 * Prints the edit distance for a fixed sample pair.
 */
function test()
{
    $source = 'abc';
    $target = 'ab';
    $ld = new levenshtein_distance($source, $target);
    echo '以上两个字符串的最短编辑距离为:'.$ld->get_ld();
}

/**
 * Prompts for two strings on standard input and prints their edit distance.
 * Prompts and the result are converted from UTF-8 to GBK before output.
 */
function main()
{
    echo iconv('utf-8', 'gbk', "请输入源字符串\n");
    $source = read();
    echo iconv('utf-8', 'gbk', "请输入目标字符串\n");
    $target = read();
    $ld = new levenshtein_distance($source, $target);
    echo iconv('utf-8', 'gbk', '以上两个字符串的最短编辑距离为:'.$ld->get_ld()."\n");
}

//test();
main();