php实现简单的基于DFA算法的敏感词过滤

最近一个学Go语言的朋友给我发了一篇文章 http://blog.csdn.net/chenssy/article/details/26961957 ,讲的是使用DFA(即Deterministic Finite Automaton,确定有穷自动机)算法实现敏感词的过滤,并问我能不能使用PHP来实现。感谢chenssy的精彩文章,下面是我仿照该文章写的PHP实现版本。

2018-05-11新增:
我在这个实现的基础上做了一个增强版,源码见:增强版GitHub源码



// Send a Content-type header declaring the response as HTML encoded in UTF-8
// (the demo at the bottom of this file prints Chinese text via var_dump).
header("Content-type:text/html; charset=utf-8");

/**
 * Minimal key/value map used as one node of the word trie.
 *
 * Entries are kept in a private array rather than as dynamic object
 * properties: creating dynamic properties is deprecated as of PHP 8.2, and a
 * property name starting with "\0" cannot be written at all, whereas any
 * string is a valid array key.
 */
class MyMap
{
    /** @var array Entries of this node, indexed by key. */
    private $entries = [];

    /**
     * Looks up the value stored under a key.
     *
     * @param string $key Entry name (a single character or a flag such as 'isEnd').
     * @return mixed|null The stored value, or null when the key is absent
     *                    (or was stored with a null value).
     */
    public function get($key)
    {
        return isset($this->entries[$key]) ? $this->entries[$key] : null;
    }

    /**
     * Stores a value under a key, overwriting any previous value.
     *
     * @param string $key   Entry name.
     * @param mixed  $value Value to associate with the key.
     * @return void
     */
    public function put($key, $value)
    {
        $this->entries[$key] = $value;
    }
}

/**
 * Sensitive-word filter built on a character trie (the DFA).
 *
 * Every node is a MyMap whose single-character keys point to child nodes and
 * whose 'isEnd' entry is 1 when the path from the root to that node spells a
 * complete dictionary word, 0 otherwise.
 */
class MyFilter
{
    /** @var MyMap|null Root node of the trie; null until the first word is added. */
    public $map = null;

    /**
     * Adds one word to the dictionary.
     *
     * An empty word only ensures that the root node exists; it never marks
     * the root itself as the end of a word.
     *
     * @param string $word Word to register.
     * @return void
     */
    public function addWordToMap($word)
    {
        if (is_null($this->map)) {
            $this->map = new MyMap();
            $this->map->put('isEnd', 0);
        }

        $len = mb_strlen($word);
        $node = $this->map;

        for ($i = 0; $i < $len; $i++) {
            $char = mb_substr($word, $i, 1);
            $child = $node->get($char);

            if (is_null($child)) {
                $child = new MyMap();
                $child->put('isEnd', 0);
                $node->put($char, $child);
            }

            $node = $child;
        }

        // $node is now the node of the last character: flag it as a word end.
        if ($len > 0) {
            $node->put('isEnd', 1);
        }
    }

    /**
     * Finds the dictionary words contained in a string, scanning left to right.
     *
     * At each start position the trie is walked until the first node flagged
     * 'isEnd' is reached, i.e. the shortest dictionary word beginning there is
     * reported, and scanning resumes right after that word. When the walk
     * fails, scanning restarts one character after the start position, so a
     * word that begins inside a failed partial match is still found.
     *
     * @param string $string Text to scan.
     * @return array List of matched words in order of appearance (duplicates
     *               kept); empty when nothing matches or no word was added.
     */
    public function searchFromMap($string)
    {
        $result = [];

        // No word registered yet: there is no trie to walk.
        if (is_null($this->map)) {
            return $result;
        }

        // Split into characters once; restarting a scan re-reads characters.
        $len = mb_strlen($string);
        $chars = [];
        for ($i = 0; $i < $len; $i++) {
            $chars[] = mb_substr($string, $i, 1);
        }

        $start = 0;
        while ($start < $len) {
            $node = $this->map;
            $matched = '';
            $found = false;

            for ($i = $start; $i < $len; $i++) {
                $node = $node->get($chars[$i]);
                if (is_null($node)) {
                    break;
                }

                $matched .= $chars[$i];
                if ($node->get('isEnd')) {
                    $found = true;
                    break;
                }
            }

            if ($found) {
                $result[] = $matched;
                // $i is the index of the last character of the matched word.
                $start = $i + 1;
            } else {
                $start++;
            }
        }

        return $result;
    }
}

// Demo: register a few words, then scan a sentence and dump the matches.
$example = new MyFilter();

foreach (['中国人', '中国男人', '女人'] as $sensitiveWord) {
    $example->addWordToMap($sensitiveWord);
}

// To inspect the generated trie, dump $example->map here.
$result = $example->searchFromMap(
    '我是中国人,我爱中国,中国男人是最优秀的,中国女人是最漂亮的'
);
var_dump($result);

个人博客地址
github源码地址

你可能感兴趣的:(PHP,数据结构与算法,程序设计练习,php,DFA算法,敏感词过滤)