PHP 搜索分词实现代码

<?php 

/**
 * Trie (prefix tree) node used to build a word-segmentation dictionary.
 *
 * Each node stores one character; a path from the root through child nodes
 * spells an inserted word, and the last node of a word has $isEnd set.
 *
 * @author: xiaojiang 2014-01-08
 */
class Tree{

    /** @var string Character held by this node ('' for a node built without a label, e.g. the root). */
    public $w = '';

    /** @var Tree[] Child nodes, in insertion order. */
    public $subT = array();

    /** @var bool True when the path ending at this node is a complete inserted word. */
    public $isEnd = false;

    /**
     * @param string $w     Character label for the node; leave empty for a root node.
     * @param bool   $isEnd Whether this node terminates a word (only applied when a label is given).
     */
    public function __construct($w = '', $isEnd = false){
        // Strict checks instead of empty(): the character '0' is a valid label,
        // but empty('0') is true and would leave the node unlabeled.
        if ($w !== '' && $w !== null && $w !== false) {
            $this->w = $w;
            $this->isEnd = $isEnd;
        }
    }

    /**
     * Insert a word into the trie, one UTF-8 character per level.
     *
     * @param string $str Word to add; an empty string is ignored.
     * @return void
     */
    public function insert( $str ){
        $str = (string) $str;
        if ($str === '') {
            return;
        }

        $scope = $this;
        foreach (self::splitChars($str) as $ch) {
            $scope = $scope->insertNode($ch);
        }
        // Mark the node reached by the last character as a word boundary.
        $scope->isEnd = true;
    }

    /**
     * Return the child labeled $w, creating and appending it when absent.
     *
     * @param string $w Single character.
     * @return Tree
     */
    private function insertNode( $w ){
        $child = $this->hasTree($w);
        if ($child === false) {
            $child = new Tree($w);
            $this->subT[] = $child;
        }
        return $child;
    }

    /**
     * Look up the direct child labeled $w.
     *
     * Declared as returning by reference (kept for existing callers), so every
     * return path hands back a variable rather than a literal; returning the
     * literal false would raise "Only variable references should be returned
     * by reference".
     *
     * @param string|null $w Single character to look for; non-scalar or empty values never match.
     * @return Tree|false The matching child, or false when there is none.
     */
    public function &hasTree($w){
        $result = false;

        $needle = is_scalar($w) ? (string) $w : '';
        if ($needle === '') {
            return $result;
        }

        foreach ($this->subT as $child) {
            // Strict string comparison: labels are characters, not numbers.
            if ((string) $child->w === $needle) {
                $result = $child;
                break;
            }
        }
        return $result;
    }

    /**
     * Split a byte string into UTF-8 characters using the lead byte of each
     * sequence to decide its width (1 to 4 bytes).
     *
     * Bytes that cannot start a sequence (stray continuation bytes, 0xF8+)
     * are emitted as single-byte items so the scan always advances.
     *
     * @param string $str
     * @return string[]
     */
    private static function splitChars($str){
        $chars = array();
        $len = strlen($str);
        $i = 0;
        while ($i < $len) {
            $lead = ord($str[$i]);
            if ($lead >= 0xF0 && $lead < 0xF8) {
                $width = 4;
            } elseif ($lead >= 0xE0 && $lead < 0xF0) {
                $width = 3; // covers CJK characters
            } elseif ($lead >= 0xC0 && $lead < 0xE0) {
                $width = 2;
            } else {
                $width = 1; // ASCII, or an invalid lead byte
            }
            $chars[] = substr($str, $i, $width);
            $i += $width;
        }
        return $chars;
    }

}





/**
 * Character-indexed view of a UTF-8 string.
 *
 * The constructor splits the input into an array of characters so callers
 * can address the text by character position instead of byte offset.
 */
class myStr{

    /** @var string Original input string. */
    private $str = '';

    /** @var string[] The input split into UTF-8 characters. */
    private $arr = array();

    /** @var int Number of characters in $arr. */
    private $len = 0;

    /**
     * @param string $str Text to index.
     */
    public function __construct( $str){
        $str = (string) $str;
        $this->str = $str;

        $byteLen = strlen($str);
        $i = 0;
        while ($i < $byteLen) {
            // The lead byte determines how many bytes the character occupies;
            // anything that cannot start a sequence is taken as one byte so
            // the scan always advances.
            $lead = ord($str[$i]);
            if ($lead >= 0xF0 && $lead < 0xF8) {
                $width = 4;
            } elseif ($lead >= 0xE0 && $lead < 0xF0) {
                $width = 3; // covers CJK characters
            } elseif ($lead >= 0xC0 && $lead < 0xE0) {
                $width = 2;
            } else {
                $width = 1;
            }
            $this->arr[] = substr($str, $i, $width);
            $i += $width;
        }

        $this->len = count($this->arr);
    }

    /**
     * Character at position $idx.
     *
     * @param int $idx Zero-based character position.
     * @return string|null The character, or null when $idx is out of range
     *                     (no warning is raised for out-of-range reads).
     */
    public function getIndex( $idx ){
        return isset($this->arr[$idx]) ? $this->arr[$idx] : null;
    }

    /**
     * @return int Number of characters in the string.
     */
    public function getLength(){
        return $this->len;
    }

}



// Build the dictionary trie.
$tIns = new Tree();
$tIns->insert('中华');
$tIns->insert('人民');
$tIns->insert('共和国');
$tIns->insert('baidu');

// Text to scan, indexed by character.
$strIns = new myStr("cc中华的人民共和国和中国啊啊www.baidua.com");

// From every character position, walk the trie as far as the text allows and
// print the longest dictionary word that starts there (if any).
$total = $strIns->getLength();
for ($i = 0; $i < $total; $i++) {

    $node = $tIns->hasTree($strIns->getIndex($i));
    if (!$node) {
        continue;
    }

    $candidate = ''; // characters consumed so far along the trie path
    $matched = '';   // longest complete word seen along that path
    $j = $i;
    while ($node) {
        $candidate .= $node->w;
        // Remember the last complete word so that a longer path which later
        // dead-ends does not discard a valid shorter match.
        if ($node->isEnd) {
            $matched = $candidate;
        }
        $j++;
        // Stop at the end of the text instead of reading past the last character.
        if ($j >= $total) {
            break;
        }
        $node = $node->hasTree($strIns->getIndex($j));
    }

    // Compare against '' explicitly: the word "0" is falsy but still a match.
    if ($matched !== '') {
        echo $matched."<br>";
    }
}





?>

 输出:

中华
人民
共和国
baidu

你可能感兴趣的:(PHP)