计算用户相似度:两个用户的相似度简单地等于两者评分一致的次数除以两者共同评分过的条目数。
02data.php
<?php
// Sample ratings fixture: user name => (item name => integer score).
$data = array();

$data['Frank'] = array(
    'Tears'     => 5,
    'La'        => 4,
    'Robinson'  => 5,
    'Yesterday' => 4,
    'Wizard'    => 5,
    'Mozart'    => 5,
    'Bethoven'  => 5,
);

$data['Constantine'] = array(
    'Tears'           => 5,
    'Fiddler'         => 5,
    'Robinson'        => 5,
    'Wonderful World' => 4,
    'Wizard'          => 4,
    'Let It Be'       => 5,
    'Mozart'          => 5,
);

// NOTE(review): this user has 'Beethoven' while the others use 'Bethoven';
// the two spellings are distinct keys -- confirm whether that is intended.
$data['Catherine'] = array(
    'Tears'     => 1,
    'Robinson'  => 2,
    'Yesterday' => 2,
    'Beethoven' => 3,
    'Sunnday'   => 1,
    'Let It Be' => 2,
);

$data['David'] = array(
    'Tears'     => 1,
    'Robinson'  => 2,
    'Yesterday' => 2,
    'Let It Be' => 2,
    'Bethoven'  => 5,
    'love'      => 1,
    'world'     => 1,
);

$data['lei'] = array(
    'love'     => 1,
    'Bethoven' => 5,
);
?>
<?php
/**
 * Recommendation based on similar users.
 *
 * Predicts ratings for items a user has not rated yet by combining the
 * ratings of other users, each weighted by that user's similarity to the
 * target user. Similarity is the number of co-rated items that received the
 * exact same score from both users, divided by the number of co-rated items.
 *
 * @author lei
 */
class SimilarUsers
{
    /**
     * Estimate ratings for every item that $user has not rated.
     *
     * @param array $user        Map of item name => rating for the target user.
     * @param array $anotherUser Map of user name => (item name => rating)
     *                           for the users to learn from.
     *
     * @return array Map of item name => estimated rating (float), ordered
     *               from the highest estimate to the lowest. Empty when no
     *               other user contributes a prediction.
     */
    public function estimateUserBasedRating($user, $anotherUser)
    {
        $pMatrix = array(); // item => sum of (similarity * rating)
        $sMatrix = array(); // item => sum of similarity

        foreach ($anotherUser as $ratings) {
            $sim = $this->getSimilar($user, $ratings);
            if ($sim <= 0) {
                // Users with zero similarity contribute nothing.
                continue;
            }

            // Only predict items the target user has not scored yet.
            foreach ($ratings as $item => $rating) {
                if (isset($user[$item])) {
                    continue;
                }
                if (!isset($pMatrix[$item])) {
                    $pMatrix[$item] = 0;
                    $sMatrix[$item] = 0;
                }
                // Weight the neighbour's score by the similarity.
                $pMatrix[$item] += $sim * $rating;
                $sMatrix[$item] += $sim;
            }
        }

        // Must start as an array: with no predictions the loop below never
        // runs, and arsort() would otherwise receive an undefined variable.
        $estimatedRating = array();
        foreach ($pMatrix as $item => $weightedSum) {
            // Predicted score = weighted sum / plain sum of similarities.
            $estimatedRating[$item] = (float) $weightedSum / (float) $sMatrix[$item];
        }

        arsort($estimatedRating);

        return $estimatedRating;
    }

    /**
     * Build a 5x5 table counting, for every co-rated item, the pair
     * (rating by $user1, rating by $user2).
     *
     * Assumes the site's ratings are integers in the range 1-5.
     *
     * @param array $user1 Map of item name => rating.
     * @param array $user2 Map of item name => rating.
     *
     * @return array Table indexed [rating1][rating2] => number of items.
     */
    private function simMatrix($user1, $user2)
    {
        $matrix = array_fill(1, 5, array_fill(1, 5, 0));

        foreach ($user1 as $item => $rating1) {
            if (isset($user2[$item])) {
                $matrix[$rating1][$user2[$item]]++;
            }
        }

        return $matrix;
    }

    /**
     * Similarity of two users: the number of co-rated items with identical
     * scores divided by the number of co-rated items.
     *
     * @param array $user1 Map of item name => rating.
     * @param array $user2 Map of item name => rating.
     *
     * @return int|float 0 when the users share no rated item, otherwise a
     *                   float ratio of agreements to co-rated items.
     */
    private function getSimilar($user1, $user2)
    {
        $matrix    = $this->simMatrix($user1, $user2);
        $total     = $this->getTotalCount($matrix);
        $agreement = $this->getAgreementCount($matrix);

        if ($total == 0) {
            return 0;
        }

        return (float) $agreement / (float) $total;
    }

    /**
     * Counter helper: total number of co-rated items recorded in the table.
     *
     * @param array $matrix Table produced by simMatrix().
     *
     * @return int|float Sum of all cells.
     */
    private function getTotalCount($matrix)
    {
        $ratingCount = 0;
        foreach ($matrix as $row) {
            $ratingCount += array_sum($row);
        }

        return $ratingCount;
    }

    /**
     * Counter helper: number of co-rated items on which both users gave the
     * same score, i.e. the sum of the table's diagonal.
     *
     * @param array $matrix Table produced by simMatrix().
     *
     * @return int Sum of the diagonal cells.
     */
    private function getAgreementCount($matrix)
    {
        $ratingCount = 0;
        // Walk the actual row keys rather than assuming indices 1..count.
        foreach ($matrix as $rating => $row) {
            if (isset($row[$rating])) {
                $ratingCount += $row[$rating];
            }
        }

        return $ratingCount;
    }
}

// Demo: predict ratings for Frank from the remaining users in 02data.php.
include_once '02data.php';

$user = $data['Frank'];
unset($data['Frank']);
$another = $data;

$estimate = new SimilarUsers;
$p = $estimate->estimateUserBasedRating($user, $another);
var_dump($p);
?>