基于用户评分Kmeans聚类的协同过滤推荐算法实现
一:基于用户评分Kmeans聚类的协同过滤推荐算法实现步骤
1、构建用户-电影评分矩阵:
public Object readFile(String fileName){
List
double[][] weight = new double[user_num][keyword_num];
Listnew ArrayList
try {
File file = getFile(fileName);
FileReader fr = new FileReader(file);
BufferedReader br = new BufferedReader(fr);
String line = "";
while (br.ready()) {
line = br.readLine();
String[] data = line.split(" ");
String[] str = data[1].split(";");
user.add(data[0]);
for (int i = 0; i < str.length; i++) {
String[] s = str[i].split(":");
weight[Integer.parseInt(data[0])-1][Integer.parseInt(s[0])-1] = Double.parseDouble(s[1]);
}
}
obj.add(user);
obj.add(weight);
br.close();
} catch (Exception e) {
e.printStackTrace();
}
return obj;
}
2、根据用户评分聚类:
public class GenerateGroup implements Base{
// NOTE(review): the three field declarations below are truncated in this listing
// (everything after "List" is missing). The rest of the class uses the names
// players, initPlayers and clusterHeart, so these are presumably those three
// fields; which name belongs to which line cannot be told from here — confirm
// against the original source.
private List
private List
public static List
// Constructor: keeps the supplied list as `players`, creates fresh `initPlayers`
// and `clusterHeart` lists, and copies the first KMeans elements of `players`
// into `initPlayers`, which the clustering loop uses as the current centers.
// NOTE(review): the parameter list is truncated; the body only shows that the
// parameter is named `list`. KMeans is not declared in this listing (presumably
// inherited from Base — confirm). players.get(i) is called for every i < KMeans
// with no size check.
public GenerateGroup(List
players = list;
initPlayers = new ArrayList
clusterHeart = new ArrayList
for (int i = 0; i < KMeans; i++) {
initPlayers.add(players.get(i));
}
}
// No-argument constructor; assigns none of the fields.
public GenerateGroup(){
super();
}
// Clustering loop: on every pass each user in `players` is put into the bucket
// of the center in `initPlayers` that computOrder selects from the distances,
// then a new center is computed per bucket with findNewCenter. The loop repeats
// while any new center differs from the old one, and finally returns `results`.
// NOTE(review): the method name and return type (after "public List") and the
// declaration of `results` on the following line are truncated in this listing.
// `results` is indexed 0..KMeans-1 and every slot is assigned a new ArrayList,
// so it is presumably an array of KMeans lists — confirm against the original.
public List
List
boolean centerchange = true;
while(centerchange){// keep iterating until no cluster center changes
centerchange = false;
// Start every pass with empty buckets (the element type after "ArrayList" is truncated).
for (int i = 0; i < KMeans; i++) {
results[i] = new ArrayList
}
for(int i=0;i<players.size();i++){// for each user, compute the distance to every cluster center
User p = players.get(i);
double[] dists = new double[KMeans];
for(int j=0;j<initPlayers.size();j++){
User initP = initPlayers.get(j);
double dist = distance(initP, p);
dists[j] = dist;
}
int dist_index = computOrder(dists);// index of the smallest distance, i.e. the chosen cluster
results[dist_index].add(p);
}
for(int i=0;i<KMeans;i++){// compare new and old centers; replace the old one when they differ
User player_new = findNewCenter(results[i]);
User player_old = initPlayers.get(i);
if (!IsPlayerEqual(player_new, player_old)) {
centerchange = true;
initPlayers.set(i, player_new);
// NOTE(review): clear() discards centers added earlier in this pass, but centers
// added after it in the same pass stay in the list and are added again on the
// next pass — verify clusterHeart ends with exactly one entry per cluster.
clusterHeart.clear();
}else{
clusterHeart.add(player_new);// remember this (unchanged) cluster center
}
}
}
return results;
}
/**
 * Tells whether two cluster centers carry the same weight vector.
 *
 * <p>Two users are equal when they are the same reference, or when both weight
 * vectors are absent, or when the vectors have the same length and every
 * component compares equal with {@code ==}.
 *
 * <p>The previous version indexed {@code s1} with {@code s2.length}: it threw
 * when {@code s1} was shorter, reported "equal" when {@code s1} was longer with
 * a matching prefix, and threw a NullPointerException when either vector was
 * missing (which can happen for the weightless User that findNewCenter returns
 * for an empty cluster, if User leaves its weights null — confirm).
 *
 * @param p1 first center, may be null
 * @param p2 second center, may be null
 * @return true when both centers describe the same point
 */
public boolean IsPlayerEqual(User p1, User p2) {
    if (p1 == p2) {
        return true;
    }
    if (p1 == null || p2 == null) {
        return false;
    }
    double[] s1 = p1.getWeights();
    double[] s2 = p2.getWeights();
    if (s1 == null || s2 == null) {
        // Two centers without weights are the same (empty) center; one without
        // and one with weights are different.
        return s1 == s2;
    }
    if (s1.length != s2.length) {
        return false;
    }
    for (int i = 0; i < s1.length; i++) {
        if (s1[i] != s2[i]) {
            return false;
        }
    }
    return true;
}
//找出新的聚类中心
public User findNewCenter(List
User t = new User();
if (ps == null || ps.size() == 0) {
return t;
}
double[] ds= new double[ps.get(0).getWeights().length];
for (int i = 0; i < ps.get(0).getWeights().length; i++) {
for (int j = 0; j < ps.size(); j++) {
ds[i]+= ps.get(j).getWeights()[i];
}
}
for (int i = 0; i < ps.get(0).getWeights().length; i++) {
ds[i]=ds[i]/ps.size();
}
t.setWeights(ds);
return t;
}
/**
 * Returns the index of the smallest value in {@code dists}.
 *
 * <p>Ties go to the earliest index because a later value only wins when it is
 * strictly smaller. An empty array yields 0.
 *
 * @param dists distances to compare
 * @return index of the first minimum, or 0 for an empty array
 */
public int computOrder(double[] dists) {
    int best = 0;
    for (int k = 1; k < dists.length; k++) {
        if (dists[best] > dists[k]) {
            best = k;
        }
    }
    return best;
}
/**
 * Euclidean distance between the weight vectors of two users, taken over the
 * components of {@code p1}'s vector.
 *
 * <p>The previous version wrapped the loop in an empty {@code catch}: any
 * failure (null user, missing weights, {@code p0}'s vector shorter than
 * {@code p1}'s) silently produced the square root of whatever had been summed
 * so far, typically 0.0. Because 0.0 is the smallest possible distance, a
 * center without weights then looked like the nearest center for every user.
 * Those cases now return {@link Double#POSITIVE_INFINITY}, so an unusable
 * center is never preferred. Inputs that did not throw before give the same
 * result as before.
 *
 * @param p0 first user (the cluster center at the visible call site)
 * @param p1 second user
 * @return the distance, or positive infinity when the vectors cannot be compared
 */
public double distance(User p0,User p1){
    if (p0 == null || p1 == null) {
        return Double.POSITIVE_INFINITY;
    }
    double[] s1 = p0.getWeights();
    double[] s2 = p1.getWeights();
    if (s1 == null || s2 == null || s1.length < s2.length) {
        return Double.POSITIVE_INFINITY;
    }
    double dis = 0;
    for (int i = 0; i < s2.length; i++) {
        dis += Math.pow(s1[i] - s2[i], 2);
    }
    return Math.sqrt(dis);
}
}
3、计算用户之间的相似度:
// Builds one similarity value per entry of `list`, comparing the row of `weight`
// that belongs to `userId` with the row of each listed user; the entry for
// `userId` itself is set to 1 without calling ComputeSimilarity.
// NOTE(review): the parameter list after "List" and the declaration on the next
// line are truncated in this listing. The body shows a parameter named `list`
// whose elements expose getUserId(), and a local list named `user` that collects
// those ids; whether `weight` is a parameter or a field cannot be told from here.
public double[] generateSimilarityMatrix2(String userId,List
List
// Collect the ids of all listed users, in list order.
for (int i = 0; i < list.size(); i++) {
user.add(list.get(i).getUserId());
}
double[] similarityMatrix = new double[user.size()];
for (int i = 0; i < user.size(); i++) {// iterate over the core users
if(user.get(i).equals(userId)){
similarityMatrix[i]=1;
continue;
}
// Rows of `weight` are selected by each id's position in `user` (indexOf).
// NOTE(review): indexOf returns -1 when userId is not in the list, which would
// index weight[-1]; elsewhere in this article rows are selected with
// Integer.parseInt(id)-1 instead — confirm which indexing the caller's matrix uses.
similarityMatrix[i] = new ComputeSimilarity().computeSimilarity(weight[user.indexOf(userId)], weight[user.indexOf(user.get(i))]);
}
return similarityMatrix;
}
4、获取最近邻和计算推荐结果:
// Picks nearest neighbours by similarity and predicts weights for items the
// target user has no weight for; returns a list holding, in this order: the
// sorted predicted values, the matching item column indices, the sorted
// similarities, the user ids in the same order, and the neighbour count `num`.
// NOTE(review): this listing is damaged — the method name, every `for`
// condition that contained '<', the comparison in the first sort, and several
// List declarations are truncated. Comments below describe only what the
// surviving statements show; anything inferred is marked as such.
public Listdouble[] similarityMatrix,double[][] weight,String userId,List
// Copy the ids from `list` into an array so they can be reordered together with the similarities.
String[] userIds = new String[list.size()];
for(int i=0;i
userIds[i] = list.get(i);
}
// Work on a copy of the similarities so the caller's array is not reordered.
double[] similarity = new double[similarityMatrix.length];
for(int i=0;i
similarity[i] = similarityMatrix[i];
}
// Bubble sort: adjacent entries of `similarity` are swapped and `userIds` is
// swapped in step, so both arrays stay aligned.
// NOTE(review): the comparison after similarity[j] is truncated; the counting
// loop further down stops at the first 0.0, which suggests descending order — confirm.
for(int i=0;i
for(int j=0;j
if(similarity[j]
double temp = similarity[j];
similarity[j] = similarity[j+1];
similarity[j+1] = temp;
String tag = userIds[j];
userIds[j] = userIds[j+1];
userIds[j+1] = tag;
}
}
}
// Count the leading entries up to (not including) the first similarity equal to 0.0.
int n = 0;
for(int i=0;i
if(similarity[i]==0.0)
break;
n++;
}
// Use at most NUM neighbours (NUM is declared outside this listing).
int num = n>NUM?NUM:n;
// NOTE(review): the next three declarations are truncated. The code below uses
// three parallel lists: list_user_temp (item column indices j), list_simi_sum
// (accumulated similarity per item) and list_simi_weight_sum (accumulated
// similarity * neighbour weight per item); presumably these are their declarations.
List
List
List
// NOTE(review): both loop bounds are truncated; i indexes the sorted neighbours
// (similarity[i] / userIds[i]) and j indexes item columns of `weight`.
for(int i=0;i
for(int j=0;j
// Only items the target user has 0.0 for and this neighbour has a non-zero weight for.
if(weight[Integer.parseInt(userId)-1][j]==0.0&&weight[Integer.parseInt(userIds[i])-1][j]!=0.0){
if(list_user_temp.size()==0||!list_user_temp.contains(j)){
// First neighbour contributing to item j: start its two running sums.
list_user_temp.add(j);
list_simi_sum.add(similarity[i]);
list_simi_weight_sum.add(similarity[i]*weight[Integer.parseInt(userIds[i])-1][j]);
}else{
// Item j already seen: add this neighbour's contribution to both sums.
int index = list_user_temp.indexOf(j);
double d1 = list_simi_sum.get(index);
double d2 = list_simi_weight_sum.get(index);
list_simi_sum.set(index, d1+similarity[i]);
list_simi_weight_sum.set(index, d2+similarity[i]*weight[Integer.parseInt(userIds[i])-1][j]);
}
}
}
}
// Predicted value per item = weighted sum / similarity sum, or 0 when the
// similarity sum is 0.0. (The declaration of list_result is truncated.)
List
for(int i=0;i
list_result.add(list_simi_sum.get(i)!=0.0?list_simi_weight_sum.get(i)/list_simi_sum.get(i):0);
}
Object[] obj = list_result.toArray();
Object[] obj2 = list_user_temp.toArray();
// Bubble sort the predictions, swapping the item indices in step; an element
// moves right when it is smaller than its right neighbour, i.e. larger
// predictions move toward the front. (Loop bounds are truncated.)
for(int i=0;i
for(int j=0;j
if((Double)obj[j]<(Double)obj[j+1]){
Object o = obj[j];
obj[j] = obj[j+1];
obj[j+1] = o;
o = obj2[j];
obj2[j] = obj2[j+1];
obj2[j+1] = o;
}
}
}
// Pack the five outputs in a fixed order (the declaration of `result` is truncated).
Listnew ArrayList
result.add(obj);
result.add(obj2);
result.add(similarity);
result.add(userIds);
result.add(num);
return result;
}
二:推荐结果:
1、聚类结果:
===========类别1================
1
===========类别2================
2 95 193 288 306 404
===========类别3================
3 11 12 13 14 15 16 17 18 19
20 21 22 23 24 25 26 27 28 29
30 31 32 34 35 36 37 38 39 40
41 42 43 44 45 46 47 48 49 50
52 53 54 55 56 57 58 59 60 61
62 63 64 65 66 67 68 69 70 71
72 73 74 76 77 78 79 80 81 82
83 84 85 86 87 88 89 90 91 92
93 94 96 97 98 100 101 102 103 104
105 106 107 108 109 110 111 112 113 114
115 116 117 118 119 120 121 122 123 124
125 126 127 129 130 131 132 133 134 135
136 138 139 140 141 142 143 144 145 146
147 148 149 150 151 152 153 154 155 156
157 158 159 160 161 162 163 164 165 166
167 168 169 170 171 172 173 174 175 176
177 178 179 180 181 182 183 184 185 186
187 188 189 190 191 192 194 195 196 198
199 200 201 202 203 204 205 206 207 208
209 210 211 212 213 214 215 216 217 218
219 220 221 222 223 224 225 226 227 228
229 230 231 232 233 234 235 236 237 238
239 240 241 242 243 244 245 246 247 248
249 250 251 252 253 254 255 256 257 258
259 260 261 262 263 264 265 266 267 268
269 270 271 272 273 274 275 276 277 278
279 280 281 282 283 284 285 286 287 290
291 292 293 294 295 296 297 298 300 301
302 303 304 305 307 308 309 310 311 312
313 314 315 316 317 318 319 320 321 322
323 324 325 326 327 328 329 330 331 333
334 335 336 337 338 339 340 341 342 343
344 345 346 347 348 349 350 351 352 353
354 355 356 357 358 359 360 361 362 363
364 365 366 367 368 369 370 371 372 373
374 375 376 377 378 379 380 381 382 383
384 385 386 387 388 389 390 391 392 393
394 395 396 397 398 399 400 401 402 403
405 406 407 408 409 410 411 412 413 414
415 416 417 418 419 420 421 422 423 424
425 426 427 428 429 430 431 432 433 434
435 436 437 438 439 440 441 442 443 444
445 446 447 448 449 450 451 452 453 454
455 456 457 458 459 460 461 462 463 464
465 466 467 468 469 470 471 472 474 475
476 477 478 479 480 481 482 483 484 485
486 488 489 490 491 492 493 494 495 496
497 498 499 500
===========类别4================
4 51 137 197
===========类别5================
5 99 128 289 299
===========类别6================
6 332
===========类别7================
7
===========类别8================
8 33 75 473 487
===========类别9================
9
===========类别10================
10
2、最近邻:
===============TOP-N 10个==============
478:0.3177413723944363 499:0.3156693955485105 177:0.31544323919777684 226:0.31313536250109436 22:0.3106645329420879
342:0.31016327270390476 470:0.3099875760697812 414:0.3097300678691507 464:0.30873879229693146 143:0.3084047430145349
3、推荐结果:
================推荐关键字====================
568 预测权重:0.815 880 预测权重:0.775 350 预测权重:0.720 1399 预测权重:0.716 954 预测权重:0.626
1386 预测权重:0.607 343 预测权重:0.575 1173 预测权重:0.559 417 预测权重:0.529 1412 预测权重:0.526
471 预测权重:0.525 1733 预测权重:0.518 1677 预测权重:0.515 662 预测权重:0.493 73 预测权重:0.408
1289 预测权重:0.393 282 预测权重:0.382 283 预测权重:0.330 594 预测权重:0.327 437 预测权重:0.266
79 预测权重:0.262 761 预测权重:0.262 1322 预测权重:0.258 738 预测权重:0.251 1892 预测权重:0.247
1787 预测权重:0.242 280 预测权重:0.238 577 预测权重:0.234 1732 预测权重:0.231 373 预测权重:0.227
1757 预测权重:0.211 911 预测权重:0.193 1462 预测权重:0.189 1631 预测权重:0.177 843 预测权重:0.175
129 预测权重:0.175 1526 预测权重:0.168 962 预测权重:0.160 1662 预测权重:0.158 752 预测权重:0.142
488 预测权重:0.137 848 预测权重:0.135 1640 预测权重:0.134 631 预测权重:0.103 675 预测权重:0.103
983 预测权重:0.090 4 预测权重:0.089 862 预测权重:0.077 1063 预测权重:0.065 1026 预测权重:0.053
885 预测权重:0.048 719 预测权重:0.046 1539 预测权重:0.038 1361 预测权重:0.020
项目源代码:https://download.csdn.net/download/u011291472/11967809