package com.panguoyuan.mahout.itemcf;

import java.io.File;
import java.io.IOException;
import java.util.List;

import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.EuclideanDistanceSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;

/**
 * Command-line demo of a user-based recommender: for every user id found in
 * the data file it prints one line with that user's recommended items.
 */
public class UserCF {

    /** Neighbourhood size handed to {@link NearestNUserNeighborhood}. */
    static final int NEIGHBORHOOD_NUM = 2;

    /** Maximum number of recommendations requested per user. */
    static final int RECOMMENDER_NUM = 3;

    /**
     * Builds the recommender from {@code inputdata/item.csv} and prints, for
     * each user, a line of the form {@code uid:<id>(<itemId>,<value>)...}.
     *
     * @param args unused
     * @throws IOException    if the data file cannot be read
     * @throws TasteException if the recommender reports an error
     */
    public static void main(String[] args) throws IOException, TasteException {
        String file = "inputdata/item.csv";
        DataModel model = new FileDataModel(new File(file));
        UserSimilarity user = new EuclideanDistanceSimilarity(model);
        NearestNUserNeighborhood neighbor = new NearestNUserNeighborhood(NEIGHBORHOOD_NUM, user, model);
        Recommender r = new GenericUserBasedRecommender(model, neighbor, user);

        LongPrimitiveIterator iter = model.getUserIDs();
        while (iter.hasNext()) {
            long uid = iter.nextLong();
            // The element type must be declared: a raw List cannot be iterated as RecommendedItem.
            List<RecommendedItem> list = r.recommend(uid, RECOMMENDER_NUM);
            System.out.printf("uid:%s", uid);
            for (RecommendedItem ritem : list) {
                System.out.printf("(%s,%f)", ritem.getItemID(), ritem.getValue());
            }
            System.out.println();
        }
    }
}
package com.panguoyuan.mahout.itemcf;
import java.io.File;
import java.util.List;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.UserBasedRecommender;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;
/**
 * Demo of a user-based book recommender: computes recommendations for one
 * hard-coded user from {@code inputdata/rating.csv} and prints them.
 */
public class BasedUserBookRecommender2 {

    /**
     * Builds the recommender and prints up to five recommendations for user 188.
     *
     * @param args unused
     * @throws Exception if the data file cannot be read or the recommender fails
     */
    public static void main(String[] args) throws Exception {
        long userId = 188;
        // Build the data model from the ratings file.
        DataModel model = new FileDataModel(new File("inputdata/rating.csv"));
        // User-to-user similarity. The original listing also mentions
        // EuclideanDistanceSimilarity as an alternative (it would need its own import).
        UserSimilarity userSimilarity = new PearsonCorrelationSimilarity(model);
        // Neighbourhood of the 3 nearest users.
        UserNeighborhood neighborhood = new NearestNUserNeighborhood(3, userSimilarity, model);
        // Build the recommender.
        UserBasedRecommender userBasedRecommender =
                new GenericUserBasedRecommender(model, neighborhood, userSimilarity);
        // Compute the book recommendations for the same user that is printed below.
        List<RecommendedItem> recommendations = userBasedRecommender.recommend(userId, 5);
        // Print the result.
        showItems(userId, recommendations, true);
    }

    /**
     * Prints one line of the form {@code userId:<uid>,(<itemId>,<value>)...}.
     *
     * @param uid             user id shown at the start of the line
     * @param recommendations items to print
     * @param skip            when {@code true} the line is printed even if
     *                        {@code recommendations} is empty; when {@code false}
     *                        nothing is printed for an empty list
     */
    public static void showItems(long uid, List<RecommendedItem> recommendations, boolean skip) {
        if (skip || !recommendations.isEmpty()) {
            System.out.printf("userId:%s,", uid);
            for (RecommendedItem r : recommendations) {
                System.out.printf("(%s,%f)", r.getItemID(), r.getValue());
            }
            System.out.println();
        }
    }
}
4、输出结果
userId:188,(885,9.500000)(396,7.000000)(688,6.000000)
5、用R语言对推荐结果进行人工分析
(1)读取评分数据和用户数据
# Load the ratings and user tables; neither file has a header row.
# Forward slashes are used because single backslashes such as "\w" or "\m"
# are not valid escape sequences inside an R string literal.
ratings <- read.csv("F:/workspace1/mahout/inputdata/rating.csv", header = FALSE)
users <- read.csv("F:/workspace1/mahout/inputdata/user.csv", header = FALSE)
(2)修改列名
# Rebuild both tables with descriptive column names in place of V1..V3.
ratings <- data.frame(
  userid = ratings$V1,
  bookid = ratings$V2,
  grade  = ratings$V3
)
users <- data.frame(
  userid = users$V1,
  sex    = users$V2,
  age    = users$V3
)
(3)查看用户188都看了哪些书
> ratings[c(ratings$userid==188),]
userid bookid grade
3760 188 798 6
3761 188 653 3
3762 188 426 6
3763 188 742 7
3764 188 549 2
3765 188 520 8
3766 188 312 2
3767 188 213 10
3768 188 954 5
3769 188 121 10
3770 188 204 9
3771 188 684 3
3772 188 493 4
3773 188 452 1
3774 188 622 3
3775 188 298 8
(4)图书885推荐分数最高,下面查看该图书有哪些人评过分
ratings[c(ratings$bookid==885),]
userid bookid grade
182 9 885 8
1225 60 885 10
3691 184 885 9
(5)查看用户9、用户60、用户184、用户188的信息
> users[c(9,60,184,188),]
userid sex age
9 9 M 50
60 60 F 49
184 184 M 27
188 188 F 24
(6)查看用户9、用户60、用户184分别与用户188共同看了哪些图书
> rating188=ratings[which(ratings$userid==188),]
> rating9=ratings[which(ratings$userid==9),]
> rating60=ratings[which(ratings$userid==60),]
> rating184=ratings[which(ratings$userid==184),]
> intersect(rating188$bookid,rating9$bookid)
integer(0)
> intersect(rating188$bookid,rating60$bookid)
[1] 312 298
> intersect(rating188$bookid,rating184$bookid)
[1] 121 684
从上面可以看出用户188与用户60共同看了312和298这两本书,与用户184共同看了121和684这两本书,他们都有共同的偏好,所以给用户188推荐图书885是合理的。
package com.panguoyuan.mahout.itemcf;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.EuclideanDistanceSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.GenericItemSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.recommender.IDRescorer;
import org.apache.mahout.cf.taste.recommender.ItemBasedRecommender;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
/**
 * Demo of an item-based book recommender whose results are filtered with an
 * {@link IDRescorer}: every book rated by a male user is excluded.
 */
public class BookFilterGenderRecommender3 {

    /**
     * Builds an item-based recommender from {@code inputdata/rating.csv} and
     * prints filtered recommendations for user 188.
     *
     * @param args unused
     * @throws Exception if a data file cannot be read or the recommender fails
     */
    public static void main(String[] args) throws Exception {
        DataModel model = new FileDataModel(new File("inputdata/rating.csv"));
        ItemSimilarity otherSimilarity = new EuclideanDistanceSimilarity(model);
        GenericItemSimilarity similarity = new GenericItemSimilarity(otherSimilarity, model);
        ItemBasedRecommender recommender = new GenericItemBasedRecommender(model, similarity);
        filterRecommender(188, recommender, model);
    }

    /**
     * Prints {@code userId:<uid>,} followed by one {@code Item:(<itemId>,<value>)}
     * line per recommendation.
     *
     * @param uid             user id shown at the start of the output
     * @param recommendations items to print
     * @param skip            when {@code true} the {@code userId:} prefix is printed
     *                        even if {@code recommendations} is empty; when
     *                        {@code false} nothing is printed for an empty list
     */
    public static void showItems(long uid, List<RecommendedItem> recommendations, boolean skip) {
        if (skip || !recommendations.isEmpty()) {
            System.out.printf("userId:%s,", uid);
            for (RecommendedItem r : recommendations) {
                System.out.printf("Item:(%s,%f)", r.getItemID(), r.getValue());
                System.out.println();
            }
        }
    }

    /**
     * Recommends up to 10 items to {@code uid}, filtering out every book that
     * was rated by a male user, and prints the result.
     *
     * @param uid         user to recommend for
     * @param recommender recommender to query
     * @param dataModel   model used to look up the books each male user rated
     * @throws TasteException if the data model or recommender reports an error
     * @throws IOException    if the user file cannot be read
     */
    public static void filterRecommender(long uid, ItemBasedRecommender recommender, DataModel dataModel)
            throws TasteException, IOException {
        Set<Long> userids = getMale("datafile/book/user.csv");
        // Collect every book rated by at least one male user.
        Set<Long> bookids = new HashSet<Long>();
        for (long maleId : userids) {
            LongPrimitiveIterator iter = dataModel.getItemIDsFromUser(maleId).iterator();
            while (iter.hasNext()) {
                bookids.add(iter.nextLong());
            }
        }
        IDRescorer rescorer = new FilterRescorer(bookids);
        List<RecommendedItem> list = recommender.recommend(uid, 10, rescorer);
        showItems(uid, list, false);
    }

    /**
     * Reads a comma-separated user file and returns the ids of all male users.
     * The first column is read as the user id and the second as the sex marker;
     * rows whose second column is {@code M} are selected. Blank lines and rows
     * with fewer than two columns are ignored.
     *
     * @param file path of the user file
     * @return ids of the rows marked {@code M}; empty if there are none
     * @throws IOException           if the file cannot be read
     * @throws NumberFormatException if a selected row's first column is not a long
     */
    public static Set<Long> getMale(String file) throws IOException {
        Set<Long> userids = new HashSet<Long>();
        BufferedReader br = new BufferedReader(new FileReader(new File(file)));
        try {
            String line;
            while ((line = br.readLine()) != null) {
                String[] cols = line.split(",");
                // Guard against blank or truncated rows instead of failing on cols[1].
                if (cols.length > 1 && "M".equals(cols[1].trim())) {
                    userids.add(Long.parseLong(cols[0].trim()));
                }
            }
        } finally {
            // Close the reader even when reading or parsing throws.
            br.close();
        }
        return userids;
    }
}
/**
 * {@link IDRescorer} that filters a fixed set of ids out of the results.
 * In this file it is constructed with the ids of books to exclude.
 */
class FilterRescorer implements IDRescorer {

    /** Ids for which {@link #isFiltered(long)} returns {@code true}. */
    private final Set<Long> filteredIds;

    /**
     * @param filteredIds ids to filter out; the set is used as-is, not copied
     */
    public FilterRescorer(Set<Long> filteredIds) {
        this.filteredIds = filteredIds;
    }

    /**
     * @param id            id being scored
     * @param originalScore score computed by the recommender
     * @return {@code Double.NaN} for a filtered id, otherwise {@code originalScore}
     */
    @Override
    public double rescore(long id, double originalScore) {
        return isFiltered(id) ? Double.NaN : originalScore;
    }

    /**
     * @param id id to check
     * @return {@code true} if {@code id} is in the filtered set
     */
    @Override
    public boolean isFiltered(long id) {
        return filteredIds.contains(id);
    }
}
3、打印推荐结果
SLF4J: Failed to load class "org.slf4j.impl.StaticLoggerBinder".
SLF4J: Defaulting to no-operation (NOP) logger implementation
SLF4J: See http://www.slf4j.org/codes.html#StaticLoggerBinder for further details.
userId:188,Item:(365,8.800000)
Item:(725,8.583333)
Item:(427,8.000000)
Item:(403,7.987013)
Item:(734,7.676371)
Item:(256,7.533333)
Item:(300,7.428571)
Item:(743,7.333333)
Item:(356,6.875000)
Item:(579,6.777778)
4、人工对数据进行分析
(1)查看图书365都有哪些用户评过分
> ratings[c(ratings$bookid==365),]
userid bookid grade
1046 51 365 9
2206 111 365 9
2632 134 365 4
> users[c(51,111,134),]
userid sex age
51 51 F 18
111 111 F 40
134 134 F 74
(2)利用intersect函数找出用户188分别与用户51、111、134这三个用户共同评分过的图书
说明:intersect(A,B)返回同时出现在A和B中的元素(即A与B的交集)
>rating188=ratings[which(ratings$userid==188),]
>rating51=ratings[which(ratings$userid==51),]
>rating111=ratings[which(ratings$userid==111),]
>rating134=ratings[which(ratings$userid==134),]
> intersect(rating188$bookid,rating51$bookid)
integer(0)
> intersect(rating188$bookid,rating134$bookid)
[1] 204
> intersect(rating188$bookid,rating111$bookid)
[1] 742
(3)从上面可以看出用户188与用户134共同看了204图书,与111共同看了742图书
> rating188
userid bookid grade
3760 188 798 6
3761 188 653 3
3762 188 426 6
3763 188 742 7
3764 188 549 2
3765 188 520 8
3766 188 312 2
3767 188 213 10
3768 188 954 5
3769 188 121 10
3770 188 204 9
3771 188 684 3
3772 188 493 4
3773 188 452 1
3774 188 622 3
3775 188 298 8
综上所述把图书365推荐给用户188是合理的。