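先上皮尔逊相关系数的计算公式(原公式图缺失,以下按代码注释中的写法还原):
$$r=\frac{\sum xy-\frac{\sum x\sum y}{n}}{\sqrt{\left(\sum x^{2}-\frac{(\sum x)^{2}}{n}\right)\left(\sum y^{2}-\frac{(\sum y)^{2}}{n}\right)}}$$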
流程是这样的:
1.先定义一些训练集(key-value的方式),可以是用户=购买数,电影=评分等数据;
2.针对某一个用户(例如代码中的p5),分别计算其与其它每个用户的皮尔逊值:只统计双方都有值的项,若两人没有任何共同项,则相似度记为0;
3.选择皮尔逊值最靠前的M个相似邻居,获取他们近7天的购买列表
4.用皮尔逊值*各个邻居购买明细,得出疑似推荐商品的分值
5.选择分值靠前的商品,再与p5的已购买商品进行去重,即可获取相关的推荐商品
代码如下:(ps:其中基于欧几里得距离的计算方法也已实现,但只用于输出对比,并未参与推荐计算)
public static void main(String[] args) {
Map> userPerfMap = new HashMap>();
Map pref1 = new HashMap();
pref1.put("A", 3);
pref1.put("B", 4);
pref1.put("C", 3);
pref1.put("D", 5);
pref1.put("E", 1);
pref1.put("F", 4);
userPerfMap.put("p1", pref1);
Map pref2 = new HashMap();
pref2.put("A", 2);
pref2.put("B", 4);
pref2.put("C", 4);
pref2.put("D", 5);
pref2.put("E", 3);
pref2.put("F", 2);
userPerfMap.put("p2", pref2);
Map pref3 = new HashMap();
pref3.put("A", 3);
pref3.put("B", 5);
pref3.put("C", 4);
pref3.put("D", 5);
pref3.put("E", 2);
pref3.put("F", 1);
userPerfMap.put("p3", pref3);
Map pref4 = new HashMap();
pref4.put("A", 2);
pref4.put("B", 2);
pref4.put("C", 3);
pref4.put("D", 4);
pref4.put("E", 3);
pref4.put("F", 2);
userPerfMap.put("p4", pref4);
Map pref5 = new HashMap();
pref5.put("A", 4);
pref5.put("B", 4);
pref5.put("C", 4);
pref5.put("D", 5);
pref5.put("E", 1);
pref5.put("F", 0);
userPerfMap.put("p5", pref5);
Map simUserSimMap = new HashMap();
String output1 = "皮尔逊相关系数:", output2 = "欧几里得距离:";
for (Entry> userPerfEn : userPerfMap.entrySet()) {
String userName = userPerfEn.getKey();
if (!"p5".equals(userName)) {
double sim = getUserSimilar(pref5, userPerfEn.getValue());
double distance = getEuclidDistance(pref5, userPerfEn.getValue());
output1 += "p5与" + userName + "之间的相关系数:" + sim + ",";
output2 += "p5与" + userName + "之间的距离:" + distance + ",";
simUserSimMap.put(userName, sim);
}
}
System.out.println(output1);
System.out.println(output2);
Map> simUserObjMap = new HashMap>();
Map pobjMap1 = new HashMap();
pobjMap1.put("一夜惊喜", 3);
pobjMap1.put("环太平洋", 4);
pobjMap1.put("变形金刚", 3);
pobjMap1.put("开心麻花", 5);
pobjMap1.put("完美恋人", 2);
simUserObjMap.put("p1", pobjMap1);
Map pobjMap2 = new HashMap();
pobjMap2.put("一夜惊喜", 5);
pobjMap2.put("环太平洋", 1);
pobjMap2.put("变形金刚", 2);
pobjMap2.put("开心麻花", 5);
pobjMap2.put("完美恋人", 2);
simUserObjMap.put("p2", pobjMap2);
Map pobjMap3 = new HashMap();
pobjMap3.put("一夜惊喜", 2);
pobjMap3.put("环太平洋", 5);
pobjMap3.put("变形金刚", 5);
pobjMap3.put("开心麻花", 2);
pobjMap3.put("完美恋人", 4);
simUserObjMap.put("p3", pobjMap3);
System.out.println("根据系数推荐:" + getRecommend(simUserObjMap, simUserSimMap));
}
/**
 * Returns the absolute value of the Pearson correlation coefficient between two
 * users' ratings; a larger value means a stronger (positive or negative) correlation.
 *
 * <p>Only keys present with non-null values in both maps contribute. The textbook form
 * {@code (Σxy - Σx·Σy/n) / sqrt((Σx² - (Σx)²/n)(Σy² - (Σy)²/n))} is evaluated with the
 * numerator and both factors under the root multiplied by {@code n}, so every
 * intermediate stays an exact integer and no division truncates.
 *
 * @param pm1 ratings of the first user, keyed by item
 * @param pm2 ratings of the second user, keyed by item
 * @return a value in {@code [0, 1]}; {@code 0} when the users share no rated item;
 *         {@code 1} when the denominator is zero (at least one user rated every shared
 *         item identically), which is the value this method has always returned
 */
public static double getUserSimilar(Map<String, Integer> pm1, Map<String, Integer> pm2) {
    int n = 0;      // number of items rated by both users
    long sxy = 0;   // Σxy
    long sx = 0;    // Σx
    long sy = 0;    // Σy
    long sx2 = 0;   // Σx²
    long sy2 = 0;   // Σy²
    for (Map.Entry<String, Integer> pme : pm1.entrySet()) {
        Integer xBoxed = pme.getValue();
        Integer yBoxed = pm2.get(pme.getKey());
        if (xBoxed == null || yBoxed == null) {
            continue;
        }
        // Widen to long before multiplying so large ratings cannot overflow int.
        long x = xBoxed.intValue();
        long y = yBoxed.intValue();
        n++;
        sxy += x * y;
        sx += x;
        sy += y;
        sx2 += x * x;
        sy2 += y * y;
    }
    if (n == 0) {
        // No shared items: similarity is defined as 0 instead of dividing by zero.
        return 0;
    }
    long numerator = n * sxy - sx * sy;
    long varX = n * sx2 - sx * sx;   // n² times the variance of x; never negative
    long varY = n * sy2 - sy * sy;   // n² times the variance of y; never negative
    // Convert to double before multiplying: the product of two longs could overflow.
    double denominator = Math.sqrt((double) varX * (double) varY);
    return Math.abs(denominator == 0 ? 1 : numerator / denominator);
}
/**
 * Returns the Euclidean distance between two users' ratings; a smaller value means
 * the users are closer.
 *
 * <p>Each key that has a non-null value in both maps is one dimension; keys missing
 * from either map are skipped. The result is the square root of the sum of squared
 * rating differences over those shared keys.
 *
 * @param pm1 ratings of the first user, keyed by item
 * @param pm2 ratings of the second user, keyed by item
 * @return the distance over the shared keys; {@code 0} when there are none
 */
public static double getEuclidDistance(Map<String, Integer> pm1, Map<String, Integer> pm2) {
    double sumOfSquares = 0.0;
    for (Map.Entry<String, Integer> entry : pm1.entrySet()) {
        Integer a1 = entry.getValue();
        Integer b1 = pm2.get(entry.getKey());
        if (a1 != null && b1 != null) {
            // Subtract in double so extreme int ratings cannot overflow.
            double diff = a1.doubleValue() - b1.doubleValue();
            sumOfSquares += diff * diff;
        }
    }
    return Math.sqrt(sumOfSquares);
}
/**
*
* @Title getRecommend
* @Class testRecommend
* @return String
* @param simUserObjMap
* @param simUserSimMap
* @return
* @Description根据相关系数得到推荐物品
* @author qinshijiang
* @Date 2013-9-4
*/
public static String getRecommend(Map> simUserObjMap, Map simUserSimMap) {
Map objScoreMap = new HashMap();
for (Entry> simUserEn : simUserObjMap.entrySet()) {
String user = simUserEn.getKey();
double sim = simUserSimMap.get(user);
for (Entry simObjEn : simUserEn.getValue().entrySet()) {
double objScore = sim * simObjEn.getValue();
String objName = simObjEn.getKey();
if (objScoreMap.get(objName) == null) {
objScoreMap.put(objName, objScore);
}else {
double totalScore = objScoreMap.get(objName);
objScoreMap.put(objName, totalScore + objScore);
}
}
}
List> enList = new ArrayList>(objScoreMap.entrySet());
Collections.sort(enList, new Comparator>() {
public int compare(Map.Entry o1, Map.Entry o2) {
Double a = o1.getValue() - o2.getValue();
if (a == 0) {
return 0;
}else if (a > 0) {
return 1;
}else {
return -1;
}
}
});
return enList.get(enList.size() - 1).getKey();
}