基于皮尔逊算法的简单推荐系统

先上皮尔逊算法公式图
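皮尔逊相关系数公式(原图缺失,按下文代码中的实现还原):

$$
r = \frac{\sum xy - \dfrac{\sum x \sum y}{n}}{\sqrt{\left(\sum x^2 - \dfrac{\left(\sum x\right)^2}{n}\right)\left(\sum y^2 - \dfrac{\left(\sum y\right)^2}{n}\right)}}
$$

其中 x、y 分别为两个用户在共同项上的取值,n 为共同项的个数。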

流程是这样的:
1.先定义一些训练集(key-value的方式),可以是用户=购买数,电影=评分等数据;
2.针对某一个用户(例如代码中的p5),分别计算其与其他每个用户的皮尔逊相关系数:只有双方都有取值的项才参与计算;若两人没有任何共同项,则相关系数记为0;
3.选择皮尔逊值最靠前的M个相似邻居,获取他们近7天的购买列表
4.用皮尔逊值*各个邻居购买明细,得出疑似推荐商品的分值
5.选择分值靠前的商品,再与p5的已购买商品进行去重,即可获取相关的推荐商品
代码如下(注:基于欧几里得距离的计算方法已实现,但只用于打印输出,未参与推荐计算):

 public static void main(String[] args) {  
    Map> userPerfMap = new HashMap>();  
    Map pref1 = new HashMap();  
    pref1.put("A", 3);  
    pref1.put("B", 4);  
    pref1.put("C", 3);  
    pref1.put("D", 5);  
    pref1.put("E", 1);  
    pref1.put("F", 4);  
    userPerfMap.put("p1", pref1);  
    Map pref2 = new HashMap();  
    pref2.put("A", 2);  
    pref2.put("B", 4);  
    pref2.put("C", 4);  
    pref2.put("D", 5);  
    pref2.put("E", 3);  
    pref2.put("F", 2);  
    userPerfMap.put("p2", pref2);  
    Map pref3 = new HashMap();  
    pref3.put("A", 3);  
    pref3.put("B", 5);  
    pref3.put("C", 4);  
    pref3.put("D", 5);  
    pref3.put("E", 2);  
    pref3.put("F", 1);  
    userPerfMap.put("p3", pref3);  
    Map pref4 = new HashMap();  
    pref4.put("A", 2);  
    pref4.put("B", 2);  
    pref4.put("C", 3);  
    pref4.put("D", 4);  
    pref4.put("E", 3);  
    pref4.put("F", 2);  
    userPerfMap.put("p4", pref4);  
    Map pref5 = new HashMap();  
    pref5.put("A", 4);  
    pref5.put("B", 4);  
    pref5.put("C", 4);  
    pref5.put("D", 5);  
    pref5.put("E", 1);  
    pref5.put("F", 0);  
    userPerfMap.put("p5", pref5);  
    Map simUserSimMap = new HashMap();  
    String output1 = "皮尔逊相关系数:", output2 = "欧几里得距离:";  
    for (Entry> userPerfEn : userPerfMap.entrySet()) {  
        String userName = userPerfEn.getKey();  
        if (!"p5".equals(userName)) {  
        double sim = getUserSimilar(pref5, userPerfEn.getValue());  
        double distance = getEuclidDistance(pref5, userPerfEn.getValue());  
        output1 += "p5与" + userName + "之间的相关系数:" + sim + ",";  
        output2 += "p5与" + userName + "之间的距离:" + distance + ",";  
        simUserSimMap.put(userName, sim);  
        }  
    }  
    System.out.println(output1);  
    System.out.println(output2);  
    Map> simUserObjMap = new HashMap>();  
    Map pobjMap1 = new HashMap();  
    pobjMap1.put("一夜惊喜", 3);  
    pobjMap1.put("环太平洋", 4);  
    pobjMap1.put("变形金刚", 3); 
    pobjMap1.put("开心麻花", 5); 
    pobjMap1.put("完美恋人", 2); 
    simUserObjMap.put("p1", pobjMap1);  
    Map pobjMap2 = new HashMap();  
    pobjMap2.put("一夜惊喜", 5);  
    pobjMap2.put("环太平洋", 1);  
    pobjMap2.put("变形金刚", 2);  
    pobjMap2.put("开心麻花", 5);  
    pobjMap2.put("完美恋人", 2);  
    simUserObjMap.put("p2", pobjMap2);  
    Map pobjMap3 = new HashMap();  
    pobjMap3.put("一夜惊喜", 2);  
    pobjMap3.put("环太平洋", 5);  
    pobjMap3.put("变形金刚", 5); 
    pobjMap3.put("开心麻花", 2);  
    pobjMap3.put("完美恋人", 4); 
    simUserObjMap.put("p3", pobjMap3);  
    System.out.println("根据系数推荐:" + getRecommend(simUserObjMap, simUserSimMap));  
    }  
  
    /**
     * Computes the absolute value of the Pearson correlation coefficient between two users.
     *
     * <p>Only keys that have a non-null value in both maps take part in the computation. The
     * larger the returned value, the stronger the (positive or negative) correlation.
     *
     * @param pm1 values of the first user, keyed by item
     * @param pm2 values of the second user, keyed by item
     * @return {@code |r|}; {@code 0} when the two users share no item, and {@code 1} when the
     *         denominator of the formula is zero (at least one user has identical values on all
     *         shared items)
     */
    public static double getUserSimilar(Map<String, Integer> pm1, Map<String, Integer> pm2) {
        int n = 0; // number of shared items
        // The sums are kept as doubles so that the divisions by n below are real divisions;
        // with int sums, sx * sy / n was truncated and could push the result above 1.
        double sxy = 0; // sum of x*y
        double sx = 0; // sum of x
        double sy = 0; // sum of y
        double sx2 = 0; // sum of x^2
        double sy2 = 0; // sum of y^2
        for (Map.Entry<String, Integer> pme : pm1.entrySet()) {
            Integer x = pme.getValue();
            Integer y = pm2.get(pme.getKey());
            if (x == null || y == null) {
                continue;
            }
            n++;
            sxy += (double) x * y;
            sx += x;
            sy += y;
            sx2 += (double) x * x;
            sy2 += (double) y * y;
        }
        if (n == 0) {
            // No shared item: no evidence of similarity (and avoids dividing by zero).
            return 0;
        }
        // r = (Sxy - Sx*Sy/n) / sqrt((Sx2 - Sx^2/n) * (Sy2 - Sy^2/n))
        double sd = sxy - sx * sy / n;
        double sm = Math.sqrt((sx2 - sx * sx / n) * (sy2 - sy * sy / n));
        // Math.sqrt is the square root; Math.abs drops the sign of the correlation.
        return Math.abs(sm == 0 ? 1 : sd / sm);
    }
  
    /**
     * Computes the Euclidean distance between two users; a smaller distance means the users are
     * closer.
     *
     * <p>Every key that has a non-null value in both maps is treated as one dimension; keys
     * missing from either map are ignored. This method is implemented for comparison only: the
     * demo prints its result but does not use it for the recommendation.
     *
     * @param pm1 values of the first user, keyed by item
     * @param pm2 values of the second user, keyed by item
     * @return the square root of the sum of squared differences over the shared keys;
     *         {@code 0} when there is no shared key
     */
    public static double getEuclidDistance(Map<String, Integer> pm1, Map<String, Integer> pm2) {
        double totalscore = 0.0;
        for (Map.Entry<String, Integer> entry : pm1.entrySet()) {
            Integer a1 = entry.getValue();
            Integer b1 = pm2.get(entry.getKey());
            if (a1 == null || b1 == null) {
                continue;
            }
            // distance = sqrt((a1-b1)^2 + (a2-b2)^2 + ...), one term per shared key
            double diff = (double) a1 - b1;
            totalscore += diff * diff;
        }
        return Math.sqrt(totalscore);
    }
  
    /** 
     *  
     * @Title getRecommend 
     * @Class testRecommend 
     * @return String 
     * @param simUserObjMap 
     * @param simUserSimMap 
     * @return 
     * @Description根据相关系数得到推荐物品 
     * @author qinshijiang 
     * @Date 2013-9-4 
     */  
    public static String getRecommend(Map> simUserObjMap, Map simUserSimMap) {  
    Map objScoreMap = new HashMap();  
    for (Entry> simUserEn : simUserObjMap.entrySet()) {  
        String user = simUserEn.getKey();  
        double sim = simUserSimMap.get(user);  
        for (Entry simObjEn : simUserEn.getValue().entrySet()) {  
        double objScore = sim * simObjEn.getValue();  
        String objName = simObjEn.getKey();  
        if (objScoreMap.get(objName) == null) {  
            objScoreMap.put(objName, objScore);  
        }else {  
            double totalScore = objScoreMap.get(objName);  
            objScoreMap.put(objName, totalScore + objScore);  
        }  
        }  
    }  
    List> enList = new ArrayList>(objScoreMap.entrySet());  
    Collections.sort(enList, new Comparator>() {  
        public int compare(Map.Entry o1, Map.Entry o2) {  
        Double a = o1.getValue() - o2.getValue();  
        if (a == 0) {  
            return 0;  
        }else if (a > 0) {  
            return 1;  
        }else {  
            return -1;  
        }  
        }  
    });  
    return enList.get(enList.size() - 1).getKey();  
    }  

你可能感兴趣的:(基于皮尔逊算法的简单推荐系统)