1. 概念介绍
1.1 推荐系统
京东的热门推荐
爱奇艺的猜你喜欢、QQ 好友推荐、相亲网站的用户推荐、招聘网站的职业推荐等等。
1.2 Mahout介绍
2. 协同过滤实现过程
2.1 收集用户偏好数据
2.2 数据减噪与归一处理
2.3 算出相似的物品或者用户
2.4 将相似商品推荐给用户
1
2
3
4
5
6
|
-- Item catalogue; rows are selected by pid from the findAllByIds mapper statement.
CREATE TABLE `tb_item` (
  `pid` bigint(11) NOT NULL AUTO_INCREMENT,
  `name` varchar(2000) CHARACTER SET latin1 DEFAULT NULL,
  `types` varchar(2000) CHARACTER SET latin1 DEFAULT NULL,
  PRIMARY KEY (`pid`)
) ENGINE=InnoDB AUTO_INCREMENT=65134 DEFAULT CHARSET=utf8;
|
1
2
3
4
5
6
7
8
|
-- User preference records read by the Mahout JDBC data model:
-- uid = user column, pid = item column, val = preference value, time = timestamp column.
CREATE TABLE `user_pianhao_data1` (
  `id` bigint(11) NOT NULL AUTO_INCREMENT,
  `uid` bigint(11) DEFAULT NULL,
  `pid` bigint(11) DEFAULT NULL,
  `val` bigint(11) DEFAULT NULL,
  `time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1001 DEFAULT CHARSET=utf8;
|
3.2 搭建springboot工程
3.3 编写application.properties配置文件
01
02
03
04
05
06
07
08
09
10
11
|
# DB configuration
spring.datasource.driverClassName=com.mysql.jdbc.Driver
spring.datasource.url=jdbc:mysql://127.0.0.1:3306/recommend?useUnicode=true&characterEncoding=utf8
spring.datasource.username=root
spring.datasource.password=123456
# Spring / MyBatis integration
# Package scanned for POJO type aliases
mybatis.type-aliases-package=cn.itcast.domain
# Location of the MyBatis mapper XML files
mybatis.mapper-locations=classpath:mapper/*Mapper.xml
|
01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
|
package cn.itcast.domain;
/**
 * Plain data holder for an item: its id, its name and its type labels.
 *
 * <p>Keeps a public no-argument constructor and bean-style accessors so that
 * frameworks can populate it reflectively.
 */
public class Item {

    /** Item id. */
    private Long pid;

    /** Display name of the item. */
    private String name;

    /** Type labels of the item, stored as a single string. */
    private String types;

    /**
     * @return the item id, or {@code null} if it has not been set
     */
    public Long getPid() {
        return pid;
    }

    /**
     * @param pid the item id
     */
    public void setPid(Long pid) {
        this.pid = pid;
    }

    /**
     * @return the item name, or {@code null} if it has not been set
     */
    public String getName() {
        return name;
    }

    /**
     * @param name the item name
     */
    public void setName(String name) {
        this.name = name;
    }

    /**
     * @return the item's type labels, or {@code null} if they have not been set
     */
    public String getTypes() {
        return types;
    }

    /**
     * @param types the item's type labels
     */
    public void setTypes(String types) {
        this.types = types;
    }

    /**
     * Renders the item as {@code Item{pid=..., name='...', types='...'}}.
     *
     * @return a debug representation containing all three fields
     */
    @Override
    public String toString() {
        return "Item{" +
                "pid=" + pid +
                ", name='" + name + '\'' +
                ", types='" + types + '\'' +
                '}';
    }
}
|
01
02
03
04
05
06
07
08
09
10
|
package cn.itcast.dao;
import cn.itcast.domain.Item;
import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Param;
import java.util.List;
/**
 * MyBatis mapper for item lookups.
 */
@Mapper
public interface ItemMapper {

    /**
     * Looks up items by id.
     *
     * <p>NOTE(review): the mapped statement is expected to expand this list into an
     * {@code IN (...)} clause; presumably an empty list would produce invalid SQL, so
     * callers should only pass a non-empty list. Verify against the mapper XML.
     *
     * @param Ids ids of the items to fetch; bound under the name {@code "Ids"}
     * @return the items found for the given ids
     */
    List<Item> findAllByIds(@Param("Ids") List<Long> Ids);
}
|
01
02
03
04
05
06
07
08
09
10
11
12
|
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
        "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="cn.itcast.dao.ItemMapper">
    <!-- Selects every tb_item row whose pid is contained in the "Ids" list parameter. -->
    <select id="findAllByIds" resultType="item">
        select * from tb_item
        WHERE pid in
        <foreach collection="Ids" item="id" open="(" close=")" separator=",">
            #{id}
        </foreach>
    </select>
</mapper>
|
01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
|
package cn.itcast.myconfig;
import com.mysql.jdbc.jdbc 2. optional.MysqlDataSource;
import org.apache.mahout.cf.taste.impl.model.jdbc.MySQLJDBCDataModel;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.JDBCDataModel;
import org.springframework. context .annotation.Bean;
import org.springframework. context .annotation.Configuration;
@Configuration
public class MyConfig {
@Bean
public DataModel getMySQLDataModel ( ) {
MysqlDataSource dataSource = new MysqlDataSource ( ) ;
dataSource.setServerName ( "localhost" ) ;
dataSource.setUser ( "root" ) ;
dataSource.setPassword ( "123456" ) ;
dataSource.setDatabaseName ( "recommend" ) ; / / 数据库名字
/ / 参数 1 :mysql数据源信息,参数 2 :表名,参数 3 :用户列字段,参数 4 :商品列字段,参数 5 :偏好值字段,参数 6 :时间戳
JDBCDataModel dataModel = new MySQLJDBCDataModel ( dataSource , "user_pianhao_data1" , "uid" , "pid" , "val" , "time" ) ;
/ * *
* DataModel可基于数据也可基于文件
* 文件汇总数据格式
* 用户 id : : 商品 id : : 偏好分值 : : 时间戳
* 1 : : 122 : : 5 : : 838985046
* 1 : : 185 : : 5 : : 838983525
* 1 : : 231 : : 5 : : 838983392
* .........
* /
/ / File file = new File ( "E:\\initData.dat" ) ;
/ / try {
/ / DataModel dataModel = new GroupLensDataModel ( file ) ;
/ / } catch ( IOException e ) {
/ / e.printStackTrace ( ) ;
/ / }
return dataModel;
}
}
|
01
02
03
04
05
06
07
08
09
10
11
|
package cn.itcast.service;
import cn.itcast.domain.Item;
import java.util.List;
public interface RecommendService {
/ / 基于用户的商品推荐
List < Item > getRecommendItemsByUser ( Long userId , int howMany ) ;
/ / 基于内容的商品推荐
List < Item > getRecommendItemsByItem ( Long userId , Long itemId , int howMany ) ;
}
|
01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
|
package cn.itcast.service.impl;
import cn.itcast.dao.ItemMapper;
import cn.itcast.domain.Item;
import cn.itcast.service.RecommendService;
import com.mysql.jdbc.jdbc2.optional.MysqlDataSource;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.model.jdbc.MySQLJDBCDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.JDBCDataModel;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
@Service
public class RecommendServiceImpl implements RecommendService {
@Autowired
private ItemMapper itemMapper;
@Autowired
private DataModel dataModel;
@Override
public List < Item > getRecommendItemsByUser ( Long userId , int howMany ) {
List < Item > list = null;
try {
/ / 计算相似度,相似度算法有很多种,采用基于皮尔逊相关性的相似度
UserSimilarity similarity = new PearsonCorrelationSimilarity ( dataModel ) ;
/ / 计算最近邻域,邻居有两种算法,基于固定数量的邻居和基于相似度的邻居,这里使用基于固定数量的邻居
UserNeighborhood userNeighborhood = new NearestNUserNeighborhood ( 100 , similarity , dataModel ) ;
/ / 构建推荐器,基于用户的协同过滤推荐
Recommender recommender = new GenericUserBasedRecommender ( dataModel , userNeighborhood , similarity ) ;
long start = System.currentTimeMillis ( ) ;
/ / 推荐商品
List < RecommendedItem > recommendedItemList = recommender.recommend ( userId , howMany ) ;
List < Long > itemIds = new ArrayList < Long > ( ) ;
for ( RecommendedItem recommendedItem : recommendedItemList ) {
System.out.println ( recommendedItem ) ;
itemIds. add ( recommendedItem.getItemID ( ) ) ;
}
System.out.println ( "推荐出来的商品id集合" + itemIds ) ;
/ / 根据商品 id 查询商品
if ( itemIds! = null & & itemIds.size ( ) > 0 ) {
list = itemMapper.findAllByIds ( itemIds ) ;
} else {
list = new ArrayList < > ( ) ;
}
System.out.println ( "推荐数量:" + list .size ( ) + "耗时:" + ( System.currentTimeMillis ( ) - start ) ) ;
} catch ( TasteException e ) {
e.printStackTrace ( ) ;
}
return list ;
}
@Override
public List < Item > getRecommendItemsByItem ( Long userId , Long itemId , int howMany ) {
List < Item > list = null;
try {
/ / 计算相似度,相似度算法有很多种,采用基于皮尔逊相关性的相似度
ItemSimilarity itemSimilarity = new PearsonCorrelationSimilarity ( dataModel ) ;
/ / 4 ) 构建推荐器,使用基于物品的协同过滤推荐
GenericItemBasedRecommender recommender = new GenericItemBasedRecommender ( dataModel , itemSimilarity ) ;
long start = System.currentTimeMillis ( ) ;
/ / 物品推荐相似度,计算两个物品同时出现的次数,次数越多任务的相似度越高。
List < RecommendedItem > recommendedItemList = recommender.recommendedBecause ( userId , itemId , howMany ) ;
/ / 打印推荐的结果
List < Long > itemIds = new ArrayList < Long > ( ) ;
for ( RecommendedItem recommendedItem : recommendedItemList ) {
System.out.println ( recommendedItem ) ;
itemIds. add ( recommendedItem.getItemID ( ) ) ;
}
System.out.println ( "推荐出来的商品id集合" + itemIds ) ;
/ / 根据商品 id 查询商品
if ( itemIds! = null & & itemIds.size ( ) > 0 ) {
list = itemMapper.findAllByIds ( itemIds ) ;
} else {
list = new ArrayList < > ( ) ;
}
System.out.println ( "推荐数量:" + list .size ( ) + "耗时:" + ( System.currentTimeMillis ( ) - start ) ) ;
} catch ( TasteException e ) {
e.printStackTrace ( ) ;
}
return list ;
}
}
|
01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
|
package cn.itcast.controller;
import cn.itcast.domain.Item;
import cn.itcast.service.RecommendService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import java.util.List;
@RestController
public class RecommendController {
@Autowired
private RecommendService recommendService;
/ * *
* 基于用户的推荐
* @param userId 用户 id
* @param num 推荐数量
* @ return
* /
@RequestMapping ( "recommendByUser" )
public List < Item > getRecommendItemsByUser ( Long userId , int num ) {
List < Item > items = recommendService.getRecommendItemsByUser ( userId , num ) ;
return items ;
}
/ * *
* 基于内容的推荐
* @param userId 用户 id
* @param itemId 商品 id
* @param num 推荐数量
* @ return
* /
@RequestMapping ( "recommendByItem" )
public List < Item > getRecommendItemsByItem ( Long userId , Long itemId , int num ) {
List < Item > items = recommendService.getRecommendItemsByItem ( userId , itemId , num ) ;
return items ;
}
}
|
1
|
[ { "pid" : 50 , "name" : "Usual Suspects, The (1995)" , "types" : "Crime|Mystery|Thriller" } , { "pid" : 260 , "name" : "Star Wars: Episode IV - A New Hope (a.k.a. Star Wars) (1977)" , "types" : "Action|Adventure|Sci-Fi" } , { "pid" : 590 , "name" : "Dances with Wolves (1990)" , "types" : "Adventure|Drama|Western" } , { "pid" : 1732 , "name" : "Big Lebowski, The (1998)" , "types" : "Comedy|Crime|Mystery|Thriller" } , { "pid" : 2335 , "name" : "Waterboy, The (1998)" , "types" : "Comedy" } , { "pid" : 2478 , "name" : "Three Amigos (1986)" , "types" : "Comedy|Western" } , { "pid" : 4027 , "name" : "O Brother, Where Art Thou? (2000)" , "types" : "Adventure|Comedy|Crime" } , { "pid" : 4226 , "name" : "Memento (2000)" , "types" : "Crime|Drama|Mystery|Thriller" } , { "pid" : 5481 , "name" : "Austin Powers in Goldmember (2002)" , "types" : "Comedy" } , { "pid" : 5502 , "name" : "Signs (2002)" , "types" : "Sci-Fi|Thriller" } ]
|
1
|
[ { "pid" : 253 , "name" : "Interview with the Vampire: The Vampire Chronicles (1994)" , "types" : "Drama|Horror" } , { "pid" : 592 , "name" : "Batman (1989)" , "types" : "Action|Crime|Sci-Fi|Thriller" } ]
|