推荐系统之基于二部图的个性化推荐系统原理及C++实现

推荐系统之基于二部图的个性化推荐系统原理及C++实现

 1 #include <iostream>
 2 #include <fstream>
 3 #include <vector>
 4 #include <string>
 5 #include <vector>
 6 #include <algorithm>
 7 #include <iomanip>
 8 using namespace std;  9 
 10 //从TXT中读入数据到矩阵(二维数组)
 11 template <typename T>
 12 vector<vector<T> > txtRead(string FilePath,int row,int col)  13 {  14  ifstream input(FilePath);  15     if (!input.is_open())  16  {  17         cerr << "File is not existing, check the path: \n" <<  FilePath << endl;  18         exit(1);  19  }  20     vector<vector<T> > data(row, vector<T>(col,0));  21     for (int i = 0; i < row; ++i)  22  {  23         for (int j = 0; j < col; ++j)  24  {  25             //因为这里针对的情况是用户只给出对items的喜欢与不喜欢的情况,而movielens  26             //是一个1-5的评分数据,所以把分数达到3的看作是喜欢,标记为1,小于3的视为  27             // 不喜欢,置为0
 28             input >> data[i][j];  29             if (data[i][j] >= 3)  30                 data[i][j] = 1;  31             else
 32                 data[i][j] = 0;  33  }  34  }  35     return data;  36 }  37 
 38 //把矩阵中的数据写入TXT文件
 39 template<typename T>
 40 void txtWrite(vector<vector<T> > Matrix, string dest)  41 {  42  ofstream output(dest);  43     vector<vector<T> >::size_type row = Matrix.size();  44     vector<T>::size_type col = Matrix[0].size();  45     for (vector<vector<T> >::size_type i = 0; i < row; ++i)  46  {  47         for (vector<T>::size_type j = 0; j < col; ++j)  48  {  49             
 50             output << setprecision(3)<< Matrix[i][j] << "\t";  51  }  52         output << endl;  53  }  54 }  55 
 56 // 求两个向量的内积
 57 double InnerProduct(std::vector<double> A, std::vector<double> B)  58 {  59     double res = 0;  60     for(std::vector<double>::size_type i = 0; i < A.size(); ++i)  61  {  62         res += A[i] * B[i];  63  }  64     return res;  65 }  66 
 67 //矩阵转置操作
 68 template<typename T>//  69 vector<vector<T> > Transpose(vector<vector<T> > Matrix)  70 {  71     unsigned row = Matrix.size();  72     unsigned col = Matrix[0].size();  73     vector<vector<T> > Trans(col,vector<T>(row,0));  74     for (unsigned i = 0; i < col; ++i)  75  {  76         for (unsigned j = 0; j < row; ++j)  77  {  78             Trans[i][j] = Matrix[j][i];  79  }  80  }  81     return Trans;  82 }  83 
 84 //求一个向量中所有元素的和
 85 template<typename T>
 86 T SumVector(vector<T> vec)  87 {  88     T res = 0;  89 
 90     for (vector<T>::size_type i = 0; i < vec.size(); ++i)  91         res += vec[i];  92     return res;  93 }  94 
 95 //对一个向量中的元素进行降序排列,返回重排后的元素在原来  96 //向量中的索引
 97 bool IsBigger(double a, double b)  98 {  99     return a >= b; 100 } 101 vector<unsigned> DescendVector(vector<double> vec) 102 { 103     vector<double> tmpVec = vec; 104  sort(tmpVec.begin(), tmpVec.end(), IsBigger); 105     vector<unsigned> idx; 106     for (vector<double>::size_type i = 0; i < tmpVec.size(); ++i) 107  { 108         for (vector<double>::size_type j = 0; j < vec.size(); ++j) 109  { 110             if (tmpVec[i] == vec[j]) 111  idx.push_back(j); 112  } 113  } 114     return idx; 115 } 116 
117 
118 //基于概率传播(ProbS)的二部图的推荐函数,data是训练数据
119 vector<vector<double> > ProbS(vector<vector<double> > data) 120 { 121     auto row = data.size(); 122     auto col = data[0].size(); 123     vector<vector<double> > transData = Transpose(data); 124 
125     //第一步利用概率传播机制计算权重矩阵
126     vector<vector<double> > weights(col, vector<double>(col, 0)); 127     for (vector<double>::size_type i = 0; i < col; ++i) 128  { 129         for (vector<double>::size_type j = 0; j < col; ++j) 130  { 131             double degree = SumVector<double>(transData[j]); 132             double sum = 0; 133             for (vector<double>::size_type k = 0; k < row; ++k) 134  { 135                 sum += transData[i][k] * transData[j][k] / SumVector<double>(data[k]); 136  } 137             if (degree) 138                 weights[i][j] = sum / degree; 139  } 140  } 141 
142     //第二步利用权重矩阵和训练数据集针对每个用户对每一个item评分
143     vector<vector<double> > scores(row, vector<double>(col, 0)); 144     for (vector<double>::size_type i = 0; i < row; ++i) 145  { 146         for (vector<double>::size_type j = 0; j < col; ++j) 147  { 148             //等于0的地方代表user i 还木有评价过item j,需要预测
149             if (0 == data[i][j]) 150                 scores[i][j] = InnerProduct(weights[j],data[i]); 151  } 152  } 153     return scores; 154 } 155 
156 //计算推荐结果的命中率:推荐的items中用户确实喜欢的items数量/推荐的items数量 157 //用户确实喜欢的items是由测试集给出,length表示推荐列表最长为多少,这里将测出 158 //推荐列表长度由1已知增加到length过程中,推荐命中率的变化
159 vector<vector<double> > ComputeHitRate(vector<vector<double> > scores, vector<vector<double> > test, 160  unsigned length) 161 { 162     auto usersNum = test.size(); 163     auto itemsNum = test[0].size(); 164     
165     vector<vector<unsigned> > sortedIndex; 166     //因为只是对测试集中的用户和items进行测试,于是选取与测试集大小一样的预测数据
167     vector<vector<double> > selectedScores(usersNum, vector<double>(itemsNum,0)); 168     vector<double> line; 169     for (unsigned i = 0; i < usersNum; ++i) 170  { 171         for (unsigned j = 0; j < itemsNum; ++j) 172  { 173  line.push_back(scores[i][j]); 174  } 175  sortedIndex.push_back(DescendVector(line)); 176  line.clear(); 177  } 178     //hitRate的第一列存储推荐列表的长度,第二列存储对应的命中率
179     vector<vector<double> > hitRate(length); 180     for (unsigned k = 1; k <= length; ++k) 181  { 182         hitRate[k-1].push_back(k); 183         double Counter = 0; 184         for (unsigned i = 0; i < usersNum; ++i) 185  { 186             for (unsigned j = 0; j < k; ++j) 187  { 188                 unsigned itemIndex = sortedIndex[i][j]; 189                 if (test[i][itemIndex]) 190                     ++Counter; 191  } 192  } 193         hitRate[k-1].push_back(Counter / (k * usersNum)); 194  } 195     return hitRate; 196 } 197 int main() 198 { 199     string FilePath1("data.txt"); 200     //string FilePath2("E:\\Matlab code\\recommendation system\\data\\movielens\\test.txt");
201     
202     int row = 10; 203     int col = 10; 204     cout << "数据读取中..." << endl; 205     vector<vector<double> > train = txtRead<double>(FilePath1, row, col); 206     //vector<vector<double> > test = txtRead<double>(FilePath2, 462, 1591);
207 
208     cout << "利用二部图网络进行评分预测..." << endl; 209     vector<vector<double> > predictScores = ProbS(train); 210     txtWrite(predictScores, "predictScores.txt"); 211     /* 212  cout << "计算命中率..." << endl; 213  vector<vector<double> > hitRate = ComputeHitRate(predictScores, test, 1591); 214 215  txtWrite(hitRate, "hitRate.txt"); 216  cout << "命中率结果保存完毕!" << endl; 217 */
218     return 0; 219 }

你可能感兴趣的:(推荐系统之基于二部图的个性化推荐系统原理及C++实现)