接下来给出用于评价预测效果好坏、计算RMSE(均方根误差)的头文件 evaluate.h:
1 #ifndef EVALUATE_H 2 #define EVALUATE_H 3 #include <cmath> 4 #include <vector> 5 6 double ComputeRMSE(vector<vector<double> > predict, vector<vector<double> > test) 7 { 8 int Counter = 0; 9 double sum = 0; 10 for (vector<vector<double> >::size_type i = 0; i < test.size(); ++i) 11 { 12 for (vector<double>::size_type j = 0; j < test[0].size(); ++j) 13 { 14 if (predict[i][j] && test[i][j]) 15 { 16 ++Counter; 17 sum += pow((test[i][j] - predict[i][j]), 2); 18 } 19 } 20 } 21 return sqrt(sum / Counter); 22 } 23 24 #endif 最后给出主函数: 1 #include "load.h" 2 #include "evaluate.h" 3 #include <vector> 4 #include <string> 5 #include <cmath> 6 #include <assert.h> 7 using namespace std; 8 9 double norm(vector<double> A) 10 { 11 double res = 0; 12 for(vector<double>::size_type i = 0; i < A.size(); ++i) 13 { 14 res += pow(A[i], 2); 15 } 16 return sqrt(res); 17 } 18 19 double InnerProduct(vector<double> A, vector<double> B) 20 { 21 double res = 0; 22 for(vector<double>::size_type i = 0; i < A.size(); ++i) 23 { 24 res += A[i] * B[i]; 25 } 26 return res; 27 } 28 29 double ComputeSim(vector<double> A, vector<double> B, int method) 30 { 31 switch (method) 32 { 33 case 0://欧氏距离 34 { 35 vector<double> C; 36 for(vector<double>::size_type i = 0; i < A.size(); ++i) 37 { 38 C.push_back((A[i] - B[i])); 39 } 40 return 1 / (1 + norm(C)); 41 break; 42 } 43 case 1://皮尔逊相关系数 44 { 45 double A_mean = 0; 46 double B_mean = 0; 47 for(vector<double>::size_type i = 0; i < A.size(); ++i) 48 { 49 A_mean += A[i]; 50 B_mean += B[i]; 51 } 52 A_mean /= A.size(); 53 B_mean /= B.size(); 54 vector<double> C(A); 55 vector<double> D(B); 56 for(vector<double>::size_type i = 0; i < A.size(); ++i) 57 { 58 C[i] = A[i] - A_mean; 59 D[i] = B[i] - B_mean; 60 } 61 assert(norm(C) * norm(D)); 62 return InnerProduct(C,D) / (norm(C) * norm(D)); 63 break; 64 } 65 case 2: 66 { 67 assert(norm(A) * norm(B)); 68 return InnerProduct(A,B) / (norm(A) * norm(B)); 69 break; 70 } 71 default: 72 { 73 cout << " Choose method:" << endl; 74 
cout << "0:欧氏距离\n1:皮尔逊相关系数\n2:余弦相似度\n"; 75 return -1; 76 } 77 } 78 79 } 80 81 void FindCommon(vector<double> A, vector<double> B, vector<double> &C, vector<double> &D) 82 { 83 for(vector<double>::size_type i = 0; i < A.size(); ++i) 84 { 85 if (A[i] && B[i]) 86 { 87 C.push_back(A[i]); 88 D.push_back(B[i]); 89 } 90 } 91 } 92 93 94 vector<vector<double> > UserBasedCF(vector<vector<double> > train, int usersNum, int itemsNum) 95 { 96 vector<vector<double> > predict(usersNum, vector<double>(itemsNum, 0)); 97 for (int i = 0; i < usersNum; ++i) //对每个用户进行预测 98 { 99 //找出user i未评分的item j,预测user i 对item j的评分 100 for (int j = 0; j < itemsNum; ++j) 101 { 102 103 104 if (train[i][j]) 105 continue; 106 //如果item j没有被user i评过分,找出对 item j评过分的用户 107 else 108 { 109 vector<double> sim; 110 vector<double> historyScores; 111 for (int k = 0; k < usersNum; ++k) 112 { 113 //如果user k对item j 评过分,计算user k与user i的相似度 114 115 if (train[k][j])//找出对item j 评过分的user k 116 { 117 // 为了计算user k与user i的相似度,必须找出二者共同评过分的items 118 // 把二者对共同评过分的items的评分分别存储在两个vector中 119 vector<double> commonA,commonB; 120 FindCommon(train[i], train[k], commonA, commonB); 121 //如果二者存在共同评过分的items,计算相似度 122 if (!commonA.empty()) 123 { 124 sim.push_back(ComputeSim(commonA, commonB, 2)); 125 // 把user k对item j 的历史评分记录下来 126 historyScores.push_back(train[k][j]); 127 } 128 } 129 130 } 131 // 计算出所有与user i存在共同评过分的items的users与user i之间的相似度, 132 // 保存在sim中,这些users对目标items j(即user i没有评过分)的历史评分记 133 // 录在historyScores中。利用这两个vector,计算出相似度加权平均分作为预 134 // 测user i对item j的评分 135 double SimSum = 0; 136 if (!sim.empty()) 137 { 138 for(vector<double>::size_type m = 0; m < sim.size(); ++m) 139 { 140 SimSum += sim[m]; 141 } 142 predict[i][j] = InnerProduct(sim, historyScores) / (SimSum); 143 cout << "User "<< i << " 对第 " << j << " 个Item的评分为 " << predict[i][j] << endl; 144 } 145 } 146 } 147 } 148 return predict; 149 } 150 151 int main() 152 { 153 string FilePath1("E:\\Matlab code\\recommendation system\\data\\movielens\\train.txt"); 154 string 
FilePath2("E:\\Matlab code\\recommendation system\\data\\movielens\\test.txt"); 155 156 int row = 943; 157 int col = 1682; 158 vector<vector<double> > train = txtRead<double>(FilePath1, row, col); 159 vector<vector<double> > predict = UserBasedCF(train, row, col); 160 txtWrite(predict, "predict.txt"); 161 vector<vector<double> > test = txtRead<double>(FilePath2, 462, 1591); 162 double rmse = ComputeRMSE(predict,test); 163 cout << "RMSE is " << rmse <<endl; 164 return 0; 165 } |
4.运行
由于程序没有经过优化,循环比较多,运行时间比较长。程序写得不够好,如果读者有兴趣帮我优化,请联系我,多谢。有兴趣的读者也可以自己构造一个小一点的数据集试一试。以前我用这个数据集在Matlab中运行得到的RMSE是1左右,所以如果读者的运行结果在测试集上的RMSE处于0.9-1.3之间,问题应该不大;如果偏离太多,程序设计可能就有问题。