// 与同学们的合作很愉快,期待下一次合作! (It was a pleasure working with my classmates; looking forward to our next collaboration!)
/*=============================================================================
#     FileName: Eclat.cpp
#         Desc: Eclat frequent-itemset mining over a fixed-size transaction
#               database, using one bitset of transaction ids per itemset.
#       Author: zhuting
#        Email: [email protected]
#     HomePage: my.oschina.net/locusxt
#      Version: 0.0.1
#    CreatTime: 2013-12-07 18:54:34
#   LastChange: 2013-12-07 18:54:34
#      History:
=============================================================================*/
#include <cstdio>
#include <cstdlib>
#include <bitset>
#include <string>
#include <cstring>
#include <time.h>
#include <vector>

#define item 1001                 /* item ids must lie in [0, item) */
#define line 100000               /* maximum number of transactions read */
#define datafile "T10I4D100K.dat"
#define outputfile "r_ans.txt"
#define out_put false             /* true: also dump every frequent itemset to fw */

using namespace std;

int min_num = 0; /* minimum number of transactions an itemset must occur in */
char outputfilename[500] = outputfile;
FILE* fw = fopen(outputfilename, "w"); /* may be NULL; every use is guarded */

/* One frequent itemset together with the transactions that contain it. */
class mymap
{
public:
    bitset <item> it_bs; /* the items of the itemset */
    int it_set;          /* support (number of transactions) */
    bitset <line> l_bs;  /* ids of the transactions containing the itemset */
    mymap()
    {
        it_set = 0;
    }
};

vector <mymap> mmp1;
vector <mymap> mmp2;
int cur_beg = 0, cur_end = -1, cur_mmp = -1; /* cur_mmp + 1 == itemsets found so far */

/* onemap records the occurrences of a single item (1-itemset candidates). */
class onemap
{
public:
    int it_set;         /* support (number of transactions) */
    bitset <line> l_bs; /* ids of the transactions the item occurs in */
    onemap()
    {
        it_set = 0;
    }
};

onemap om[item];

/* True when itemset dumping is switched on and the output file is usable. */
static bool writing_enabled()
{
    return out_put && fw != NULL;
}

/* Writes the item ids of an itemset to fw, each followed by one space. */
static void print_items(const bitset <item> & items)
{
    for (int j = 0; j < item; ++j)
    {
        if (items[j]) fprintf(fw, "%d ", j);
    }
}

/* Marks item `id` as present in transaction `transaction`.
 * Returns false (and records nothing) when the id does not fit in om[].
 * The support is only bumped the first time an item is seen in a transaction,
 * so a repeated item cannot inflate it. */
static bool record_item(int id, int transaction)
{
    if (id < 0 || id >= item) return false;
    if (!om[id].l_bs[transaction])
    {
        om[id].l_bs[transaction] = true;
        om[id].it_set += 1;
    }
    return true;
}

/* Scans the database once and collects the frequent 1-itemsets into mmp1.
 *
 * The file is read character by character: every run of decimal digits is an
 * item id, any other character separates ids, and '\n' ends a transaction.
 * Lines without any id are skipped. Reading stops at EOF or after `line`
 * transactions, whichever comes first. Transaction ids are zero-based so they
 * always fit in bitset<line>. Exits the program if the data file cannot be
 * opened. */
void input()
{
    char filename[500] = datafile;
    FILE* fp = fopen(filename, "r");
    if (fp == NULL)
    {
        fprintf(stderr, "cannot open data file \"%s\"\n", filename);
        exit(EXIT_FAILURE);
    }

    int cur_line = 0;            /* zero-based id of the transaction being read */
    int value = 0;               /* item id accumulated so far */
    bool in_number = false;      /* currently inside a run of digits */
    bool line_has_items = false; /* current line produced at least one id */
    long skipped = 0;            /* ids rejected because they are >= item */
    int ch;

    while (cur_line < line && (ch = fgetc(fp)) != EOF)
    {
        if (ch >= '0' && ch <= '9')
        {
            /* Stop growing once the id is already out of range; this keeps
             * `value` out of range without risking integer overflow. */
            if (value < item) value = value * 10 + (ch - '0');
            in_number = true;
            continue;
        }
        if (in_number)
        {
            if (!record_item(value, cur_line)) ++skipped;
            line_has_items = true;
            value = 0;
            in_number = false;
        }
        if (ch == '\n' && line_has_items)
        {
            ++cur_line;
            line_has_items = false;
        }
    }
    /* The file may end without a trailing separator after the last id. */
    if (in_number && cur_line < line)
    {
        if (!record_item(value, cur_line)) ++skipped;
    }
    fclose(fp);

    if (skipped > 0)
    {
        fprintf(stderr, "warning: ignored %ld item id(s) outside [0, %d)\n", skipped, item);
    }

    /* NOTE(review): the scan starts at item 1, so item id 0 is never reported
     * as frequent. Kept as in the original -- confirm whether the data set
     * uses id 0. */
    for (int i = 1; i < item; ++i)
    {
        if (om[i].it_set >= min_num) /* promote the frequent 1-itemset to a mymap */
        {
            mymap mp;
            mp.it_bs[i] = true;
            mp.it_set = om[i].it_set;
            mp.l_bs = om[i].l_bs;
            ++cur_mmp;
            mmp1.push_back(mp);
            if (writing_enabled())
            {
                print_items(mp.it_bs);
                fprintf(fw, ":%d\n", mp.it_set); /* support */
            }
        }
    }
    return;
}

/* Tries to join the k-itemsets vm1[a] and vm1[b] into a (k+1)-itemset.
 *
 * Returns false when the two itemsets differ in more than their highest item
 * (they cannot be joined). Otherwise returns true, and when the joined itemset
 * reaches min_num transactions it is appended to vm2 and counted in cur_mmp. */
bool conmp(int a, int b, vector <mymap> & vm1, vector <mymap> & vm2)
{
    bool afind = false, bfind = false;
    bitset <item> a_bs = vm1[a].it_bs;
    bitset <item> b_bs = vm1[b].it_bs;

    /* Scan from the highest id downwards and clear the first set bit of each
     * itemset. If the itemsets differ only in their last item, the two
     * remainders are equal. */
    for (int i = item - 1; i >= 0; --i)
    {
        if (!afind && a_bs[i])
        {
            afind = true;
            a_bs[i] = 0;
        }
        if (!bfind && b_bs[i])
        {
            bfind = true;
            b_bs[i] = 0;
        }
        if (afind && bfind)
        {
            break;
        }
    }
    if (a_bs != b_bs) return false; /* different prefixes: no (k+1)-itemset */

    /* Intersecting the transaction sets yields the transactions of the union. */
    bitset <line> bs_temp = vm1[a].l_bs & vm1[b].l_bs;
    int i_temp = static_cast<int>(bs_temp.count()); /* support */
    if (i_temp >= min_num)
    {
        mymap mp;
        mp.it_bs = vm1[a].it_bs | vm1[b].it_bs;
        mp.l_bs = bs_temp;
        mp.it_set = i_temp;
        vm2.push_back(mp);
        ++cur_mmp;
        if (writing_enabled())
        {
            print_items(mp.it_bs);
            fprintf(fw, " : %d\n", mp.it_set);
        }
    }
    return true;
}

/* Generates all frequent itemsets level by level, starting from the
 * k-itemsets in vm1.
 *
 * The two vectors are used alternately to save space: the itemsets of one
 * level are joined into the other vector, the source is cleared, and the roles
 * are swapped. The process stops when a level yields fewer than two itemsets. */
void connect(vector <mymap> & vm1, vector <mymap> & vm2)
{
    vector <mymap> * current = &vm1;
    vector <mymap> * next = &vm2;
    for (;;)
    {
        const int count = static_cast<int>(current->size());
        for (int i = 0; i < count; ++i)
            for (int j = i + 1; j < count; ++j)
            {
                /* Itemsets sharing a prefix are stored contiguously, so the
                 * first mismatch ends the run of partners for itemset i. */
                if (!conmp(i, j, *current, *next)) break;
            }
        current->clear();
        if (next->size() < 2) break;
        vector <mymap> * swap_temp = current;
        current = next;
        next = swap_temp;
    }
    return;
}

/* Reads the minimum support as a fraction (e.g. 0.05 for 5%), mines the data
 * file and prints the elapsed CPU time and the number of frequent itemsets. */
int main()
{
    float minsup; /* threshold as a fraction, e.g. 5% is entered as 0.05 */
    printf("input limit...\n");
    if (scanf("%f", &minsup) != 1 || !(minsup >= 0.0f && minsup <= 1.0f))
    {
        fprintf(stderr, "the limit must be a number between 0 and 1\n");
        if (fw != NULL) fclose(fw);
        return EXIT_FAILURE;
    }
    if (out_put && fw == NULL)
    {
        fprintf(stderr, "warning: cannot open output file \"%s\"\n", outputfilename);
    }
    min_num = (int)((float)(line) * minsup) + 1; /* minimum required number of transactions */
    if (out_put) printf("%d\n", min_num);
    if (out_put) printf("now load date...\n");
    clock_t start = clock(); /* start timing */
    input();
    if (out_put) printf("now connect...\n");
    connect(mmp1, mmp2);
    clock_t finish = clock(); /* stop timing */
    printf("total time: %f\n", (float)((float)(finish - start) / (float)CLOCKS_PER_SEC));
    printf("%d\n", cur_mmp + 1); /* total number of frequent itemsets */
    //if(out_put) fprintf(fw, "%d\n", cur_mmp + 1);
    if (fw != NULL) fclose(fw);
    return 0;
}