阿里巴巴大数据竞赛-天池

本人菜鸟,对天池数据写点小理解。天池数据不同于一般的评分数据,主要有以下几点:天池数据中存在同一用户对同一物品的不同操作,这与SVD的假设(每个用户对每个物品只有一个评分)相悖,所以数据处理这一块非常重要。再结合阿里的业务背景来看,会发现数据内部存在这样一种关系:购买的物品必然先经过点击、加入购物车,那么这时购物车的分析意义有多大?值得商榷。同时,那么多的点击次数,是因为对这个商品有购买欲望,还是因为购买之后回来看看价格波动?此外,淘宝的数据还存在这样一个问题:例如小零食可以出现多次购买,但电脑、冰箱这种大物件二次购买的几率基本比较小,而数据又经过加密处理,这种情况怎么考虑?等等。这篇文章主要还是一贯作风,贴代码,留个纪念。效果不是很好,F1只有6.4,仅当纪念。

#include <cstddef>
#include <fstream>
#include <iostream>
#include <map>
#include <string>
#include <vector>

using namespace std;

// Forward declarations of the functions defined later in this file.
void load(std::string);
void train(std::vector<int>,std::vector<int>,std::vector<int>,int);
void ComM(std::vector<int>,std::vector<int>,std::vector<int>);
void ComR(std::vector<int>,std::vector<int>,std::vector<int>);
void TopN(std::vector<std::map<std::vector<int>,double> >);

// Weights handed to train() by main(): W1 for the first data set, W2 for the second.
double W1 = 1;
double W2 = 1;

// Five parameters (user, item, rating, index, weight), matching the definition
// below; the previous four-parameter prototype declared an overload that was
// never defined.
std::map<std::vector<int>,double> predictScore(int,std::vector<int>,std::vector<int>,int,int);

// Parallel columns appended to by load(): user id, item id and rating code of
// each record read from the input file.
std::vector<int> U,I,R;

// Per-user maxima appended by ComM(), one vector per rating code
// (MaxRc: 0, MaxRv: 1, MaxRp: 2, MaxRg: 3). The Min* vectors are declared but
// never written by the code in this file.
std::vector<int> MinRc,MaxRc,MaxRv,MinRv,MaxRp,MinRp,MaxRg,MinRg;

// Occurrence counts keyed by {user id, item id}, filled by ComR(), one map per
// rating code (RCui: 0, RVui: 1, RPui: 2, RGui: 3).
std::map<std::vector<int>,int> RCui,RVui,RPui,RGui;

// One score map per train() iteration; written out by TopN().
std::vector<std::map<std::vector<int>,double> > result;


// NOTE(review): presumably the number of train() passes main() runs per data
// set -- confirm against main().
int step = 1;

int main()
{
    string trains_file = "/home/ja/CADATA/ALI/data/New/trains_tmp.dat";
    load(trains_file);
    ComM(U,I,R);


    string train_file = "/home/ja/CADATA/ALI/data/New/trains.dat";

    load(train_file);
    ComR(U,I,R);

    for(size_t i=0;i){
        train(U,I,R,W1);
    }

    U.clear();
    I.clear();
    R.clear();
    RCui.clear();
    RVui.clear();
    RPui.clear();
    RGui.clear();

    string train_file_1 = "/home/ja/CADATA/ALI/data/New/test_s.dat";

    load(train_file_1);
    ComR(U,I,R);

    for(size_t i=0;i){
        train(U,I,R,W2);
    }


    TopN(result);

    return 0;
}

/**
 * Writes every score in `r` to the result file, one line per entry:
 * key[0], key[1] and the score separated by tabs.
 *
 * @param r  score maps keyed by two-element vectors (the keys built by
 *           predictScore() hold {user index, item id}).
 *
 * If the output file cannot be opened, a message is printed and nothing is
 * written (previously the failure was silent).
 */
void TopN(std::vector<std::map<std::vector<int>,double> > r){
    std::ofstream out("/home/ja/CADATA/ALI/data/New/result/result.dat");
    if(!out){
        std::cout << "error for fileName" << std::endl;
        return;
    }

    typedef std::map<std::vector<int>,double> ScoreMap;
    for(std::size_t i=0;i<r.size();i++){
        for(ScoreMap::const_iterator it=r[i].begin();it!=r[i].end();++it){
            // '\n' instead of endl: same bytes, without a flush per line.
            out << it->first[0] << "\t" << it->first[1] << "\t" << it->second << "\n";
        }
    }
}

mapint>,double> predictScore(int user,vector<int> item,vector<int> rating,int index,int w){
    mapint>,double> TMPc,TMPv,TMPp,TMPg,Tmpresult;
    for(mapint>,int>::iterator it=RCui.begin();it!=RCui.end();++it){
        if(it->first[0] == index){
            double t = w * it->second / (MaxRc[index] * 1.0);
            //double t = it->second / (MaxRc[index] * 1.0);
            vector<int> tmp;
            tmp.push_back(index);
            tmp.push_back(it->first[1]);
            TMPc[tmp] += t;
            //cout << it->second<< "--" << MaxRc[index]<< "--" << t <
        }
    }

    for(mapint>,int>::iterator it=RVui.begin();it!=RVui.end();++it){
        if(it->first[0] == index){
            double t = w * it->second / (MaxRv[index]* 1.0);
            vector<int> tmp;
            tmp.push_back(index);
            tmp.push_back(it->first[1]);
            TMPv[tmp] += t;
        }
    }

    for(mapint>,int>::iterator it=RPui.begin();it!=RPui.end();++it){
        if(it->first[0] == index){
            double t = w * it->second / (MaxRp[index] * 1.0);
            //double t = it->second / (MaxRp[index] * 1.0);
            vector<int> tmp;
            tmp.push_back(index);
            tmp.push_back(it->first[1]);
            TMPp[tmp] += t;
        }
    }

    for(mapint>,int>::iterator it=RGui.begin();it!=RGui.end();++it){
        if(it->first[0] == index){
            double t = w * it->second / (MaxRg[index] * 1.0);
            //double t = it->second / (MaxRg[index] * 1.0);
            vector<int> tmp;
            tmp.push_back(index);
            tmp.push_back(it->first[1]);
            TMPg[tmp] += t;
        }

    }

    mapint>,double> TmpVv,TmpPp,TmpGg,TmpPpp,TmpGgg,TmpGggg;

    for(mapint>,double>::iterator it=TMPc.begin();it!=TMPc.end();++it){
        double score = it->second;
        for(mapint>,double>::iterator itt=TMPv.begin();itt!=TMPv.end();++itt){
            if(it->first[0] == itt->first[0] ){
                if(it->first[1] == itt->first[1]){
                    score += itt->second;
                }
                else{
                    vector<int> tmp;
                    tmp.push_back(itt->first[0]);
                    tmp.push_back(itt->first[1]);
                    TmpVv[tmp] = itt->second;
                    score += 0;
                }
            }
        }

        for(mapint>,double>::iterator itt=TMPp.begin();itt!=TMPp.end();++itt){
            if(it->first[0] == itt->first[0] ){
                if(it->first[1] == itt->first[1]){
                    score += itt->second;
                }
                else{
                    vector<int> tmp;
                    tmp.push_back(itt->first[0]);
                    tmp.push_back(itt->first[1]);
                    TmpPp[tmp] = itt->second;
                    score += 0;
                }
            }
        }

        for(mapint>,double>::iterator itt=TMPg.begin();itt!=TMPg.end();++itt){
            if(it->first[0] == itt->first[0] ){
                if(it->first[1] == itt->first[1]){
                    score += itt->second;
                }
                else{
                    vector<int> tmp;
                    tmp.push_back(itt->first[0]);
                    tmp.push_back(itt->first[1]);
                    TmpGg[tmp] = itt->second;
                    score += 0;
                }
            }
        }
        vector<int> tmp;
        tmp.push_back(it->first[0]);
        tmp.push_back(it->first[1]);
        Tmpresult[tmp] += score;
    }

    double size = Tmpresult.size();

    if(TmpVv.size() != 0){
        for(mapint>,double >::iterator it = TmpVv.begin();it!=TmpVv.end();++it){
            double score = it->second;
            for(mapint>,double>::iterator itt=TmpPp.begin();itt!=TmpPp.end();++itt){
                if(it->first[0] == itt->first[0]){
                    if(it->first[1] == itt->first[1]){
                        score += itt->second;
                    }
                    else{
                        vector<int> tmp;
                        tmp.push_back(itt->first[0]);
                        tmp.push_back(itt->first[1]);
                        TmpPpp[tmp] = itt->second;
                        score += 0;
                    }
                }
            }

            for(mapint>,double>::iterator itt=TmpGg.begin();itt!=TmpGg.end();++itt){
                if(it->first[0] == itt->first[0] ){
                    if(it->first[1] == itt->first[1]){
                        score += itt->second;
                    }
                    else{
                        vector<int> tmp;
                        tmp.push_back(itt->first[0]);
                        tmp.push_back(itt->first[1]);
                        TmpGgg[tmp] = itt->second;
                        score += 0;
                    }
                }
            }
            vector<int> tmp;
            tmp.push_back(it->first[0]);
            tmp.push_back(it->first[1]);
            Tmpresult[tmp] += score;
        }
    }




    if(TmpPp.size() != 0){
        for(mapint>,double >::iterator it = TmpPpp.begin();it!=TmpPpp.end();++it){
            double score = it->second;
            for(mapint>,double>::iterator itt=TmpGgg.begin();itt!=TmpGgg.end();++itt){
                if(it->first[0] == itt->first[0] ){
                    if(it->first[1] == itt->first[1]){
                        score += itt->second;
                    }
                    else{
                        vector<int> tmp;
                        tmp.push_back(itt->first[0]);
                        tmp.push_back(itt->first[1]);
                        TmpGggg[tmp] = itt->second;
                        score += 0;
                    }
                }
            }
            vector<int> tmp;
            tmp.push_back(it->first[0]);
            tmp.push_back(it->first[1]);
            Tmpresult[tmp] += score;
        }
    }






    if(TmpGggg.size() != 0){

        for(mapint>,double >::iterator it = TmpGggg.begin();it!=TmpGggg.end();++it){
            double score = it->second;
            vector<int> tmp;
            tmp.push_back(it->first[0]);
            tmp.push_back(it->first[1]);
            Tmpresult[tmp] += score;
        }
    }
/*
    if(size != result.size()){
        cout << (size - result.size()) << endl;
    }
*/
    return Tmpresult;
}

void train(vector<int> User,vector<int> Item,vector<int> Rating,int W){
    mapint>,double> Score;
    //for(size_t i=0;i
    for(size_t i=0;i<884;i++){
        Score = predictScore(User[i],Item,Rating,i,W);
        result.push_back(Score);
    }
}

void ComR(vector<int> User,vector<int> Item,vector<int> Rating){
    for(size_t i=0;i1;i++){
        if(Rating[i] == 0){
            if(User[i] == User[i+1] && Item[i] == Item[i+1]){
                vector<int> tmp;
                tmp.push_back(User[i]);
                tmp.push_back(Item[i]);
                RCui[tmp] += 1;
            }
            else{
                vector<int> tmp;
                tmp.push_back(User[i]);
                tmp.push_back(Item[i]);
                RCui[tmp] += 1;
            }
        }
        if(i == User.size()-2){
            vector<int> tmp;
            tmp.push_back(User[i+1]);
            tmp.push_back(Item[i+1]);
            RCui[tmp] += 1;
        }
    }

    for(size_t i=0;i1;i++){
        if(Rating[i] == 1){
            if(User[i] == User[i+1] && Item[i] == Item[i+1]){
                vector<int> tmp;
                tmp.push_back(User[i]);
                tmp.push_back(Item[i]);
                RVui[tmp] += 1;
            }
            else{
                vector<int> tmp;
                tmp.push_back(User[i]);
                tmp.push_back(Item[i]);
                RVui[tmp] += 1;
            }
        }
    }

    for(size_t i=0;i1;i++){
        if(Rating[i] == 2){
            if(User[i] == User[i+1] && Item[i] == Item[i+1]){
                vector<int> tmp;
                tmp.push_back(User[i]);
                tmp.push_back(Item[i]);
                RPui[tmp] += 1;
            }
            else{
                vector<int> tmp;
                tmp.push_back(User[i]);
                tmp.push_back(Item[i]);
                RPui[tmp] += 1;
            }
        }
    }

    for(size_t i=0;i1;i++){
        if(Rating[i] == 3){
            if(User[i] == User[i+1] && Item[i] == Item[i+1]){
                vector<int> tmp;
                tmp.push_back(User[i]);
                tmp.push_back(Item[i]);
                RGui[tmp] += 1;
            }
            else{
                vector<int> tmp;
                tmp.push_back(User[i]);
                tmp.push_back(Item[i]);
                RGui[tmp] += 1;
            }
        }
    }
}

void ComM(vector<int> User,vector<int> Item,vector<int> Rating){
    int max = 0;
    int num = 1;
    int min = 1000;
/*
    for(size_t un=0;un<884;un++){
        for(size_t i=0;i max){
                        max = num;
                    }
                }
                else{
                    if(num > max){
                        max = num;
                    }
                    num = 1;
                }
            }
        }
        MaxRc.push_back(max);
        num = 1;
        max = 0;
    }
    */
    for(size_t un=0;un<884;un++){
        for(size_t i=0;i1;i++){
            if(Rating[i] == 0 && User[i] == un){
                if(User[i] == User[i+1] && Rating[i] == Rating[i+1] && Item[i] == Item[i+1]){
                    num += 1;
                    if(num > max){
                        max = num;
                    }
                }
                else{
                    if(num > max){
                        max = num;
                    }
                    num = 1;
                }
            }
        }
        MaxRc.push_back(max);
        num = 1;
        max = 0;
    }

    for(size_t un=0;un<884;un++){
        for(size_t i=0;i1;i++){
            if(Rating[i] == 1 && User[i] == un){
                //cout << i << " " << Rating[i] << " " << un << endl;
                if(User[i] == User[i+1] && Rating[i] == Rating[i+1] && Item[i] == Item[i+1]){
                    num += 1;
                    if(num > max){
                        max = num;
                    }
                }
                else{
                    num = 1;
                    if(num > max){
                        max = num;
                    }
                }
            }
        }
        MaxRv.push_back(max);
        num = 1;
        max = 0;
    }

    for(size_t un=0;un<884;un++){
        for(size_t i=0;i1;i++){
            if(Rating[i] == 2 && User[i] == un){
                //cout << i << " " << Rating[i] << " " << un << endl;
                if(User[i] == User[i+1] && Rating[i] == Rating[i+1] && Item[i] == Item[i+1]){
                    num += 1;
                    if(num > max){
                        max = num;
                    }
                }
                else{
                    num = 1;
                    if(num > max){
                        max = num;
                    }
                }
            }
        }
        MaxRp.push_back(max);
        num = 1;
        max = 0;
    }

    for(size_t un=0;un<884;un++){
        for(size_t i=0;i1;i++){
            if(Rating[i] == 3 && User[i] == un){
                //cout << i << " " << Rating[i] << " " << un << endl;
                if(User[i] == User[i+1] && Rating[i] == Rating[i+1] && Item[i] == Item[i+1]){
                    num += 1;
                    if(num > max){
                        max = num;
                    }
                }
                else{
                    num = 1;
                    if(num > max){
                        max = num;
                    }
                }
            }
        }
        MaxRg.push_back(max);
        num = 1;
        max = 0;
    }

}

void load(string file){
    ifstream fin(file.c_str());
    if(!fin){
        cout << "error for fileName" << endl;
    }

    int userId,itemId,rating;
    while(fin >> userId >> itemId >> rating){
        U.push_back(userId);
        I.push_back(itemId);
        R.push_back(rating);
    }
    fin.close();
}

 

转载于:https://www.cnblogs.com/wn19910213/p/3661891.html

你可能感兴趣的:(阿里巴巴大数据竞赛-天池)