马上就找工作了,正好认认真真把软赛复盘一下。
赛题本质上为在大规模稀疏有向图中找3-7的环路,即给定有向图G = (V,E),寻找出所有满足长度范围在[3, 7]并满足前后路径权值浮动在一定范围内(如A-B的权重为X,B-C的权重为Y,则需满足0.2 <= Y/X <= 3)的简单环路,数据满足以下条件:
要求输出满足:
赛题比较简单,同热身赛以运行时间为评价指标,主要考察算法、程序优化以及面向鲲鹏处理器的特点(Cache、多核等)进行优化。
由于赛题比较简单,所以大部分选手的算法都大同小异。主要有两个方向:
两个方向对于不同的数据集效果不同,经测验,在随机图中6+3性能优于4+3,而在完全图和菊花图中,4+3性能优于6+3。初赛的线上数据集6+3比4+3快一些,也是我所采用的算法,复赛线上数据集4+3比6+3快一些,不过相差不大。
程序主要包含以下3个部分:数据读取以及正反向图构建、多线程找环运动、结果输出,以下将是3个部分的详解以及其中用到的一些优化点讲解。
因为赛题的输入输出数据量都比较大,所以IO时间至关重要。在数据读取部分,主要利用了多线程以及数据结构上的优化。
1 数据读取
// Excerpt from loadTestData(): maps the input file and parses it in one pass.
// All parsed records, flattened as consecutive (u, v, m) triples.
int* dataAll;
int dataAlln = 0;
char *buf = NULL;
int fd = open(testFile.c_str(),O_RDONLY);
if(fd < 0) {
cout << "open testFile error" << endl;
return false;
}
long dataSize = lseek(fd, 0, SEEK_END); // total number of bytes in the file
buf = (char *)mmap(NULL, dataSize, PROT_READ, MAP_PRIVATE, fd, 0);
dataAll = (int*)malloc(DATASIZE * 3 * sizeof(int));// holds every record
int* tmp = (int*)malloc(DATASIZE * sizeof(int));// holds each distinct id once
BitMap* NumMap = new BitMap();// remembers which ids were already appended to tmp
int u = 0, v = 0, m = 0;
int n = 0;
int tmpn = 0;
// n counts consumed bytes; buf itself is advanced as the read cursor.
while(n < dataSize){
__builtin_prefetch(buf + 1024, 0); // prefetch input ahead of the cursor
u = 0;
v = 0;
m = 0;
// First field: decimal digits up to the first comma.
while(*buf != ',') u = u * 10 + (*buf++ - '0'), ++n;
++buf;
++n;
// Second field: decimal digits up to the second comma.
while(*buf != ',') v = v * 10 + (*buf++ - '0'), ++n;
++buf;
++n;
// Third field: ends at the first byte below '0' (for example '\r' or '\n').
while(*buf >= '0') m = m * 10 + (*buf++ - '0'), ++n;
// Skip the line terminator byte(s).
// NOTE(review): none of the inner loops compares n with dataSize, so after
// the last record this scan keeps reading beyond the mapped file until it
// meets a byte >= '0'.
while(*buf < '0') ++buf, ++n;
// Append the record.
*(dataAll + dataAlln++) = u;
*(dataAll + dataAlln++) = v;
*(dataAll + dataAlln++) = m;
// Append each endpoint to tmp only the first time it is seen.
if(!NumMap->test(u)){
*(tmp + tmpn++) = u;
NumMap->set(u);
}
if(!NumMap->test(v)){
*(tmp + tmpn++) = v;
NumMap->set(v);
}
}
这里的主要优化点有:
//bitmap
#define WORD 32
#define SHIFT 5    // log2(WORD): i >> SHIFT is the index of the word holding bit i
#define MASK 0x1F  // WORD - 1: i & MASK is the position of bit i inside its word
#define MAXN 2147483647
// Bit set with one bit for every int in [0, MAXN]; all bits start cleared.
//
// The storage comes from calloc so that it is guaranteed to be zeroed (the
// previous `new int[...]` left the words indeterminate, so test() could
// report ids that were never set). The words are unsigned so that setting
// bit 31 does not shift into the sign bit.
class BitMap{
private:
    unsigned int *bitmap;
public:
    // Allocates the zero-filled bit array; throws std::bad_alloc on failure.
    BitMap()
        : bitmap(static_cast<unsigned int*>(std::calloc(1 + MAXN / WORD, sizeof(unsigned int)))){
        if(bitmap == nullptr) throw std::bad_alloc();
    }
    ~BitMap(){
        std::free(bitmap);
    }
    // The object owns a raw buffer, so copying is disabled.
    BitMap(const BitMap&) = delete;
    BitMap& operator=(const BitMap&) = delete;
    // Marks i (0 <= i <= MAXN) as present.
    void set(int i){
        bitmap[i >> SHIFT] |= (1u << (i & MASK));
    }
    // Returns non-zero (1) when i was set before, 0 otherwise.
    int test(int i){
        return (bitmap[i >> SHIFT] >> (i & MASK)) & 1u;
    }
};
2 建立映射及数据预处理
// Excerpt from loadTestData(): assigns dense ids and counts degrees.
// Text form of every id, 11 bytes per dense id; int2char stores the digit
// count in the first byte of a slot and the digits after it.
char *idsComma = new char[11 * DATASIZE];
// Mapping from original id to dense id.
unordered_map<int, int> graphmap;
// Build the mapping: the i-th entry of tmp receives dense id i.
int mapId = 0, id = 0;
graphmap.reserve(datasize);
for(int i = 0; i < tmpn; ++i){
id = tmp[i];
int2char(id,idsComma,mapId);
graphmap[id]=mapId++;
}
// Rewrite the endpoints stored in dataAll using several threads.
// Each slice boundary is a multiple of 3 so a record is never split.
int splitNum = dataAlln / 3;
splitNum /= 4;
int start[5] = {0, splitNum * 3, 2 * splitNum * 3, 3 * splitNum * 3, dataAlln};
vector<thread> spTreads;
for(int i = 0; i < THREADNUM; i++){
spTreads.push_back(thread(toMapId,
start[i], start[i+1]));
}
for(auto iter = spTreads.begin(); iter != spTreads.end(); iter++){
iter->join();
}
// Count degrees once. Each edge adds 2 because an adjacency entry occupies
// two ints (neighbour, weight) in the graph arrays.
for(int i=0;i<dataAlln;i+=3){
u=dataAll[i];
v=dataAll[i+1];
outNum[u] += 2;
inNum[v] += 2;
}
这里的主要优化点有:
3 建立正反向图
这里以反向图为例,正向图的建立方法同反向图
//反向图建图
void buildMapBack(){
#ifdef TEST
timeval tStart, cTime;
gettimeofday(&tStart, 0);
long long tPassed = 0;
#endif
//根据出入度确定地址
int graphBackIdx = 0;
for(int i = 0; i < datasize; i++){
inAddr[i] = graphBackIdx;
graphBackIdx += inNum[i];
inNum[i] = 0;
}
//设置最大节点地址
inAddr[datasize] = graphBackIdx;
int u, v, m;
int pos;
for(int i=0;i<dataAlln;i+=3){
u=dataAll[i];
v=dataAll[i+1];
m=dataAll[i+2];
pos = inAddr[v] + inNum[v];
graph_back[pos]=u;
graph_back[pos + 1]=m;
inNum[v] += 2;
}
//对反向图数据进行排序
for(int i = 0; i < datasize; ++i) {
int size = inNum[i];
if(size > 2){
int ssize = size >> 1;
vector<pair<int, int>> temp(ssize, make_pair(0, 0));
for(int vi = 0, vj = 0; vi < size, vj < ssize; vi += 2, ++vj){
temp[vj] = make_pair(graph_back[inAddr[i] + vi], graph_back[inAddr[i] + vi + 1]);
}
sort(temp.begin(), temp.end(), [](pair<int, int>&a, pair<int, int>&b)->bool{
return a.first > b.first;
});
for(int vi = 0, vj = 0; vi < size, vj < ssize; vi += 2, ++vj){
graph_back[inAddr[i] +vi] = temp[vj].first;
graph_back[inAddr[i] +vi + 1] = temp[vj].second;
}
}
}
#ifdef TEST
gettimeofday(&cTime, 0);
cTime.tv_sec -= tStart.tv_sec;
cTime.tv_usec -= tStart.tv_usec;
tPassed = 1000000LL * cTime.tv_sec + cTime.tv_usec;
tPassed /= 1000;
cout << "buildMapBack Time: " << tPassed << endl;
#endif
}
为了提高程序效率,正反向图都采用数组来进行存储。由于预先不知道需要分配多大的空间,采用类前向星来进行紧密存储,首先根据出入度确定地址,然后依次放入映射后的节点值,最后对邻接节点进行排序,这里有一个较大的优化点为:
鲲鹏下测试,1963W环这部分耗时190ms。
找环部分为了均衡各个线程,使用了atomic原子操作,将每个起始节点的查找当做一次任务,利用抢占式来争夺资源,经测试,这种方法线下线上都是比较均衡的。
// Worker body: walks the start nodes in [sstart, eend), claims each one that
// no other worker has taken yet, and searches the cycles that start there.
// id selects this worker's private buffers (path, posCount, flagNode, ...).
inline void FindCycleThread(int sstart, int eend, int id){
#ifdef TEST
    timeval tStart, cTime;
    gettimeofday(&tStart, 0);
    long long tPassed = 0;
#endif
    const int kLevels = 5;   // one result buffer per cycle length 3..7
    int before[kLevels];     // write offsets seen before searching a root
    for(int root = sstart; root < eend; ++root){
        // test_and_set returns true when another worker already owns root.
        if(flag[root].test_and_set()) continue;
        // A node without incoming or outgoing edges cannot lie on a cycle.
        if(inNum[root] == 0 || outNum[root] == 0) continue;
        // Remember where this root's results begin in every buffer.
        for(int lv = 0; lv < kLevels; ++lv){
            before[lv] = posCount[id][lv];
            AnsSort[lv][root] = path[id][lv] + before[lv];
        }
        // Backward marking first; the forward search only runs when the
        // backward pass marked at least one node.
        dfsBack(root, id);
        if(pathNode[id][0]) dfsFur(root, id);
        // Restore the marks touched by dfsBack to the "unmarked" value 4.
        const int marked = pathNode[id][0];
        for(int k = 1; k <= marked; ++k){
            flagNode[id][pathNode[id][k]] = 4;
        }
        flagNode[id][root] = 4;
        // Record how many ints this root produced per cycle length.
        for(int lv = 0; lv < kLevels; ++lv){
            AnsSortCount[lv][root] = posCount[id][lv] - before[lv];
        }
    }
#ifdef TEST
    gettimeofday(&cTime, 0);
    cTime.tv_sec -= tStart.tv_sec;
    cTime.tv_usec -= tStart.tv_usec;
    tPassed = 1000000LL * cTime.tv_sec + cTime.tv_usec;
    tPassed /= 1000;
    cout << "FindCycleThread thread: " << id << " time: " << tPassed << endl;
#endif
}
因为抢占式资源竞争会导致直接输出的结果是无序的,所以先用两个数组AnsSort和AnsSortCount分别记录以某个节点为起始的3-7环分别的位置以及数量,便于输出时合并。
在构建P3时,需要记录每个节点的位置(1、2、3)、哪些节点被记录以及最后一个路径权重这三个信息。
// Per-thread scratch state of the cycle search.
uint8_t flagNode[THREADNUM][DATASIZE];// backward level of each node (see dfsBack)
int pathNode[THREADNUM][DATASIZE];// [0] = number of marked nodes, [1..] = the marked nodes
int money[THREADNUM][DATASIZE];// weight of the edge node -> start, for direct predecessors
// Backward pass over the reverse graph, up to three hops from `start`.
//
// Only nodes with an id greater than start are followed. Each reached node
// gets a level in flagNode[id]: 1 = direct predecessor of start, 2 = two
// hops away, 3 = three hops away; start itself gets 0. The loops `break` on
// the first neighbour <= start, which relies on the adjacency entries being
// sorted by id in descending order. Every node whose mark is written is
// also appended to pathNode[id] so the caller can reset the marks later.
inline void dfsBack(int start, int id){
int pathNodeCnt = 1;
int i = start;
flagNode[id][i] = 0;
int tk,tj,te;
int nomy1, nomy2, nomy3;
int k = inAddr[i];
for(;k<inAddr[i+1];k+=2){
tk = graph_back[k];
if(tk <= i) break;
nomy1 = graph_back[k + 1];
// Hop 1: remember the weight of the closing edge tk -> start.
money[id][tk] = nomy1;
flagNode[id][tk] = 1;
pathNode[id][pathNodeCnt++] =(tk);
int j = inAddr[tk];
for(;j<inAddr[tk + 1];j+=2){
tj = graph_back[j];
if(tj <= i) break;
nomy2 = graph_back[j + 1];
// Skip edge pairs whose weights violate the ratio rule.
if(checkMoney(nomy2, nomy1)) continue;
// Lower the level to 2 only when the node is currently at 3 or unmarked (4).
if(flagNode[id][tj] > 2){
pathNode[id][pathNodeCnt++] = (tj);
flagNode[id][tj] = 2;
}
int e = inAddr[tj];
for(;e<inAddr[tj+1];e+=2){
te = graph_back[e];
if(te <= i) break;
// Nodes that already carry a level (<= 3) need no further work.
if(flagNode[id][te] <= 3) continue;
nomy3 = graph_back[e + 1];
if(checkMoney(nomy3, nomy2)) continue;
// Only nodes that were still unmarked reach this point; they become level 3.
flagNode[id][te] = 3;
pathNode[id][pathNodeCnt++] = (te);
}
}
}
pathNode[id][0] = pathNodeCnt - 1;
}
这里的主要优化点有:
正向的时候,根据反向得到的信息进行剪枝。
// Forward search for cycles of length 3..7 that begin at `start`.
//
// Walks the forward graph layer by layer. Every loop `break`s as soon as a
// neighbour id drops below the start id (layers 2 and 3: at or below it),
// which relies on adjacency entries being sorted by id in descending order.
// From layer 5 on, flagNode[id] (filled by dfsBack) prunes nodes that cannot
// get back to start within the remaining hops. Each consecutive pair of edge
// weights, including the closing edge and the first edge, is checked with
// checkMoney. A found cycle of length L is appended to path[id][L - 3] and
// posCount[id][L - 3] grows by L; the number of cycles found is added to
// ansCount[id].
inline void dfsFur(int start, int id){
// Layer 1: the start node itself.
int i = start;
// Nodes at path positions 2..7.
int tj,tk,tl,tm,tn,to;
// Weights of the 1st..7th edge of the candidate cycle.
int nomy1, nomy2,nomy3,nomy4,nomy5,nomy6,nomy7;
// num: cycles found in this call; posk: scratch write offset.
int num = 0, posk = 0;
// Layer 2
int it2 = outAddr[i];
for(;it2 < outAddr[i+1]; it2 += 2){
tj = graph[it2];
if(tj <= i) break;
nomy1 = graph[it2 + 1];
// Layer 3
int it3 = outAddr[tj];
for(; it3 < outAddr[tj+1]; it3 += 2){
tk = graph[it3];
if(tk <= i) break;
nomy2 = graph[it3 + 1];
// Weight ratio check between edge 1 and edge 2.
if(checkMoney(nomy1, nomy2)) continue;
// Layer 4
int it4 = outAddr[tk];
for(; it4 < outAddr[tk + 1]; it4 += 2){
tl = graph[it4];
if(tl < i) break;
nomy3 = graph[it4 + 1];
if(checkMoney(nomy2, nomy3)) continue;
else if(tl == i){
if(checkMoney(nomy3, nomy1)) continue;
// Found a cycle of length 3.
auto& pathk = path[id][0];
posk = posCount[id][0];
*(pathk + posk) = i;
*(pathk + posk + 1) = tj;
*(pathk + posk + 2) = tk;
posCount[id][0] += 3;
++num;
continue;
}
else if(tl == tj) continue;
// Layer 5: tm must be start or carry a backward level <= 3.
int it5 = outAddr[tl];
for(; it5 < outAddr[tl+1]; it5+=2){
tm = graph[it5];
if(tm < i) break;
nomy4 = graph[it5 + 1];
if(flagNode[id][tm] > 3 || checkMoney(nomy3, nomy4)) continue;
else if(tm == i){
if(checkMoney(nomy4, nomy1)) continue;
// Found a cycle of length 4.
auto& pathk = path[id][1];
posk = posCount[id][1];
*(pathk + posk) = i;
*(pathk + posk + 1) = tj;
*(pathk + posk + 2) = tk;
*(pathk + posk + 3) = tl;
posCount[id][1] += 4;
++num;
continue;
}
else if(tm == tj || tm == tk) continue;
// Layer 6: tn must be start or carry a backward level <= 2.
int it6 = outAddr[tm];
for(;it6 < outAddr[tm+1]; it6+=2){
tn = graph[it6];
if(tn < i) break;
nomy5 = graph[it6+1];
if(flagNode[id][tn] > 2 || checkMoney(nomy4, nomy5)) continue;
else if(tn == i){
if(checkMoney(nomy5, nomy1)) continue;
// Found a cycle of length 5.
auto& pathk = path[id][2];
posk = posCount[id][2];
*(pathk + posk) = i;
*(pathk + posk + 1) = tj;
*(pathk + posk + 2) = tk;
*(pathk + posk + 3) = tl;
*(pathk + posk + 4) = tm;
posCount[id][2] += 5;
++num;
continue;
}
else if(tn == tj || tn == tk || tn == tl) continue;
// Layer 7: `to` must be start or carry a backward level <= 1.
int it7 = outAddr[tn];
for(; it7 < outAddr[tn+1]; it7+=2){
to = graph[it7];
if(to < i) break;
nomy6 = graph[it7+1];
if(flagNode[id][to] > 1 || checkMoney(nomy5, nomy6)) continue;
else if(to == i){
if(checkMoney(nomy6, nomy1)) continue;
// Found a cycle of length 6.
auto& pathk = path[id][3];
posk = posCount[id][3];
*(pathk + posk) = i;
*(pathk + posk + 1) = tj;
*(pathk + posk + 2) = tk;
*(pathk + posk + 3) = tl;
*(pathk + posk + 4) = tm;
*(pathk + posk + 5) = tn;
posCount[id][3] += 6;
++num;
continue;
}
else if(to == tj || to == tk || to == tl || to == tm) continue;
else{
// The 7th edge (to -> start) is not walked; its weight is the one
// dfsBack stored in money[id] for this node.
nomy7 = money[id][to];
if(checkMoney(nomy6, nomy7) || checkMoney(nomy7, nomy1)) continue;
// Found a cycle of length 7.
auto& pathk = path[id][4];
posk = posCount[id][4];
*(pathk + posk) = i;
*(pathk + posk + 1) = tj;
*(pathk + posk + 2) = tk;
*(pathk + posk + 3) = tl;
*(pathk + posk + 4) = tm;
*(pathk + posk + 5) = tn;
*(pathk + posk + 6) = to;
posCount[id][4] += 7;
++num;
}
}
}
}
}
}
}
ansCount[id] += num;
}
这里的主要优化点有:
// Excerpt from dfsFur: bind the thread's result buffer for one cycle length
// to a local reference and read the current write offset once before storing.
auto& pathk = path[id][4];
posk = posCount[id][4];
由于找环中使用了原子操作,所以在输出之前首先要将答案重新排序,之后再输出。
// Writes every cycle found by FindCycle() to resultFile.
//
// Step 1 gathers the per-start-node results (AnsSort / AnsSortCount) into one
// int array, ordered by cycle length and then by start node; the cycles of a
// single start node are copied in reverse of the order they were found.
// Step 2 converts that array to text with THREADNUM threads (CombineChar).
// Step 3 writes the total cycle count followed by the text chunks.
//
// Returns 1 on success and 0 when a buffer cannot be allocated or the result
// file cannot be opened or closed. (The previous version had no return
// statement at all, never checked fopen and never closed the file.)
inline int storePredict(string resultFile){
#ifdef TEST
    timeval tStart, cTime, bTime;
    gettimeofday(&tStart, 0);
    long long tPassed = 0;
#endif
    // 1. Gather all node ids in output order. The buffer is deliberately
    //    larger than the worst case.
    int* ansNode = (int*)malloc((size_t)MAXCIRCLES * 2 * MAXCHARNUM * sizeof(int));
    if(ansNode == NULL){
        cout << "storePredict: cannot allocate node buffer" << endl;
        return 0;
    }
    int charNum = 0;          // number of ints gathered so far
    int lenNum[DEPTH] = {0};  // lenNum[i]: first index of the cycles of length i + 3
    for(int i = 0; i < DEPTH; ++i){
        const int len = i + 3;
        lenNum[i] = charNum;
        for(int j = 0; j < datasize; ++j){
            const int depNum = AnsSortCount[i][j];
            if(depNum == 0) continue;
            const int* addrNum = AnsSort[i][j];
            // The t-th cycle found goes to the t-th slot counted from the end
            // of this node's range, which reverses the discovery order.
            for(int t = 0; t < depNum; t += len){
                int dst = charNum + depNum - t - len;
                for(int tt = 0; tt < len; ++tt){
                    ansNode[dst++] = addrNum[t + tt];
                }
            }
            charNum += depNum;
        }
    }
#ifdef TEST
    gettimeofday(&bTime, 0);
    bTime.tv_sec -= tStart.tv_sec;
    bTime.tv_usec -= tStart.tv_usec;
    tPassed = 1000000LL * bTime.tv_sec + bTime.tv_usec;
    tPassed /= 1000;
    cout << "toInt Time: " << tPassed << endl;
#endif
    // 2. Split the int array into THREADNUM contiguous slices; the last slice
    //    takes the remainder.
    const int JustNode = charNum / THREADNUM;
    int start[THREADNUM + 1];
    for(int i = 0; i < THREADNUM; ++i) start[i] = i * JustNode;
    start[THREADNUM] = charNum;
    // One text buffer per thread.
    char* charNode[THREADNUM];
    int charCount[THREADNUM] = {0};
    bool allocated = true;
    for(int i = 0; i < THREADNUM; ++i){
        charNode[i] = (char*)malloc((size_t)MAXCIRCLES * MAXCHARNUM * 12);
        if(charNode[i] == NULL) allocated = false;
    }
    int status = 0;
    int resultC = 0;
    for(int i = 0; i < THREADNUM; ++i) resultC += ansCount[i];
    if(!allocated){
        cout << "storePredict: cannot allocate text buffers" << endl;
    }else{
        vector<thread> myTreads;
        for(int i = 0; i < THREADNUM; i++){
            myTreads.emplace_back(CombineChar, ansNode, start[i], start[i+1], charNode[i], i, &charCount[i], lenNum);
        }
        for(auto iter = myTreads.begin(); iter != myTreads.end(); iter++){
            iter->join();
        }
        // 3. Write the cycle count line, then the chunks in thread order.
        FILE *fp = fopen(resultFile.c_str(), "w");
        if(fp == NULL){
            cout << "open resultFile error" << endl;
        }else{
            char buf[32];
            const int idx = sprintf(buf,"%d\n",resultC);
            fwrite(buf, sizeof(char), idx, fp);
            for(int i = 0; i < THREADNUM; ++i) fwrite(charNode[i], sizeof(char), charCount[i], fp);
            // fclose flushes; a failure here means the output is incomplete.
            status = (fclose(fp) == 0) ? 1 : 0;
        }
    }
    for(int i = 0; i < THREADNUM; ++i) free(charNode[i]);
    free(ansNode);
#ifdef TEST
    gettimeofday(&cTime, 0);
    cTime.tv_sec -= tStart.tv_sec;
    cTime.tv_usec -= tStart.tv_usec;
    tPassed = 1000000LL * cTime.tv_sec + cTime.tv_usec;
    tPassed /= 1000;
    cout << "storePredict Time: " << tPassed << endl;
    cout<< "find : "<<resultC<<endl;
#endif
    return status;
}
首先根据预存的AnsSort地址信息和AnsSortCount数量信息,得到排序好的结果,然后利用多线程进行字符串转换,最后通过fwrite输出。
这里的主要优化点有:
由于赛题以程序运行时间为唯一指标,所以代码上应尽可能考虑程序的效率。这一点直接淘汰了Python和Java,导致清一色C++选手。从代码的角度来说,为了提高运行效率,可以看到,我们尽可能避免使用STL库,比如用数组去代替vector、减少使用map等耗时的数据结构,其他方面去掉类、用for循环去代替迭代、用直接判断去代替vis、用uint8_t去代替int,甚至有些大佬去定制数据结构,进行内存的对齐等,均能在一定程度上提高成绩。于我而言,虽也曾有进决赛的机会,B榜同部分大佬无成绩,然收获亦是不小,继续努力,再接再厉!
全部代码如下:
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <unistd.h>

#include <algorithm>
#include <atomic>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <new>
#include <string>
#include <thread>
#include <unordered_map>
#include <utility>
#include <vector>
// When defined, every stage prints its elapsed time in milliseconds.
#define TEST
// Original comment: "maximum node ID". In this file it is used as the
// element count of the per-node arrays and as the base for the edge buffers.
#define DATASIZE 2000000
// Number of worker threads.
#define THREADNUM 4
// Number of distinct cycle lengths handled (3, 4, 5, 6, 7).
#define DEPTH 5
// Maximum number of cycles the result buffers are sized for.
#define MAXCIRCLES 20000000
// Maximum number of ints (nodes) in one cycle.
#define MAXCHARNUM 7
//bitmap
#define WORD 32
#define SHIFT 5 // shift by 5 bits (divide by WORD)
#define MASK 0x1F // 31 in hexadecimal (WORD - 1)
#define MAXN 2147483647
using namespace std;
// Number of distinct node ids; set by loadTestData.
int datasize;
// Cycles found per thread.
int ansCount[THREADNUM]={0};
// Per thread and cycle length: next write offset (in ints) into path[][].
int posCount[THREADNUM][DEPTH]={0};
// In/out degree bookkeeping. Each edge counts as 2 because an adjacency
// entry takes two ints (neighbour, weight).
int inNum[DATASIZE];
int outNum[DATASIZE];
// Start offset of every node's adjacency entries in graph_back / graph.
// buildMapBack / buildMapFur also store an end sentinel at index `datasize`,
// and datasize may be as large as DATASIZE, so one extra slot is required.
int inAddr[DATASIZE + 1];
int outAddr[DATASIZE + 1];
// Used to restore the output order: for every cycle length and start node,
// where that node's results begin (AnsSort) and how many ints they take
// (AnsSortCount). Filled by FindCycleThread, read by storePredict.
int* AnsSort[DEPTH][DATASIZE] = {0};
int AnsSortCount[DEPTH][DATASIZE] = {0};
// Per-thread scratch state of the cycle search.
uint8_t flagNode[THREADNUM][DATASIZE];
int pathNode[THREADNUM][DATASIZE];
int money[THREADNUM][DATASIZE];
// Text form of every id, 11 bytes per dense id; the first byte of a slot
// holds the digit count (see int2char).
char *idsComma = new char[11 * DATASIZE];
// Forward and reverse adjacency storage, two ints (neighbour, weight) per edge.
int graph[DATASIZE * 2];
int graph_back[DATASIZE * 2];
// Mapping from original id to dense id.
unordered_map<int, int> graphmap;
// Result buffers, kept separate for each of the four threads; pathTL holds
// the cycles of length L found by thread T. Thread 1:
int *path13 = new int[3 * MAXCIRCLES];
int *path14 = new int[4 * MAXCIRCLES];
int *path15 = new int[5 * MAXCIRCLES];
int *path16 = new int[6 * MAXCIRCLES];
int *path17 = new int[7 * MAXCIRCLES];
// Thread 2
int *path23 = new int[3 * MAXCIRCLES];
int *path24 = new int[4 * MAXCIRCLES];
int *path25 = new int[5 * MAXCIRCLES];
int *path26 = new int[6 * MAXCIRCLES];
int *path27 = new int[7 * MAXCIRCLES];
// Thread 3
int *path33 = new int[3 * MAXCIRCLES];
int *path34 = new int[4 * MAXCIRCLES];
int *path35 = new int[5 * MAXCIRCLES];
int *path36 = new int[6 * MAXCIRCLES];
int *path37 = new int[7 * MAXCIRCLES];
// Thread 4
int *path43 = new int[3 * MAXCIRCLES];
int *path44 = new int[4 * MAXCIRCLES];
int *path45 = new int[5 * MAXCIRCLES];
int *path46 = new int[6 * MAXCIRCLES];
int *path47 = new int[7 * MAXCIRCLES];
// path[thread][length - 3] selects one of the buffers above.
int *path[4][5] = {{path13, path14, path15, path16, path17},
{path23, path24, path25, path26, path27},
{path33, path34, path35, path36, path37},
{path43, path44, path45, path46, path47}};
// Bit set with one bit for every int in [0, MAXN]; all bits start cleared.
//
// The storage comes from calloc so that it is guaranteed to be zeroed (the
// previous `new int[...]` left the words indeterminate, so test() could
// report ids that were never set). The words are unsigned so that setting
// bit 31 does not shift into the sign bit.
class BitMap{
private:
    unsigned int *bitmap;
public:
    // Allocates the zero-filled bit array; throws std::bad_alloc on failure.
    BitMap()
        : bitmap(static_cast<unsigned int*>(std::calloc(1 + MAXN / WORD, sizeof(unsigned int)))){
        if(bitmap == nullptr) throw std::bad_alloc();
    }
    ~BitMap(){
        std::free(bitmap);
    }
    // The object owns a raw buffer, so copying is disabled.
    BitMap(const BitMap&) = delete;
    BitMap& operator=(const BitMap&) = delete;
    // Marks i (0 <= i <= MAXN) as present.
    void set(int i){
        bitmap[i >> SHIFT] |= (1u << (i & MASK));
    }
    // Returns non-zero (1) when i was set before, 0 otherwise.
    int test(int i){
        return (bitmap[i >> SHIFT] >> (i & MASK)) & 1u;
    }
};
// Stores the decimal text of v in the p-th 11-byte slot of s1.
// Slot layout: byte 0 = number of digits, bytes 1..n = the digits, most
// significant first. No terminator is written.
void int2char(int v, char* s1, int p) {
    char* const slot = s1 + 11 * p;
    char reversed[12] = "";
    int count = 0;
    int rest = v;
    // Peel off digits from the least significant end; running the body at
    // least once makes v == 0 produce the single digit '0'.
    do {
        reversed[count++] = rest % 10 + '0';
        rest /= 10;
    } while(rest);
    // Copy them behind the length byte in reading order.
    for(int k = 0; k < count; ++k){
        slot[count - k] = reversed[k];
    }
    slot[0] = count;
}
// All parsed records, flattened as consecutive (u, v, m) triples;
// dataAlln is the number of ints stored (3 per record).
int* dataAll;
int dataAlln = 0;
void buildMapFur(){
#ifdef TEST
timeval tStart, cTime;
gettimeofday(&tStart, 0);
long long tPassed = 0;
#endif
//根据出入度确定地址
int graphIdx = 0;
for(int i = 0; i < datasize; i++){
outAddr[i] = graphIdx;
graphIdx += outNum[i];
outNum[i] = 0;
}
outAddr[datasize] = graphIdx;
int u, v, m;
int pos;
for(int i=0;i<dataAlln;i+=3){
u=dataAll[i];
v=dataAll[i+1];
m=dataAll[i+2];
pos = outAddr[u] + outNum[u];
graph[pos]=v;
graph[pos + 1]=m;
outNum[u] += 2;
}
//对正向图进行排序
for(int i = 0; i < datasize; ++i) {
int size = outNum[i];
if(size > 2){
int ssize = size >> 1;
vector<pair<int, int>> temp(ssize, make_pair(0, 0));
for(int vi = 0, vj = 0; vi < size, vj < ssize; vi += 2, ++vj){
temp[vj] = make_pair(graph[outAddr[i] + vi], graph[outAddr[i] + vi + 1]);
}
sort(temp.begin(), temp.end(), [](pair<int, int>&a, pair<int, int>&b)->bool{
return a.first > b.first;
});
for(int vi = 0, vj = 0; vi < size, vj < ssize; vi += 2, ++vj){
graph[outAddr[i] +vi] = temp[vj].first;
graph[outAddr[i] +vi + 1] = temp[vj].second;
}
}
}
#ifdef TEST
gettimeofday(&cTime, 0);
cTime.tv_sec -= tStart.tv_sec;
cTime.tv_usec -= tStart.tv_usec;
tPassed = 1000000LL * cTime.tv_sec + cTime.tv_usec;
tPassed /= 1000;
cout << "buidMapFur Time: " << tPassed << endl;
#endif
}
//反向图建图
void buildMapBack(){
#ifdef TEST
timeval tStart, cTime;
gettimeofday(&tStart, 0);
long long tPassed = 0;
#endif
//根据出入度确定地址
int graphBackIdx = 0;
for(int i = 0; i < datasize; i++){
inAddr[i] = graphBackIdx;
graphBackIdx += inNum[i];
inNum[i] = 0;
}
//设置最大节点地址
inAddr[datasize] = graphBackIdx;
int u, v, m;
int pos;
for(int i=0;i<dataAlln;i+=3){
u=dataAll[i];
v=dataAll[i+1];
m=dataAll[i+2];
pos = inAddr[v] + inNum[v];
graph_back[pos]=u;
graph_back[pos + 1]=m;
inNum[v] += 2;
}
//对反向图数据进行排序
for(int i = 0; i < datasize; ++i) {
int size = inNum[i];
if(size > 2){
int ssize = size >> 1;
vector<pair<int, int>> temp(ssize, make_pair(0, 0));
for(int vi = 0, vj = 0; vi < size, vj < ssize; vi += 2, ++vj){
temp[vj] = make_pair(graph_back[inAddr[i] + vi], graph_back[inAddr[i] + vi + 1]);
}
sort(temp.begin(), temp.end(), [](pair<int, int>&a, pair<int, int>&b)->bool{
return a.first > b.first;
});
for(int vi = 0, vj = 0; vi < size, vj < ssize; vi += 2, ++vj){
graph_back[inAddr[i] +vi] = temp[vj].first;
graph_back[inAddr[i] +vi + 1] = temp[vj].second;
}
}
}
#ifdef TEST
gettimeofday(&cTime, 0);
cTime.tv_sec -= tStart.tv_sec;
cTime.tv_usec -= tStart.tv_usec;
tPassed = 1000000LL * cTime.tv_sec + cTime.tv_usec;
tPassed /= 1000;
cout << "buildMapBack Time: " << tPassed << endl;
#endif
}
inline void toMapId(int sstart, int eend){
#ifdef TEST
timeval tStart, cTime;
gettimeofday(&tStart, 0);
long long tPassed = 0;
#endif
for(int i=sstart;i<eend;i += 3){
dataAll[i]=graphmap[dataAll[i]];
dataAll[i+1]=graphmap[dataAll[i+1]];
}
#ifdef TEST
gettimeofday(&cTime, 0);
cTime.tv_sec -= tStart.tv_sec;
cTime.tv_usec -= tStart.tv_usec;
tPassed = 1000000LL * cTime.tv_sec + cTime.tv_usec;
tPassed /= 1000;
cout << "toMapId thread = " << tPassed << endl;
#endif
}
// Loads the edge list from testFile and builds both adjacency structures.
//
// Each input line is "u,v,m" (decimal, no signs) terminated by "\n" or
// "\r\n". The function
//   1. maps the file and parses all records into dataAll,
//   2. collects the distinct ids, sorts them and assigns dense ids
//      0..datasize-1 (graphmap, idsComma),
//   3. rewrites the records to dense ids with THREADNUM threads,
//   4. counts degrees and builds the forward and reverse graphs in parallel.
//
// Returns 1 on success and 0 when the file cannot be opened or mapped, when
// memory cannot be allocated, or when the input exceeds the compiled-in
// DATASIZE capacity. An empty file is accepted and yields an empty graph.
//
// Compared with the previous version, every scan is bounded by the end of
// the mapping (it used to run past the last byte of the file), the mmap
// result is checked, and the descriptor, the mapping and the bitmap are
// released.
inline int loadTestData(string testFile){
#ifdef TEST
    timeval tStart, cTime;
    gettimeofday(&tStart, 0);
    long long tPassed = 0;
#endif
    int fd = open(testFile.c_str(),O_RDONLY);
    if(fd < 0) {
        cout << "open testFile error" << endl;
        return 0;
    }
    const long dataSize = lseek(fd, 0, SEEK_END); // total number of bytes
    if(dataSize < 0){
        cout << "seek testFile error" << endl;
        close(fd);
        return 0;
    }
    char* mapped = NULL;
    if(dataSize > 0){
        mapped = (char *)mmap(NULL, dataSize, PROT_READ, MAP_PRIVATE, fd, 0);
        if(mapped == MAP_FAILED){
            cout << "mmap testFile error" << endl;
            close(fd);
            return 0;
        }
    }
    close(fd); // the mapping stays valid after the descriptor is closed

    dataAll = (int*)malloc(DATASIZE * 3 * sizeof(int));
    int* tmp = (int*)malloc(DATASIZE * sizeof(int)); // each distinct id once
    if(dataAll == NULL || tmp == NULL){
        cout << "loadTestData: out of memory" << endl;
        free(dataAll);
        free(tmp);
        dataAll = NULL;
        if(mapped != NULL) munmap(mapped, dataSize);
        return 0;
    }
    BitMap NumMap; // remembers which ids are already in tmp
    int tmpn = 0;
    bool fits = true;
    const char* cur = mapped;
    const char* const end = mapped + dataSize;
    while(cur < end){
        __builtin_prefetch(cur + 1024, 0); // prefetch input ahead of the cursor
        int u = 0, v = 0, m = 0;
        while(cur < end && *cur != ',') u = u * 10 + (*cur++ - '0');
        if(cur >= end) break;   // truncated last line: drop it
        ++cur;
        while(cur < end && *cur != ',') v = v * 10 + (*cur++ - '0');
        if(cur >= end) break;   // truncated last line: drop it
        ++cur;
        // The weight ends at '\r', '\n' or the end of the file.
        while(cur < end && *cur >= '0') m = m * 10 + (*cur++ - '0');
        while(cur < end && *cur < '0') ++cur;
        // Capacity check. The node limit is kept one below DATASIZE because
        // the graph builders also write a sentinel at index `datasize`.
        if(dataAlln + 3 > DATASIZE * 3 || tmpn + 2 >= DATASIZE){
            fits = false;
            break;
        }
        dataAll[dataAlln++] = u;
        dataAll[dataAlln++] = v;
        dataAll[dataAlln++] = m;
        if(!NumMap.test(u)){
            tmp[tmpn++] = u;
            NumMap.set(u);
        }
        if(!NumMap.test(v)){
            tmp[tmpn++] = v;
            NumMap.set(v);
        }
    }
    if(mapped != NULL) munmap(mapped, dataSize);
    if(!fits){
        cout << "testFile exceeds the DATASIZE capacity" << endl;
        free(dataAll);
        free(tmp);
        dataAll = NULL;
        dataAlln = 0;
        return 0;
    }
    sort(tmp,tmp+tmpn);
    datasize=tmpn;
    // Dense ids follow the ascending order of the original ids.
    graphmap.reserve(datasize);
    for(int mapId = 0; mapId < tmpn; ++mapId){
        const int id = tmp[mapId];
        int2char(id,idsComma,mapId);
        graphmap[id]=mapId;
    }
    // Rewrite the records to dense ids in parallel. Slice boundaries are
    // multiples of 3 so a record is never split; the last slice takes the
    // remainder.
    const int perThread = (dataAlln / 3) / THREADNUM;
    vector<thread> spTreads;
    spTreads.reserve(THREADNUM);
    for(int i = 0; i < THREADNUM; i++){
        const int from = i * perThread * 3;
        const int to = (i == THREADNUM - 1) ? dataAlln : (i + 1) * perThread * 3;
        spTreads.emplace_back(toMapId, from, to);
    }
    for(auto iter = spTreads.begin(); iter != spTreads.end(); iter++){
        iter->join();
    }
    // Count degrees once; each edge takes two ints in the adjacency arrays.
    for(int i=0;i<dataAlln;i+=3){
        outNum[dataAll[i]] += 2;
        inNum[dataAll[i+1]] += 2;
    }
    // Build the forward and the reverse graph concurrently.
    thread bulidMap1(buildMapFur);
    thread bulidMap2(buildMapBack);
    bulidMap1.join();
    bulidMap2.join();
    free(dataAll);
    dataAll = NULL;
    free(tmp);
#ifdef TEST
    gettimeofday(&cTime, 0);
    cTime.tv_sec -= tStart.tv_sec;
    cTime.tv_usec -= tStart.tv_usec;
    tPassed = 1000000LL * cTime.tv_sec + cTime.tv_usec;
    tPassed /= 1000;
    cout << "loadTestData Time: " << tPassed << endl;
#endif
    return 1;
}
// Returns true when two consecutive edge weights are NOT compatible, i.e.
// when mony2 is more than 5 times mony1 or mony1 is more than 3 times mony2.
// The parameters are 64-bit so the multiplications cannot overflow for
// int-sized weights.
inline bool checkMoney(long long mony2, long long mony1){
    const bool secondTooLarge = mony2 > 5 * mony1;
    const bool firstTooLarge = mony1 > 3 * mony2;
    return secondTooLarge || firstTooLarge;
}
// Backward pass over the reverse graph, up to three hops from `start`.
//
// Only nodes with an id greater than start are followed. Each reached node
// gets a level in flagNode[id]: 1 = direct predecessor of start, 2 = two
// hops away, 3 = three hops away; start itself gets 0. The loops `break` on
// the first neighbour <= start, which relies on the adjacency entries being
// sorted by id in descending order. Every node whose mark is written is
// also appended to pathNode[id] so the caller can reset the marks later.
inline void dfsBack(int start, int id){
int pathNodeCnt = 1;
int i = start;
flagNode[id][i] = 0;
int tk,tj,te;
int nomy1, nomy2, nomy3;
int k = inAddr[i];
for(;k<inAddr[i+1];k+=2){
tk = graph_back[k];
if(tk <= i) break;
nomy1 = graph_back[k + 1];
// Hop 1: remember the weight of the closing edge tk -> start.
money[id][tk] = nomy1;
flagNode[id][tk] = 1;
pathNode[id][pathNodeCnt++] =(tk);
int j = inAddr[tk];
for(;j<inAddr[tk + 1];j+=2){
tj = graph_back[j];
if(tj <= i) break;
nomy2 = graph_back[j + 1];
// Skip edge pairs whose weights violate the ratio rule.
if(checkMoney(nomy2, nomy1)) continue;
// Lower the level to 2 only when the node is currently at 3 or unmarked (4).
if(flagNode[id][tj] > 2){
pathNode[id][pathNodeCnt++] = (tj);
flagNode[id][tj] = 2;
}
int e = inAddr[tj];
for(;e<inAddr[tj+1];e+=2){
te = graph_back[e];
if(te <= i) break;
// Nodes that already carry a level (<= 3) need no further work.
if(flagNode[id][te] <= 3) continue;
nomy3 = graph_back[e + 1];
if(checkMoney(nomy3, nomy2)) continue;
// Only nodes that were still unmarked reach this point; they become level 3.
flagNode[id][te] = 3;
pathNode[id][pathNodeCnt++] = (te);
}
}
}
pathNode[id][0] = pathNodeCnt - 1;
}
// Forward search for cycles of length 3..7 that begin at `start`.
//
// Walks the forward graph layer by layer. Every loop `break`s as soon as a
// neighbour id drops below the start id (layers 2 and 3: at or below it),
// which relies on adjacency entries being sorted by id in descending order.
// From layer 5 on, flagNode[id] (filled by dfsBack) prunes nodes that cannot
// get back to start within the remaining hops. Each consecutive pair of edge
// weights, including the closing edge and the first edge, is checked with
// checkMoney. A found cycle of length L is appended to path[id][L - 3] and
// posCount[id][L - 3] grows by L; the number of cycles found is added to
// ansCount[id].
inline void dfsFur(int start, int id){
// Layer 1: the start node itself.
int i = start;
// Nodes at path positions 2..7.
int tj,tk,tl,tm,tn,to;
// Weights of the 1st..7th edge of the candidate cycle.
int nomy1, nomy2,nomy3,nomy4,nomy5,nomy6,nomy7;
// num: cycles found in this call; posk: scratch write offset.
int num = 0, posk = 0;
// Layer 2
int it2 = outAddr[i];
for(;it2 < outAddr[i+1]; it2 += 2){
tj = graph[it2];
if(tj <= i) break;
nomy1 = graph[it2 + 1];
// Layer 3
int it3 = outAddr[tj];
for(; it3 < outAddr[tj+1]; it3 += 2){
tk = graph[it3];
if(tk <= i) break;
nomy2 = graph[it3 + 1];
// Weight ratio check between edge 1 and edge 2.
if(checkMoney(nomy1, nomy2)) continue;
// Layer 4
int it4 = outAddr[tk];
for(; it4 < outAddr[tk + 1]; it4 += 2){
tl = graph[it4];
if(tl < i) break;
nomy3 = graph[it4 + 1];
if(checkMoney(nomy2, nomy3)) continue;
else if(tl == i){
if(checkMoney(nomy3, nomy1)) continue;
// Found a cycle of length 3.
auto& pathk = path[id][0];
posk = posCount[id][0];
*(pathk + posk) = i;
*(pathk + posk + 1) = tj;
*(pathk + posk + 2) = tk;
posCount[id][0] += 3;
++num;
continue;
}
else if(tl == tj) continue;
// Layer 5: tm must be start or carry a backward level <= 3.
int it5 = outAddr[tl];
for(; it5 < outAddr[tl+1]; it5+=2){
tm = graph[it5];
if(tm < i) break;
nomy4 = graph[it5 + 1];
if(flagNode[id][tm] > 3 || checkMoney(nomy3, nomy4)) continue;
else if(tm == i){
if(checkMoney(nomy4, nomy1)) continue;
// Found a cycle of length 4.
auto& pathk = path[id][1];
posk = posCount[id][1];
*(pathk + posk) = i;
*(pathk + posk + 1) = tj;
*(pathk + posk + 2) = tk;
*(pathk + posk + 3) = tl;
posCount[id][1] += 4;
++num;
continue;
}
else if(tm == tj || tm == tk) continue;
// Layer 6: tn must be start or carry a backward level <= 2.
int it6 = outAddr[tm];
for(;it6 < outAddr[tm+1]; it6+=2){
tn = graph[it6];
if(tn < i) break;
nomy5 = graph[it6+1];
if(flagNode[id][tn] > 2 || checkMoney(nomy4, nomy5)) continue;
else if(tn == i){
if(checkMoney(nomy5, nomy1)) continue;
// Found a cycle of length 5.
auto& pathk = path[id][2];
posk = posCount[id][2];
*(pathk + posk) = i;
*(pathk + posk + 1) = tj;
*(pathk + posk + 2) = tk;
*(pathk + posk + 3) = tl;
*(pathk + posk + 4) = tm;
posCount[id][2] += 5;
++num;
continue;
}
else if(tn == tj || tn == tk || tn == tl) continue;
// Layer 7: `to` must be start or carry a backward level <= 1.
int it7 = outAddr[tn];
for(; it7 < outAddr[tn+1]; it7+=2){
to = graph[it7];
if(to < i) break;
nomy6 = graph[it7+1];
if(flagNode[id][to] > 1 || checkMoney(nomy5, nomy6)) continue;
else if(to == i){
if(checkMoney(nomy6, nomy1)) continue;
// Found a cycle of length 6.
auto& pathk = path[id][3];
posk = posCount[id][3];
*(pathk + posk) = i;
*(pathk + posk + 1) = tj;
*(pathk + posk + 2) = tk;
*(pathk + posk + 3) = tl;
*(pathk + posk + 4) = tm;
*(pathk + posk + 5) = tn;
posCount[id][3] += 6;
++num;
continue;
}
else if(to == tj || to == tk || to == tl || to == tm) continue;
else{
// The 7th edge (to -> start) is not walked; its weight is the one
// dfsBack stored in money[id] for this node.
nomy7 = money[id][to];
if(checkMoney(nomy6, nomy7) || checkMoney(nomy7, nomy1)) continue;
// Found a cycle of length 7.
auto& pathk = path[id][4];
posk = posCount[id][4];
*(pathk + posk) = i;
*(pathk + posk + 1) = tj;
*(pathk + posk + 2) = tk;
*(pathk + posk + 3) = tl;
*(pathk + posk + 4) = tm;
*(pathk + posk + 5) = tn;
*(pathk + posk + 6) = to;
posCount[id][4] += 7;
++num;
}
}
}
}
}
}
}
ansCount[id] += num;
}
// One claim flag per start node; the worker that sets it first owns the node.
atomic_flag flag[DATASIZE] = {ATOMIC_FLAG_INIT};
// Worker body: walks the start nodes in [sstart, eend), claims each one that
// no other worker has taken yet, and searches the cycles that start there.
// id selects this worker's private buffers (path, posCount, flagNode, ...).
inline void FindCycleThread(int sstart, int eend, int id){
#ifdef TEST
    timeval tStart, cTime;
    gettimeofday(&tStart, 0);
    long long tPassed = 0;
#endif
    const int kLevels = 5;   // one result buffer per cycle length 3..7
    int before[kLevels];     // write offsets seen before searching a root
    for(int root = sstart; root < eend; ++root){
        // test_and_set returns true when another worker already owns root.
        if(flag[root].test_and_set()) continue;
        // A node without incoming or outgoing edges cannot lie on a cycle.
        if(inNum[root] == 0 || outNum[root] == 0) continue;
        // Remember where this root's results begin in every buffer.
        for(int lv = 0; lv < kLevels; ++lv){
            before[lv] = posCount[id][lv];
            AnsSort[lv][root] = path[id][lv] + before[lv];
        }
        // Backward marking first; the forward search only runs when the
        // backward pass marked at least one node.
        dfsBack(root, id);
        if(pathNode[id][0]) dfsFur(root, id);
        // Restore the marks touched by dfsBack to the "unmarked" value 4.
        const int marked = pathNode[id][0];
        for(int k = 1; k <= marked; ++k){
            flagNode[id][pathNode[id][k]] = 4;
        }
        flagNode[id][root] = 4;
        // Record how many ints this root produced per cycle length.
        for(int lv = 0; lv < kLevels; ++lv){
            AnsSortCount[lv][root] = posCount[id][lv] - before[lv];
        }
    }
#ifdef TEST
    gettimeofday(&cTime, 0);
    cTime.tv_sec -= tStart.tv_sec;
    cTime.tv_usec -= tStart.tv_usec;
    tPassed = 1000000LL * cTime.tv_sec + cTime.tv_usec;
    tPassed /= 1000;
    cout << "FindCycleThread thread: " << id << " time: " << tPassed << endl;
#endif
}
inline void FindCycle()
{
#ifdef TEST
timeval tStart, cTime;
gettimeofday(&tStart, 0);
long long tPassed = 0;
#endif
memset(flagNode, 4, THREADNUM * DATASIZE);
//初始化
vector<thread> myTreads;
for(int i = 0; i < THREADNUM; i++){
myTreads.push_back(thread(FindCycleThread,
0, datasize, i));
}
for(auto iter = myTreads.begin(); iter != myTreads.end(); iter++){
iter->join();
}
#ifdef TEST
gettimeofday(&cTime, 0);
cTime.tv_sec -= tStart.tv_sec;
cTime.tv_usec -= tStart.tv_usec;
tPassed = 1000000LL * cTime.tv_sec + cTime.tv_usec;
tPassed /= 1000;
cout << "FindCycle Time: " << tPassed << endl;
#endif
}
// Converts the dense ids res[sstart, eend) to text and appends it to buf.
//
// len   : number of nodes per cycle in this range
// myLen : how many nodes of the current cycle were already emitted before
//         sstart (non-zero when a slice starts in the middle of a cycle)
// num   : in/out write offset into buf
// Ids inside a cycle are separated by ',' and the last id of every cycle is
// followed by '\n'. The text of an id comes from its 11-byte idsComma slot.
inline void toChar(int sstart, int eend, int* res, int len, char* buf, int* num, int myLen){
    int emitted = myLen;
    int out = *num;
    for(int idx = sstart; idx < eend; ++idx){
        const char* slot = idsComma + 11 * res[idx];
        const int digits = slot[0];
        memcpy(buf + out, slot + 1, digits);
        out += digits;
        if(++emitted == len){
            buf[out++] = '\n';
            emitted = 0;
        }else{
            buf[out++] = ',';
        }
    }
    *num = out;
}
// Converts the slice ansNode[sstart, eend) to text in buf and stores the
// number of bytes written in *Num.
//
// lenNum[k] is the index in ansNode where the cycles of length k + 3 begin.
// A slice may span several cycle lengths, so it is cut at those boundaries
// and each piece is handed to toChar with the matching cycle length. id is
// only used for the timing message.
inline void CombineChar(int* ansNode, int sstart, int eend, char* buf, int id, int* Num, int* lenNum){
#ifdef TEST
    timeval tStart, cTime;
    gettimeofday(&tStart, 0);
    long long tPassed = 0;
#endif
    const int pos7 = lenNum[4];
    int written = 0;
    if(sstart < pos7){
        // Lengths 3..6: level k covers indices [base, lenNum[k + 1]).
        for(int k = 0; k < 4; ++k){
            const int upper = lenNum[k + 1];
            // sstart == -1 means the slice was finished by an earlier level;
            // the first level is entered without that check.
            const bool active = (k == 0 || sstart != -1) && sstart < upper;
            if(!active) continue;
            const int base = (k == 0) ? 0 : lenNum[k];
            const int partial = (sstart - base) % (k + 3);
            if(eend <= upper){
                toChar(sstart, eend, ansNode, k + 3, buf, &written, partial);
                sstart = -1;
            }else{
                toChar(sstart, upper, ansNode, k + 3, buf, &written, partial);
                sstart = upper;
            }
        }
    }
    // Whatever is left belongs to the cycles of length 7.
    if(sstart != -1 && sstart >= pos7){
        const int partial = (sstart - pos7) % 7;
        toChar(sstart, eend, ansNode, 7, buf, &written, partial);
    }
    *(Num) = written;
#ifdef TEST
    gettimeofday(&cTime, 0);
    cTime.tv_sec -= tStart.tv_sec;
    cTime.tv_usec -= tStart.tv_usec;
    tPassed = 1000000LL * cTime.tv_sec + cTime.tv_usec;
    tPassed /= 1000;
    cout << "CombineChar thread: " << id << " time: " << tPassed << endl;
#endif
}
// Writes every cycle found by FindCycle() to resultFile.
//
// Step 1 gathers the per-start-node results (AnsSort / AnsSortCount) into one
// int array, ordered by cycle length and then by start node; the cycles of a
// single start node are copied in reverse of the order they were found.
// Step 2 converts that array to text with THREADNUM threads (CombineChar).
// Step 3 writes the total cycle count followed by the text chunks.
//
// Returns 1 on success and 0 when a buffer cannot be allocated or the result
// file cannot be opened or closed. (The previous version had no return
// statement at all, never checked fopen and never closed the file.)
inline int storePredict(string resultFile){
#ifdef TEST
    timeval tStart, cTime, bTime;
    gettimeofday(&tStart, 0);
    long long tPassed = 0;
#endif
    // 1. Gather all node ids in output order. The buffer is deliberately
    //    larger than the worst case.
    int* ansNode = (int*)malloc((size_t)MAXCIRCLES * 2 * MAXCHARNUM * sizeof(int));
    if(ansNode == NULL){
        cout << "storePredict: cannot allocate node buffer" << endl;
        return 0;
    }
    int charNum = 0;          // number of ints gathered so far
    int lenNum[DEPTH] = {0};  // lenNum[i]: first index of the cycles of length i + 3
    for(int i = 0; i < DEPTH; ++i){
        const int len = i + 3;
        lenNum[i] = charNum;
        for(int j = 0; j < datasize; ++j){
            const int depNum = AnsSortCount[i][j];
            if(depNum == 0) continue;
            const int* addrNum = AnsSort[i][j];
            // The t-th cycle found goes to the t-th slot counted from the end
            // of this node's range, which reverses the discovery order.
            for(int t = 0; t < depNum; t += len){
                int dst = charNum + depNum - t - len;
                for(int tt = 0; tt < len; ++tt){
                    ansNode[dst++] = addrNum[t + tt];
                }
            }
            charNum += depNum;
        }
    }
#ifdef TEST
    gettimeofday(&bTime, 0);
    bTime.tv_sec -= tStart.tv_sec;
    bTime.tv_usec -= tStart.tv_usec;
    tPassed = 1000000LL * bTime.tv_sec + bTime.tv_usec;
    tPassed /= 1000;
    cout << "toInt Time: " << tPassed << endl;
#endif
    // 2. Split the int array into THREADNUM contiguous slices; the last slice
    //    takes the remainder.
    const int JustNode = charNum / THREADNUM;
    int start[THREADNUM + 1];
    for(int i = 0; i < THREADNUM; ++i) start[i] = i * JustNode;
    start[THREADNUM] = charNum;
    // One text buffer per thread.
    char* charNode[THREADNUM];
    int charCount[THREADNUM] = {0};
    bool allocated = true;
    for(int i = 0; i < THREADNUM; ++i){
        charNode[i] = (char*)malloc((size_t)MAXCIRCLES * MAXCHARNUM * 12);
        if(charNode[i] == NULL) allocated = false;
    }
    int status = 0;
    int resultC = 0;
    for(int i = 0; i < THREADNUM; ++i) resultC += ansCount[i];
    if(!allocated){
        cout << "storePredict: cannot allocate text buffers" << endl;
    }else{
        vector<thread> myTreads;
        for(int i = 0; i < THREADNUM; i++){
            myTreads.emplace_back(CombineChar, ansNode, start[i], start[i+1], charNode[i], i, &charCount[i], lenNum);
        }
        for(auto iter = myTreads.begin(); iter != myTreads.end(); iter++){
            iter->join();
        }
        // 3. Write the cycle count line, then the chunks in thread order.
        FILE *fp = fopen(resultFile.c_str(), "w");
        if(fp == NULL){
            cout << "open resultFile error" << endl;
        }else{
            char buf[32];
            const int idx = sprintf(buf,"%d\n",resultC);
            fwrite(buf, sizeof(char), idx, fp);
            for(int i = 0; i < THREADNUM; ++i) fwrite(charNode[i], sizeof(char), charCount[i], fp);
            // fclose flushes; a failure here means the output is incomplete.
            status = (fclose(fp) == 0) ? 1 : 0;
        }
    }
    for(int i = 0; i < THREADNUM; ++i) free(charNode[i]);
    free(ansNode);
#ifdef TEST
    gettimeofday(&cTime, 0);
    cTime.tv_sec -= tStart.tv_sec;
    cTime.tv_usec -= tStart.tv_usec;
    tPassed = 1000000LL * cTime.tv_sec + cTime.tv_usec;
    tPassed /= 1000;
    cout << "storePredict Time: " << tPassed << endl;
    cout<< "find : "<<resultC<<endl;
#endif
    return status;
}
// Entry point: load the graph, search all cycles, write the result file.
// Exits with status 1 when the input cannot be loaded instead of going on
// with an empty graph (loadTestData has already printed the reason).
int main()
{
#ifdef TEST
    timeval tStart, cTime;
    gettimeofday(&tStart, 0);
    long long tPassed = 0;
#endif
    string testFile = "test_data_1963w.txt";
    //string testFile = "test_data_1890w.txt";
    //string testFile = "test_data_351.txt";
    //string testFile = "test_data_289.txt";
    //string testFile = "test_data_28.txt";
    //string testFile = "test_data_10000_60000.txt";
    //string testFile = "test_data_10000_40000.txt";
    //string testFile = "test_data_50000.txt";
    //string testFile = "test_data.txt";
    //string testFile = "test_data_Fu.txt";
    string resultFile = "re515.txt";
    //string testFile = "/data/test_data.txt";
    //string resultFile = "/projects/student/result.txt";
    if(!loadTestData(testFile)){
        return 1;
    }
    FindCycle();
    storePredict(resultFile);
    //sleep(20);
#ifdef TEST
    gettimeofday(&cTime, 0);
    cTime.tv_sec -= tStart.tv_sec;
    cTime.tv_usec -= tStart.tv_usec;
    tPassed = 1000000LL * cTime.tv_sec + cTime.tv_usec;
    tPassed /= 1000;
    cout << "main Time: " << tPassed << endl;
#endif
    return 0;
}