题目大意:
给出一个长度不超过10^5的字符串, 求其所有子串当中字典序排行第K(K <= 10^5)的那个; 若子串总数不足K个, 则输出"No such line."
此题对于多个子串相同的视为不同子串,即起点不同也视为不同
大致思路:
刚开始想的是后缀数组的解法: 首先对这个字符串求出后缀数组, 然后利用sa数组给出的后缀字典序依次枚举子串. 因为这里相同的子串要多次计数, 所以后缀sa[i]与排在它后面的后缀的公共前缀(LCP)也要多次计数
于是计数稍微有点麻烦, 需要对每次连续的公共前缀多次计数, 细节见代码
另外是学长做的优先队列的写法:
首先建立优先队列, 其中的元素有两个关键字,分别是当前子串和其后续位置
按照子串的字典序排列之后, 每次出队的元素就是当前字典序最小的子串; 若该子串后面还有字符, 就把它接上后续位置上的那个字符(后续位置加一)后重新插入优先队列, 然后继续出队
第K个出队列的元素的第一关键字即为第K小子串
后缀数组解法:
Result : Accepted Memory : 3600 KB Time : 62 ms
/*
 * Author: Gatevin
 * Created Time:  2015/2/14 19:36:27
 * File Name: Mononobe_Mitsuki.cpp
 *
 * K-th smallest substring (equal substrings starting at different positions
 * are counted separately), solved with a suffix array + height array.
 */
#include<iostream>
#include<sstream>
#include<fstream>
#include<vector>
#include<list>
#include<deque>
#include<queue>
#include<stack>
#include<map>
#include<set>
#include<bitset>
#include<algorithm>
#include<cstdio>
#include<cstdlib>
#include<cstring>
#include<cctype>
#include<cmath>
#include<ctime>
#include<iomanip>
// NOTE: no `using namespace std;` here. Together with a global array called
// `rank` it makes every use of that name ambiguous with std::rank from
// <type_traits> on C++11 and later compilers.
const double eps(1e-8);
typedef long long lint;

#define maxn 100010

// Scratch buffers for the prefix-doubling construction in da().
int wa[maxn], wb[maxn], wv[maxn], Ws[maxn];

// Returns non-zero when positions a and b carry the same pair of keys
// (r[x], r[x + l]), i.e. the two suffixes agree on their first 2*l symbols.
// The second comparison is only evaluated when the first keys match.
int cmp(int *r, int a, int b, int l)
{
    return r[a] == r[b] && r[a + l] == r[b + l];
}

// Builds the suffix array of r[0..n-1] into sa[0..n-1] by prefix doubling,
// sorting each round with a counting sort.
//   r  : input symbols; main() passes the text followed by a 0 sentinel
//   sa : output, sa[i] = start index of the i-th smallest suffix
//   n  : number of symbols including the sentinel
//   m  : exclusive upper bound on the symbol values in r
void da(int *r, int *sa, int n, int m)
{
    int *x = wa, *y = wb, *t, i, j, p;

    // Initial counting sort on single symbols.
    for(i = 0; i < m; i++) Ws[i] = 0;
    for(i = 0; i < n; i++) Ws[x[i] = r[i]]++;
    for(i = 1; i < m; i++) Ws[i] += Ws[i - 1];
    for(i = n - 1; i >= 0; i--) sa[--Ws[x[i]]] = i;

    // Each round doubles the compared prefix length j; stop once all n
    // suffixes received distinct ranks (p == n).
    for(j = 1, p = 1; p < n; j *= 2, m = p)
    {
        // y = suffixes ordered by their second key (rank of suffix i + j).
        // Suffixes with no second key come first.
        for(p = 0, i = n - j; i < n; i++) y[p++] = i;
        for(i = 0; i < n; i++) if(sa[i] >= j) y[p++] = sa[i] - j;

        // Stable counting sort of y by the first key.
        for(i = 0; i < n; i++) wv[i] = x[y[i]];
        for(i = 0; i < m; i++) Ws[i] = 0;
        for(i = 0; i < n; i++) Ws[wv[i]]++;
        for(i = 1; i < m; i++) Ws[i] += Ws[i - 1];
        for(i = n - 1; i >= 0; i--) sa[--Ws[wv[i]]] = y[i];

        // Swap the rank buffers and assign the new ranks into x.
        for(t = x, x = y, y = t, p = 1, x[sa[0]] = 0, i = 1; i < n; i++)
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
    }
    return;
}

// rnk[i]    : position of suffix i inside sa (renamed from `rank`, see above)
// height[i] : length of the common prefix of suffixes sa[i - 1] and sa[i]
int rnk[maxn], height[maxn];

// Fills rnk[] and height[] for the n real suffixes; sa[0] is expected to be
// the sentinel suffix, so real suffixes occupy sa[1..n].
void calheight(int *r, int *sa, int n)
{
    int i, j, k = 0;
    for(i = 1; i <= n; i++) rnk[sa[i]] = i;
    // k carries over from suffix i to suffix i + 1, dropping by at most one.
    for(i = 0; i < n; height[rnk[i++]] = k)
        for(k ? k-- : 0, j = sa[rnk[i] - 1]; r[i + k] == r[j + k]; k++);
    return;
}

char in[maxn];
int s[maxn], sa[maxn];
int K;
int a[maxn]; // a[i] = how many prefixes of suffix sa[i] have been counted so far

// Prints the K-th smallest substring of in[0..n-1].
// Precondition: 1 <= K <= n * (n + 1) / 2 (checked by main()).
void findK(int n)
{
    std::memset(a, 0, sizeof(a));
    int r = 1;
    while(K)
    {
        a[r]++;
        if(a[r] > n - sa[r]) // suffix sa[r] has no further prefix to offer
        {
            r++;
            continue;
        }
        K--;
        /*
         * Suffix sa[r] has just contributed its prefix of length a[r]. Every
         * following suffix that shares that prefix contributes an equal
         * substring, which must be counted as well. As long as the
         * consecutive height[] values stay >= a[r], the common prefix with
         * sa[r] is at least a[r] long (range-minimum property of LCP).
         */
        for(int j = r + 1; j <= n && height[j] >= a[r] && K; j++)
            a[j]++, K--;
    }
    // The answer is the prefix of length a[r] of suffix sa[r].
    std::printf("%.*s\n", a[r], in + sa[r]);
    return;
}

int main()
{
    // The width keeps scanf inside in[maxn] (maxn - 1 characters + NUL).
    // Malformed input or K < 1 cannot name any substring.
    if(std::scanf("%100009s%d", in, &K) != 2 || K < 1)
    {
        std::printf("No such line.\n");
        return 0;
    }
    int n = std::strlen(in);
    lint kinds = (lint)n*((lint)n + 1LL)/2LL;
    if(kinds < K) // fewer than K substrings exist in total
    {
        std::printf("No such line.\n");
        return 0;
    }
    for(int i = 0; i < n; i++)
        s[i] = in[i] - 'a' + 1;
    s[n] = 0; // unique smallest sentinel
    da(s, sa, n + 1, 27);
    calheight(s, sa, n);
    findK(n);
    return 0;
}
优先队列的解法:
Result : Accepted Memory : 3868 KB Time : 560 ms
/*
 * Author: Gatevin
 * Created Time:  2015/2/14 18:57:03
 * File Name: Mononobe_Mitsuki.cpp
 *
 * K-th smallest substring (equal substrings starting at different positions
 * are counted separately), solved with a priority queue of growing
 * substrings.
 */
#include<iostream>
#include<sstream>
#include<fstream>
#include<vector>
#include<list>
#include<deque>
#include<queue>
#include<stack>
#include<map>
#include<set>
#include<bitset>
#include<algorithm>
#include<cstdio>
#include<cstdlib>
#include<cstring>
#include<cctype>
#include<cmath>
#include<ctime>
#include<iomanip>
#include<string>
const double eps(1e-8);
typedef long long lint;

char in[100010];
int K;

// One candidate substring together with the index in `in` of the character
// that would extend it by one.
struct node
{
    std::string value;
    int next;
    node(const std::string& v, int n) : value(v), next(n) {}
    // std::priority_queue is a max-heap, so the comparison is inverted to pop
    // the lexicographically smallest string first. Operands are taken by
    // const reference: taking them by value copied two strings on every
    // heap comparison.
    friend bool operator < (const node& a, const node& b)
    {
        return a.value > b.value;
    }
};

int main()
{
    // The width keeps scanf inside in[100010] (100009 characters + NUL).
    // Malformed input or K < 1 cannot name any substring.
    if(std::scanf("%100009s", in) != 1 || std::scanf("%d", &K) != 1 || K < 1)
    {
        std::printf("No such line.\n");
        return 0;
    }
    int len = std::strlen(in);

    // Check the total number of substrings before doing any heap work.
    lint kinds = (lint)len*((lint)len + 1LL)/2LL;
    if(kinds < K)
    {
        std::printf("No such line.\n");
        return 0;
    }

    // Seed the queue with every one-character substring.
    // (Writing `"" + in[i]` does not work: it is pointer arithmetic on the
    // string literal, not concatenation.)
    std::priority_queue <node> Q;
    for(int i = 0; i < len; i++)
        Q.push(node(std::string(1, in[i]), i + 1));

    // K <= 10^5, so there are at most 10^5 pops and pushes.
    while(K)
    {
        node now = Q.top();
        Q.pop();
        K--;
        if(!K) // the K-th element popped is the answer
        {
            std::printf("%s\n", now.value.c_str());
            return 0;
        }
        if(now.next != len)
        {
            // Extend in place and re-insert. The original author measured that
            // Q.push(node(now.value + in[now.next], now.next + 1)) cost about
            // 700 ms more.
            now.value += in[now.next];
            now.next++;
            Q.push(now);
        }
    }
    return 0;
}