题目:You have a large text file containing words. Given any two words, find the shortest distance (in terms of number of words) between them in the file. If the operation will be repeated many times for the same file(but different pairs of words), can you optimize your solution?
解法一:只需要一次性查询的,我们可以利用两个下标,一个记录最后一次遇到第一个单词的下标,另外一个记录遇到另外一个单词的下标,然后每次比较两个单词的距离,记录当前最小的两个单词的距离,最后就得到最小的距离了。时间效率O(n);
程序如下:
int shortestWordsDistance(vector<string> &words, string &w1, string &w2) { int lastWord1 = -1, lastWord2 = -1; int minDistance = INT_MAX; for (int i = 0; i < words.size(); i++) { if (words[i] == w1) { lastWord1 = i; if (lastWord2 != -1) minDistance = min(minDistance, lastWord1 - lastWord2); } if (words[i] == w2)//不加else是因为当w1==w2的时候,可以计算出为零 { lastWord2 = i; if (lastWord1 != -1) minDistance = min(minDistance, lastWord2 - lastWord1); } } return minDistance; }
那么需要建立一个表,所用时间效率是:O(n);查找的时候所用时间内效率是O(m), m是两个单词出现在文中的次数。因为表不需要重复计算,所以效率可以优化很多。
带测试实例的程序如下:
struct MarkInt { int a; string str; MarkInt(int a1, string s1):a(a1), str(s1){} }; bool operator<(const MarkInt &m1, const MarkInt &m2) { return m1.a < m2.a; } class WordsDistacne { map<string, vector<MarkInt> > lookupTable; vector<string> words; public: int shortestWordsDistanceRepeated(string &w1, string &w2) { if (!lookupTable.count(w1) || !lookupTable.count(w2)) return -1; vector<MarkInt> vm; lookupTable[w1].push_back(MarkInt(INT_MAX,w1)); lookupTable[w2].push_back(MarkInt(INT_MAX,w2)); int n1 = lookupTable[w1].size(); int n2 = lookupTable[w2].size(); for (int i = 0, j = 0; i < n1 && j < n2; ) { if (lookupTable[w1][i] < lookupTable[w2][j]) { vm.push_back(lookupTable[w1][i++]); } else vm.push_back(lookupTable[w2][j++]); } vm.pop_back(); lookupTable[w1].pop_back(); lookupTable[w2].pop_back(); int lastword1 = -1, lastword2 = -1; int minDistance = INT_MAX; for (int i = 0; i < vm.size(); i++) { if (vm[i].str == w1) { lastword1 = vm[i].a; if (lastword2 >= 0) { minDistance = min(minDistance, lastword1 - lastword2); } } if(vm[i].str == w2) { lastword2 = vm[i].a; if (lastword1 >= 0) { minDistance = min(minDistance, lastword2 - lastword1); } } } return minDistance; } void setupTable() { for (int i = 0; i < words.size(); i++) { lookupTable[words[i]].push_back(MarkInt(i, words[i])); } } WordsDistacne(vector<string> s):words(s) { setupTable(); } }; int main() { string s[] = {"my", "you", "hi", "you", "hi","my", "go", "not", "yes", "you"}; int n = sizeof(s)/sizeof(string); vector<string> vs(s, s+n); string s1 = "my"; string s2 = "not"; cout<<shortestWordsDistance(vs, s1, s2)<<endl; WordsDistacne wd(vs); cout<<wd.shortestWordsDistanceRepeated(s1,s2)<<endl; system("pause"); return 0; }