《Cracking the Coding Interview》——第18章:难题——题目13

2014-04-29 04:40

题目:给定一个字母组成的矩阵,和一个包含一堆单词的词典。请从矩阵中找出一个最大的子矩阵,使得从左到右每一行,从上到下每一列组成的单词都包含在词典中。

解法:O(n^3)级别的时间和空间进行动态规划。这道题目和第17章的最后一题很像,由于这题的时间复杂度实在是高,我动手写了字典树进行加速。如果单纯用哈希表来作为词典,查询效率实际会达到O(n)级别,导致最终的算法复杂度为O(n^4)。用字典树则可以加速到O(n^3),因为对于一个字符串“abcd”,只需要从字典树的根节点出发往下走,就可以一次性判断“a”、“ab”、“abc”、“abcd”是否包含在字典里,而用哈希表无法做到这样的效率。动态规划过程仍然是O(n^4)级别,字典树只是将查单词的过程中进行了加速,从O(n^4)加速到了O(n^3)。空间复杂度为O(n^3),用于标记横向和纵向的每一个子段是否包含在字典里。

代码:

  1 // 18.13 There is a matrix of lower case letters. Given a dictionary of words, you have to find the maximum subrectangle, such that every row reading from left to right, and every column reading from top to bottom is a word in the dictionary. Return the area as the result.

  2 #include <iostream>

  3 #include <string>

  4 #include <unordered_set>

  5 #include <vector>

  6 using namespace std;

  7 

  8 const int MAX_NODE = 10000;

  9 

 10 struct TrieNode {

 11     bool is_word;

 12     char ch;

 13     vector<int> child;

 14     TrieNode(char _ch = 0): is_word(false), ch(_ch), child(vector<int>(26, -1)) {};

 15 };

 16 int node_count;

 17 TrieNode nodes[MAX_NODE];

 18 

 19 void insertWordIntoTrie(int root, const string &s)

 20 {

 21     int len = s.length();

 22     

 23     for (int i = 0; i < len; ++i) {

 24         if (nodes[root].child[s[i] - 'a'] < 0) {

 25             nodes[root].child[s[i] - 'a'] = node_count++;

 26             root = nodes[root].child[s[i] - 'a'];

 27             nodes[root].ch = s[i];

 28         } else {

 29             root = nodes[root].child[s[i] - 'a'];

 30         }

 31         if (i == len - 1) {

 32             nodes[root].is_word = true;

 33         }

 34     }

 35 }

 36 

 37 int constructTrie(const unordered_set<string> &dict)

 38 {

 39     int root = node_count++;

 40     

 41     unordered_set<string>::const_iterator usit;

 42     for (usit = dict.begin(); usit != dict.end(); ++usit) {

 43         insertWordIntoTrie(root, *usit);

 44     }

 45     

 46     return root;

 47 }

 48 

 49 int main()

 50 {

 51     int i, j, k;

 52     int i1, i2;

 53     string s;

 54     int n, m;

 55     vector<vector<char> > matrix;

 56     vector<vector<vector<bool> > > is_row_word;

 57     vector<vector<vector<bool> > > is_col_word;

 58     vector<int> dp;

 59     unordered_set<string> dict;

 60     int root;

 61     int ptr;

 62     int max_area;

 63     

 64     while (cin >> n && n > 0) {

 65         node_count = 0;

 66         for (i = 0; i < n; ++i) {

 67             cin >> s;

 68             dict.insert(s);

 69         }

 70         root = constructTrie(dict);

 71         

 72         cin >> n >> m;

 73         

 74         matrix.resize(n);

 75         for (i = 0; i < n; ++i) {

 76             matrix[i].resize(m);

 77         }

 78         for (i = 0; i < n; ++i) {

 79             cin >> s;

 80             for (j = 0; j < m; ++j) {

 81                 matrix[i][j] = s[j];

 82             }

 83         }

 84         

 85         is_row_word.resize(n);

 86         for (i = 0; i < n; ++i) {

 87             is_row_word[i].resize(m);

 88             for (j = 0; j < m; ++j) {

 89                 is_row_word[i][j].resize(m);

 90             }

 91         }

 92 

 93         is_col_word.resize(m);

 94         for (i = 0; i < m; ++i) {

 95             is_col_word[i].resize(n);

 96             for (j = 0; j < n; ++j) {

 97                 is_col_word[i][j].resize(n);

 98             }

 99         }

100         

101         for (i = 0; i < n; ++i) {

102             for (j = 0; j < m; ++j) {

103                 ptr = root;

104                 for (k = j; k < m; ++k) {

105                     if (ptr < 0) {

106                         is_row_word[i][j][k] = false;

107                         continue;

108                     }

109                     

110                     ptr = nodes[ptr].child[matrix[i][k] - 'a'];

111                     if (ptr < 0) {

112                         is_row_word[i][j][k] = false;

113                         continue;

114                     }

115                     

116                     is_row_word[i][j][k] = nodes[ptr].is_word;

117                 }

118             }

119         }

120         

121         for (i = 0; i < m; ++i) {

122             for (j = 0; j < n; ++j) {

123                 ptr = root;

124                 for (k = j; k < n; ++k) {

125                     if (ptr < 0) {

126                         is_col_word[i][j][k] = false;

127                         continue;

128                     }

129                     

130                     ptr = nodes[ptr].child[matrix[k][i] - 'a'];

131                     if (ptr < 0) {

132                         is_col_word[i][j][k] = false;

133                         continue;

134                     }

135                     

136                     is_col_word[i][j][k] = nodes[ptr].is_word;

137                 }

138             }

139         }

140         

141         max_area = 0;

142         dp.resize(m);

143         for (i1 = 0; i1 < n; ++i1) {

144             for (i2 = i1; i2 < n; ++i2) {

145                 k = 0;

146                 for (j = 0; j < m; ++j) {

147                     dp[j] = is_col_word[j][i1][i2] ? (++k) : (k = 0);

148                     if (!dp[j]) {

149                         continue;

150                     }

151                     

152                     for (i = i1; i <= i2; ++i) {

153                         if (!is_row_word[i][j - dp[j] + 1][j]) {

154                             break;

155                         }

156                     }

157                     if (i > i2) {

158                         max_area = max(max_area, (i2 - i1 + 1) * dp[j]);

159                     }

160                 }

161             }

162         }

163         

164         cout << max_area << endl;

165         

166         // clear up data

167         dict.clear();

168         for (i = 0; i < n; ++i) {

169             matrix[i].clear();

170         }

171         matrix.clear();

172         

173         for (i = 0; i < n; ++i) {

174             for (j = 0; j < m; ++j) {

175                 is_row_word[i][j].clear();

176             }

177             is_row_word[i].clear();

178         }

179         is_row_word.clear();

180         

181         for (i = 0; i < m; ++i) {

182             for (j = 0; j < n; ++j) {

183                 is_col_word[i][j].clear();

184             }

185             is_col_word[i].clear();

186         }

187         is_col_word.clear();

188     }

189     

190     return 0;

191 }

 

你可能感兴趣的:(interview)