题目如下:
Given a string s and a dictionary of words dict, add spaces in s to construct a sentence where each word is a valid dictionary word.
Return all such possible sentences.
For example, given
s = "catsanddog",
dict = ["cat", "cats", "and", "sand", "dog"].
A solution is ["cats and dog", "cat sand dog"].
分析如下:
题目可以用trie来做,也可以用DP + DFS来做。
这里先说说DP + DFS的思路。
首先,和Word Break I类似,用DP来判断输入string是否能够被break,并且在其中记录中间结果。这个中间结果记录了所有的前驱下标,是下一步DFS的基础,等下细细讲解。
然后,借用word ladder II的求所有路径的思想,对中间结果进行dfs,从而找到所有的路径。
现在来讲中间结果。
先看下面的例子。
s = "catsand",
dict = ["cat", "cats", "and", "sand", ].
那么,我希望用一个数组result_node来记录,假设在最后一点result_node[length - 1] 可以break的话,都可以从哪些点开始break到最后一点。
length(s) = 7
result_node[6] = {2, 3},
因为可以从cats 这里切分,"catsand" = "cats" + "and"
也可以从cat这里切分, "catsand" = "cat" + "sand"
也就是说,最后求路径的时候,"d"节点(index = 6)的前一个节点是"s"节点(index = 3),或者"t"节点(index = 2)。
如果以下标0为开始、以当前下标j为结束的子串正好在dictionary中,那么就把-1作为当前下标的一个前驱,并计入前驱集合中。例如,在"catsand"中,以下标2("t")结尾的前缀"cat"正好在dictionary中,所以result_node[2] = {-1};同理,前缀"cats"也在dictionary中,所以result_node[3] = {-1}。
再看一个比较麻烦的例子。
unordered_set<string> dict = {"a", "aa"};
string s = "aaaa";
result_node[0] = {-1};
result_node[1] = {-1, 0};
result_node[2] = {0, 1};
result_node[3] = {1, 2};
现在,使用DP,从头到尾把输入字符串扫一遍,并且记录每个节点的前驱,最后进行DFS并且按照要求的格式把结果输出就可以了。
容易出错的地方是,因为记录了-1这个前驱,所以最后dfs的时候,还需要注意这个特殊值。见代码。
我的代码:
// The original submission was annotated "30ms".
//
// Word Break II: return every way of inserting spaces into `s` such that each
// resulting word is contained in `dict`.
//
// Approach (DP + DFS):
//   1. For every end index i, record the set of "predecessor" indices j such
//      that s[0..j] is breakable and s[j+1..i] is a dictionary word. When the
//      whole prefix s[0..i] is itself a word, the sentinel -1 is recorded.
//   2. If the last index has at least one predecessor, walk the predecessor
//      graph backwards with a DFS to enumerate every chain of cut positions.
//   3. Turn each chain of cut positions into a space-separated sentence.
class Solution {
public:
    /// Enumerates every predecessor chain from `start` back to the sentinel -1.
    ///
    /// @param result_node result_node[i] holds the predecessors of index i
    ///                    (-1 meaning "s[0..i] is a dictionary word").
    /// @param start       index to continue from, or -1 when a chain is complete.
    /// @param path        predecessors visited so far, most recent last
    ///                    (used as the backtracking stack).
    /// @param paths       output: one entry per complete chain, stored as the
    ///                    ascending list of word-end indices, excluding both the
    ///                    sentinel and the final index of the string.
    void dfs(std::vector<std::vector<int>> &result_node, int start,
             std::vector<int> &path, std::vector<std::vector<int>> &paths) {
        if (start == -1) {
            // Copy first and edit the copy: `path` itself must stay untouched
            // because the caller still pops its own entry while backtracking.
            paths.push_back(path);
            std::vector<int> &chain = paths.back();
            if (!chain.empty()) {
                chain.pop_back();  // drop the trailing -1 sentinel
            }
            std::reverse(chain.begin(), chain.end());
            return;
        }
        for (const int prev : result_node[start]) {
            // Push the predecessor index itself, not its position in the list.
            path.push_back(prev);
            dfs(result_node, prev, path, paths);
            path.pop_back();
        }
    }

    /// Converts chains of cut positions into sentences.
    ///
    /// @param s           the string being segmented.
    /// @param paths       chains produced by dfs(); each chain gets the last
    ///                    index of `s` appended to it (the vector is modified).
    /// @param final_paths output: one space-separated sentence per chain,
    ///                    appended in the same order as `paths`.
    void format_result(std::string &s, std::vector<std::vector<int>> &paths,
                       std::vector<std::string> &final_paths) {
        const int last = static_cast<int>(s.length()) - 1;
        for (std::vector<int> &cuts : paths) {
            // The end of the string closes the final word of every sentence.
            cuts.push_back(last);
            // The first word has no leading space.
            std::string sentence = s.substr(0, cuts[0] + 1);
            // Every following word is preceded by a single space.
            for (std::size_t j = 1; j < cuts.size(); ++j) {
                sentence += ' ';
                sentence += s.substr(cuts[j - 1] + 1, cuts[j] - cuts[j - 1]);
            }
            final_paths.push_back(sentence);
        }
    }

    /// Returns all sentences obtainable by splitting `s` into words of `dict`.
    ///
    /// @param s    string to segment.
    /// @param dict set of valid words.
    /// @return every valid sentence; empty when `s` or `dict` is empty or when
    ///         no segmentation exists.
    std::vector<std::string> wordBreak(std::string s, std::unordered_set<std::string> &dict) {
        std::vector<std::string> final_paths;
        // Guarding the empty string avoids indexing at length() - 1 below.
        if (s.empty() || dict.empty()) {
            return final_paths;
        }

        // Note: there is deliberately no shortcut for "s itself is a word".
        // Such a shortcut would hide other segmentations, e.g. "aa" with
        // {"a", "aa"} must yield both "aa" and "a a".
        const int n = static_cast<int>(s.length());
        std::vector<std::vector<int>> result_node(n);
        for (int i = 0; i < n; ++i) {
            if (dict.find(s.substr(0, i + 1)) != dict.end()) {
                result_node[i].push_back(-1);
            }
            for (int j = 0; j < i; ++j) {
                // A non-empty predecessor list means s[0..j] is breakable.
                if (!result_node[j].empty() &&
                    dict.find(s.substr(j + 1, i - j)) != dict.end()) {
                    result_node[i].push_back(j);
                }
            }
        }

        // Skip the DFS entirely when the full string cannot be segmented.
        if (result_node[n - 1].empty()) {
            return final_paths;
        }

        std::vector<std::vector<int>> paths;
        std::vector<int> path;
        dfs(result_node, n - 1, path, paths);
        format_result(s, paths, final_paths);
        return final_paths;
    }
};