一个简单的文本查询程序,来自《C++ Primer》
// main.cpp
/*
 * Purpose: a simple text-query program.
 *
 * Behaviour: the program reads a text file and then lets the user look up
 * words in that file. For each query it prints a count followed by every
 * line on which the word appears. A line is listed only once even if the
 * word occurs on it several times, and lines are listed in ascending order
 * (line 7 is printed before line 9, and so on).
 *
 * Approach:
 *  1. A vector<string> holds a copy of the whole input file, one element per
 *     line, so a line can be printed by using its line number as the index.
 *  2. The line numbers for each word are kept in a set, which guarantees one
 *     entry per line and keeps the numbers sorted in ascending order.
 *  3. A map associates each word with the set of line numbers on which that
 *     word appears.
 */
#include "TextQuery.h"

#include <string>
#include <vector>
#include <map>
#include <set>
#include <iostream>
#include <fstream>
#include <cctype>
#include <cstring>
#include <cstdlib>

using std::set;
using std::string;
using std::map;
using std::vector;
using std::cerr;
using std::cout;
using std::cin;
using std::ifstream;
using std::endl;

string make_plural(size_t, const string&, const string&);
ifstream& open_file(ifstream&, const string&);

// Prints the result of one query.
//   locs   - line numbers (0-based) on which the word was found
//   sought - the word exactly as the user typed it
//   file   - the TextQuery that owns the text, used to fetch each line
// The reported count is locs.size(), i.e. the number of distinct lines.
void print_results(const set<TextQuery::line_no>& locs,
                   const string& sought, const TextQuery &file)
{
    typedef set<TextQuery::line_no> line_nums;
    line_nums::size_type size = locs.size();
    cout << "\n" << sought << " occurs "
         << size << " "
         << make_plural(size, "time", "s") << endl;

    // print each line in which the word appeared
    line_nums::const_iterator it = locs.begin();
    for (; it != locs.end(); ++it) {
        cout << "\t(line "
             // line numbers are stored 0-based; show them 1-based to the user
             << (*it) + 1 << ") "
             << file.text_line(*it) << endl;
    }
}

// Entry point. Takes an optional single argument naming the file to query;
// when no argument is given the historical default "Tanky_Woo.txt" is used.
// Returns EXIT_FAILURE if the file cannot be opened, 0 otherwise.
int main(int argc, char *argv[])
{
    const string filename = (argc > 1) ? argv[1] : "Tanky_Woo.txt";

    // open the file from which the user will query words
    ifstream infile;
    if (!open_file(infile, filename)) {
        cerr << "No input file!" << endl;
        return EXIT_FAILURE;
    }

    TextQuery tq;
    tq.read_file(infile);  // builds the query map

    // prompt for a word, print the results, repeat;
    // the loop exit is inside the body
    while (true) {
        cout << "enter word to look for, or q to quit: ";
        string s;
        cin >> s;
        // stop on end-of-file / input error, or when 'q' is entered
        if (!cin || s == "q")
            break;
        // get the set of line numbers on which this word appears
        const set<TextQuery::line_no> locs = tq.run_query(s);
        // print count and all occurrences, if any
        print_results(locs, s, tq);
    }

#ifdef _WIN32
    // "pause" is a cmd.exe built-in; only run it where it exists
    std::system("pause");
#endif
    return 0;
}

// Returns word unchanged when ctr == 1, otherwise word followed by ending.
string make_plural(size_t ctr, const string &word, const string &ending)
{
    return (ctr == 1) ? word : word + ending;
}

// (Re)opens `in` on the named file and returns `in`.
// The caller tests the returned stream: it is good only if the open succeeded.
ifstream& open_file(ifstream &in, const string &file)
{
    in.close();   // close in case it was already open
    in.clear();   // clear any existing errors (including one set by close())
    // if the open fails, the stream will be in an invalid state
    in.open(file.c_str());
    return in;
}
TextQuery.h
// TextQuery.h
#ifndef TEXTQUERY_H
#define TEXTQUERY_H

#include <string>
#include <vector>
#include <map>
#include <set>
#include <iostream>
#include <fstream>
#include <cctype>
#include <cstring>

// Holds a copy of a text file plus an index from each word to the set of
// line numbers on which that word occurs.
class TextQuery {
public:
    // typedefs to make declarations easier
    typedef std::string::size_type str_size;
    typedef std::vector<std::string>::size_type line_no;

    /* interface:
     *   read_file  builds the internal data structures for the given file
     *   run_query  finds the given word and returns the set of lines on
     *              which it appears (line numbers are 0-based)
     *   text_line  returns the requested line from the input file
     */
    void read_file(std::ifstream &is)
    {
        store_file(is);
        build_map();
    }
    std::set<line_no> run_query(const std::string&) const;
    std::string text_line(line_no) const;

    // number of lines stored from the input file
    str_size size() const { return lines_of_text.size(); }

    void display_map();  // debugging aid: print the map

private:
    // utility functions used by read_file
    void store_file(std::ifstream&);  // store the input file line by line
    void build_map();  // associate each word with a set of line numbers

    // remember the whole input file
    std::vector<std::string> lines_of_text;

    // map each word to the set of lines on which it occurs
    std::map< std::string, std::set<line_no> > word_map;

    // characters that constitute whitespace
    static std::string whitespace_chars;

    // canonicalizes text: removes punctuation and makes everything lower case
    static std::string cleanup_str(const std::string&);
};

#endif // TEXTQUERY_H
TextQuery.cpp
#include "TextQuery.h"

#include <sstream>
#include <string>
#include <vector>
#include <map>
#include <set>
#include <iostream>
#include <fstream>
#include <cctype>
#include <cstring>
#include <stdexcept>

using std::istringstream;
using std::set;
using std::string;
using std::getline;
using std::map;
using std::vector;
using std::cerr;
using std::cout;
using std::cin;
using std::ifstream;
using std::endl;
using std::ispunct;
using std::tolower;
using std::strlen;
using std::out_of_range;

// Returns a copy of the stored line with the given 0-based index.
// Throws std::out_of_range if the index is not a valid line number.
string TextQuery::text_line(line_no line) const
{
    if (line < lines_of_text.size())
        return lines_of_text[line];
    throw std::out_of_range("line number out of range");
}

// Reads the input stream to its end, appending each line to lines_of_text.
void TextQuery::store_file(ifstream &is)
{
    string textline;
    while (getline(is, textline))
        lines_of_text.push_back(textline);
}

// \v: vertical tab; \f: formfeed; \r: carriage return are
// treated as whitespace characters along with space, tab and newline
string TextQuery::whitespace_chars(" \t\n\v\r\f");

// Finds the whitespace-separated words in every stored line and records,
// in word_map, the line numbers on which each canonicalized word occurs.
void TextQuery::build_map()
{
    // process each line from the input vector
    for (line_no line_num = 0;
         line_num != lines_of_text.size();
         ++line_num)
    {
        // use a string stream to read the line one word at a time
        istringstream line(lines_of_text[line_num]);
        string word;
        while (line >> word) {
            const string key = cleanup_str(word);
            // a token made only of punctuation cleans up to "";
            // indexing it would create a meaningless empty-string entry
            if (key.empty())
                continue;
            // subscript adds the word to the map if it is not already there
            word_map[key].insert(line_num);
        }
    }
}

// Returns the set of 0-based line numbers on which query_word occurs
// (after canonicalization), or an empty set if the word is not indexed.
set<TextQuery::line_no>
TextQuery::run_query(const string &query_word) const
{
    // Note: must use find and not subscript the map directly,
    // to avoid adding words to word_map!
    map<string, set<line_no> >::const_iterator loc =
        word_map.find(cleanup_str(query_word));

    if (loc == word_map.end())
        return set<line_no>();  // not found, return empty set

    // fetch and return the set of line numbers for this word
    return loc->second;
}

// Debugging aid: prints every indexed word followed by its line numbers,
// in the form  word: <w> {n1, n2, ...}
void TextQuery::display_map()
{
    map< string, set<line_no> >::iterator iter = word_map.begin(),
                                          iter_end = word_map.end();

    // for each word in the map
    for (; iter != iter_end; ++iter) {
        cout << "word: " << iter->first << " {";

        // bind the location set by const reference to avoid copying it
        const set<line_no> &text_locs = iter->second;
        set<line_no>::const_iterator loc_iter = text_locs.begin(),
                                     loc_iter_end = text_locs.end();

        // print all line numbers for this word, comma separated
        while (loc_iter != loc_iter_end) {
            cout << *loc_iter;
            if (++loc_iter != loc_iter_end)
                cout << ", ";
        }

        cout << "}\n";  // end of the list for this word
    }
    cout << endl;  // finished printing the entire map
}

// Canonicalizes a word: drops punctuation characters and converts the
// remaining characters to lower case.
string TextQuery::cleanup_str(const string &word)
{
    string ret;
    ret.reserve(word.size());
    for (string::const_iterator it = word.begin(); it != word.end(); ++it) {
        // <cctype> functions require a value representable as unsigned char;
        // passing a negative plain char (any non-ASCII byte) is undefined
        const unsigned char ch = static_cast<unsigned char>(*it);
        if (!ispunct(ch))
            ret += static_cast<char>(tolower(ch));
    }
    return ret;
}