[LintCode][System Design] Inverted Index

Problem

Create an inverted index from the given documents.

Example
Given a list of documents, each with an id and content (class Document):

[
  {
    "id": 1,
    "content": "This is the content of document 1, it's very short"
  },
  {
    "id": 2,
    "content": "This is the content of document 2, it's very long. bilabial bilabial heheh hahaha ..."
  }
]

Return an inverted index (a HashMap whose key is a word and whose value is the list of ids of the documents containing that word).

{
   "This": [1, 2],
   "is": [1, 2],
   ...
}

Solution

/**
 * Definition of Document:
 * class Document {
 * public:
 *     int id;
 *     string content;
 * }
 */
class Solution {
public:
    /**
     * @param docs a list of documents
     * @return an inverted index
     */
    map> invertedIndex(vector& docs) {
        map> ret;
        for(int i = 0; i < docs.size(); i++) {
            set words = parseWords(docs[i].content);
            for(set::iterator iter = words.begin(); iter != words.end(); iter++) {
                ret[*iter].push_back(docs[i].id);
            }
        }
        
        return ret;
    }
    
    set parseWords(string &s) {
        s = s + " ";
        set ret;
        int start = -1;
        for(int i = 0; i < s.size(); i++) {
            if (s[i] == ' ') {
                if (start != -1) {
                    string word = s.substr(start, i - start);
                    ret.insert(word);
                    start = -1;
                }
            } else {
                if (start == -1) {
                    start = i;
                }
            }
        }
        
        return ret;
    }
};

你可能感兴趣的:([LintCode][System Design] Inverted Index)