字符串类面试题总结

1.实现strStr()

字符串查找(又称查找子字符串),对于一个给定的 source 字符串和一个 target 字符串,你应该在 source 字符串中找出 target 字符串出现的第一个位置(从0开始)。如果不存在,则返回 -1。
例子:

如果 source = "source" 和 target = "target",返回 -1。如果 source = "abcdabcdefg" 和 target = "bcd",返回 1。

1.1 普通实现

  /**
   * Brute-force substring search.
   *
   * Tries every possible start offset in `source` and compares `target`
   * character by character.
   *
   * @param source text to search in (may be NULL)
   * @param target pattern to search for (may be NULL)
   * @return index of the first occurrence of `target` in `source`,
   *         or -1 if there is none or either pointer is NULL
   */
  int strStr(const char *source, const char *target) {
        if (!source || !target) {
            return -1;
        }

        const int sourceLen = strlen(source);
        const int targetLen = strlen(target);
        // Last offset at which the pattern still fits inside the text;
        // negative when the pattern is longer, so the loop never runs.
        const int lastStart = sourceLen - targetLen;

        for (int start = 0; start <= lastStart; ++start) {
            int matched = 0;
            while (matched < targetLen &&
                   source[start + matched] == target[matched]) {
                ++matched;
            }
            if (matched == targetLen) {
                return start;
            }
        }
        return -1;
    }

1.2 KMP算法

讲解:http://www.ituring.com.cn/article/59881
   /**
    * Substring search using the Knuth-Morris-Pratt algorithm.
    *
    * @param source text to search in (may be NULL)
    * @param target pattern to search for (may be NULL)
    * @return index of the first occurrence of `target` in `source`,
    *         0 for an empty pattern, or -1 if there is no occurrence or
    *         either pointer is NULL
    */
   int strStr(const char *source, const char *target) {
        if (source == NULL || target == NULL) return -1;
        int lenS = strlen(source);
        int lenT = strlen(target);

        // An empty pattern matches at offset 0. Returning here also avoids
        // writing N[0] into a zero-length allocation further down.
        if (lenT == 0) return 0;
        // A pattern longer than the text can never match.
        if (lenT > lenS) return -1;

        // N[i] is the index of the last character of the longest proper
        // prefix of target[0..i] that is also a suffix of it, or -1 if no
        // such prefix exists.
        int* N = new int[lenT];
        N[0] = -1;
        for (int i = 1; i < lenT; ++i) {
            int idx = N[i - 1];
            // Fall back through shorter borders until one can be extended
            // by target[i].
            while (idx >= 0 && target[idx + 1] != target[i]) {
                idx = N[idx];
            }
            if (target[idx + 1] == target[i]) {
                N[i] = idx + 1;
            } else {
                N[i] = -1;
            }
        }

        int idxS = 0;
        int idxT = 0;
        while (idxS < lenS && idxT < lenT) {
            if (source[idxS] == target[idxT]) {
                ++idxS;
                ++idxT;
            } else if (idxT == 0) {
                // Nothing matched yet: just advance in the text.
                ++idxS;
            } else {
                // Reuse the longest border of the matched part instead of
                // rewinding the text position.
                idxT = N[idxT - 1] + 1;
            }
        }
        delete[] N;

        if (idxT == lenT) {
            return idxS - idxT;
        }
        return -1;
    }


2. 纠正单词拼写错误

鹅厂面试题
比如想输入hello,却错误地输入了hellu,找出出错的字母。
trie树解法:
from string import ascii_lowercase

END = '$'  # key that marks "a word ends at this node"


def make_trie(words):
    """Build a nested-dict trie from an iterable of words.

    Every character maps to a child dict. The node reached after the last
    character of a word gets an ``END`` key mapped to an empty dict.
    """
    root = {}
    for word in words:
        node = root
        for ch in word:
            node = node.setdefault(ch, {})
        node[END] = {}
    return root


def check_fuzzy(trie, word, path='', tol=1):
    """Collect trie words that ``word`` can reach within a tolerance budget.

    Walks every child of ``trie`` recursively. ``path`` accumulates the keys
    followed so far and ``tol`` is the remaining budget; a branch is dropped
    as soon as the budget goes negative. When ``word`` is exhausted, ``path``
    is reported only if the current node carries the ``END`` marker.

    Returns a set of matched paths (possibly empty).
    """
    if tol < 0:
        return set()
    if word == '':
        return {path} if END in trie else set()

    head, tail = word[0], word[1:]
    found = set()
    for key, child in trie.items():
        prefix = path + key
        # Following a key that differs from the next input letter costs 1.
        budget = tol if key == head else tol - 1

        # plain step: consume one input letter and one trie key
        found |= check_fuzzy(child, tail, prefix, budget)

        # add a letter: try every lowercase letter in front of the rest
        for letter in ascii_lowercase:
            found |= check_fuzzy(child, letter + tail, prefix, budget - 1)

        # drop a letter: skip the following input letter as well
        if len(word) > 1:
            found |= check_fuzzy(child, word[2:], prefix, tol - 1)

        # swap letters: exchange the next two input letters
        if len(word) > 2:
            swapped = word[2] + word[1] + word[3:]
            found |= check_fuzzy(child, swapped, prefix, budget - 1)
    return found

# Demo: build a small trie, print it, then run a few fuzzy lookups.
words = ['hello', 'hela', 'dome']
t = make_trie(words)
print(t)

# (query, tolerance) pairs, printed in this order
for query, tolerance in (('hellu', 1), ('healu', 1), ('healu', 2)):
    print(check_fuzzy(t, query, tol=tolerance))
结果:
{'h': {'e': {'l': {'l': {'o': {'$': {}}}, 'a': {'$': {}}}}}, 'd': {'o': {'m': {'e': {'$': {}}}}}}
{'hello', 'hela'}
set()
{'hello', 'hela'}


朴素贝叶斯解法

3.乱序字符串(Anagram)

给出一个字符串数组S,找到其中所有的乱序字符串(Anagram)。如果一个字符串是乱序字符串,那么S中还存在另一个与它字母集合相同、但顺序不同的字符串。
样例:对于字符串数组 ["lint","intl","inlt","code"],返回 ["lint","inlt","intl"]。
注意:所有的字符串都只包含小写字母。
时间复杂度O(n^2)
    /**
     * Collects every string that has at least one anagram elsewhere in the
     * array, using pairwise comparison (O(n^2) calls to areAnagrams).
     *
     * @param strs input strings
     * @return the strings that belong to an anagram group of size two or
     *         more, in order of first discovery
     */
    public List<String> anagrams(String[] strs) {
        List<String> angs = new ArrayList<String>();
        boolean[] hasAdd = new boolean[strs.length];
        for (int i = 0; i < strs.length; ++i) {
            // A string that was already reported was matched by an earlier
            // index, and that pass also reported all of its later anagrams,
            // so scanning again cannot find anything new.
            if (hasAdd[i]) continue;
            for (int j = i + 1; j < strs.length; ++j) {
                if (hasAdd[j]) continue;
                if (areAnagrams(strs[i], strs[j])) {
                    // Report strs[i] once, on its first match.
                    if (!hasAdd[i]) {
                        angs.add(strs[i]);
                        hasAdd[i] = true;
                    }
                    // hasAdd[j] is known to be false here (see the continue
                    // above), so no second check is needed.
                    angs.add(strs[j]);
                    hasAdd[j] = true;
                }
            }
        }
        return angs;
    }
    
    /**
     * Tells whether two strings consist of exactly the same letters with
     * the same multiplicities.
     *
     * Both strings are expected to contain only lowercase letters 'a'..'z'.
     *
     * @param str1 first string
     * @param str2 second string
     * @return true if the strings are anagrams of each other
     */
    public boolean areAnagrams(String str1, String str2) {
        // Strings of different length can never be anagrams.
        if (str1.length() != str2.length()) {
            return false;
        }

        // One slot per lowercase letter. The table size and the final check
        // use the same bound, so no slot is left unchecked.
        final int alphabetSize = 26;
        int[] balance = new int[alphabetSize];
        for (int i = 0; i < str1.length(); ++i) {
            balance[str1.charAt(i) - 'a'] += 1;
            balance[str2.charAt(i) - 'a'] -= 1;
        }
        for (int i = 0; i < alphabetSize; ++i) {
            if (balance[i] != 0) return false;
        }
        return true;
    }
时间复杂度O(n)的hash算法:
   public List anagrams(String[] strs) {
        List angs = new ArrayList();
        Map> map = new HashMap>();
        
        for (String str : strs) {
            int[] count = new int[26];
            for (int i = 0; i < str.length(); ++i) {
                count[str.charAt(i) - 'a']++;
            }
            int hash = getHash(count);
            if (!map.containsKey(hash)) {
                map.put(hash, new ArrayList());
            }
            map.get(hash).add(str);
        }
        for (List tmp : map.values()) {
            if (tmp.size() > 1) {
                angs.addAll(tmp);
            }
        }
        return angs;
    }
    
    /**
     * Folds an array of counts into a single int.
     *
     * Each step multiplies the running value by the current multiplier and
     * adds the next count; the multiplier is then scaled by a fixed factor.
     * All arithmetic is plain int arithmetic and wraps on overflow.
     *
     * @param count values to fold
     * @return the resulting hash value
     */
    public int getHash(int[] count) {
        final int factor = 63689;
        int multiplier = 378551;
        int result = 0;
        for (int i = 0; i < count.length; ++i) {
            result = result * multiplier + count[i];
            multiplier = multiplier * factor;
        }
        return result;
    }





你可能感兴趣的:(找工作)