轻量级字符串搜索引擎

package xcollections;

import java.util.*;
import java.util.concurrent.locks.ReentrantReadWriteLock;

/**
 * @author wanqilin
 * @date 2019/5/18
 * @description
 * StringSearch 是一个轻量的字符串模糊搜索引擎
 * 调节 resizeThreshold 可以控制 StringSearch 的内存大小,但同时会影响性能
 * resizeThreshold 越小,内存消耗越大,但速度越快(search的速度很快,但put的速度很慢,此时的StringSearch 相当于一课字典树)
 * resizeThreshold 越大,内存消耗越少,但单次搜索的速度将会变慢
 *
 * 因此 resizeThreshold 应该控制在一个合理的值
 *
 * StringSearch 是线程安全的,使用读写锁来实现线程安全性,因此适用于读多写少的环境
 */
public class StringSearch {
    private int resizeThreshold;
    private int resultMaxCount;
    private Comparator comparator;

    private Map strMap;
    private MasterNode head;

    private ReentrantReadWriteLock.ReadLock  rl;
    private ReentrantReadWriteLock.WriteLock wl;

    public StringSearch(Comparator comparator){
        this(50,20,comparator);
    }

    public StringSearch(int resizeThreshold,int resultMaxCount,Comparator comparator){
        this.resultMaxCount = resultMaxCount;
        this.resizeThreshold = resizeThreshold;
        this.comparator = comparator;
        strMap = new HashMap<>(resultMaxCount);

        head = new MasterNode();
        ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
        rl = lock.readLock();wl = lock.writeLock();
    }

    public void put(Key k,String str){
        wl.lock();
        try{
            strMap.put(k,str);
            for (int i = 0; i < str.length(); i++) {
                head.add(k,str,i);
            }
        }finally {
            wl.unlock();
        }
    }

    public List search(String keyword){
        rl.lock();
        try {
            return head.find(keyword,0);
        }finally {
            rl.unlock();
        }
    }

    public String get(Key k){
        rl.lock();
        try{
            return strMap.get(k);
        }finally {
            rl.unlock();
        }
    }

    public boolean remove(Key k){
        wl.lock();
        try{
            String str = strMap.remove(k);
            if (str != null){
                for (int i = 0; i < str.length(); i++) {
                    head.remove(k,str,i);
                }
                return true;
            }
            return false;
        }finally {
            wl.unlock();
        }
    }

    public int size(){
        return strMap.size();
    }

    abstract class Node{
        abstract public List find(String str, int beginIndex);
        abstract public void add(Key k,String str,int idx);
        abstract public void remove(Key k,String str,int idx);
        abstract public List allKeys();
        abstract public int size();
    }
    class ServantNode extends Node{
        private List keys;
        private Character currentChar;
        ServantNode(Character currentChar){
            this.currentChar = currentChar;
            keys = new ArrayList<>();
        }

        @Override
        public List find(String str, int beginIndex) {
            List res= new LinkedList<>();
            String subStr = str.substring(beginIndex);
            for (Key k : keys){
                if (strMap.containsKey(k) && strMap.get(k).contains(subStr)){
                    res.add(k);
                    if (res.size() >= resultMaxCount)
                        return res;
                }
            }
            return res;
        }

        @Override
        public void add(Key k, String str, int idx) {
            int index = Collections.binarySearch(keys,k,comparator);
            if (index < 0){
                keys.add(-index - 1,k);
            }
        }

        @Override
        public void remove(Key k, String str, int idx) {
            int index = Collections.binarySearch(keys,k,comparator);
            if (index >= 0){
                keys.remove(index);
            }
        }

        @Override
        public List allKeys() {
            return keys;
        }

        @Override
        public int size() {
            return keys.size();
        }

        Node levelUp(){
            if (currentChar == null)
                return this;

            MasterNode masterNode = new MasterNode(resizeThreshold);
            for (Key k : keys){
                String str = strMap.get(k);
                for (int i = 0; i < str.length(); i++) {
                    if (str.charAt(i) == currentChar){
                        masterNode.add(k,str,i + 1);
                    }
                }
            }
            return masterNode;
        }
    }
    class MasterNode extends Node{
        private Map map;
        MasterNode(){
            this(0);
        }
        MasterNode(int initialCapacity){
            map = new HashMap<>(initialCapacity);
        }
        @Override
        public List find(String str, int beginIndex) {
            Character c = null;
            if (beginIndex < str.length())
                c = str.charAt(beginIndex);

            Node node = map.get(c);
            if (node == null)
                return new LinkedList<>();

            if (beginIndex + 1 == str.length()){
                List res = allKeys();
                if (res.size() > resultMaxCount){
                    return res.subList(0,20);
                }
                return res;
            }

            return node.find(str, beginIndex + (c == null ? -1 : 1));
        }

        @Override
        public void add(Key k, String str, int idx) {
            Character c = idx == str.length() ? null : str.charAt(idx);
            Node node = map.get(c);
            if (node == null){
                node = new ServantNode(c);
                map.put(c,node);
            }

            node.add(k,str,++idx);

            if (node instanceof StringSearch.ServantNode && node.size() >= resizeThreshold){
                Node master = ((ServantNode) node).levelUp();
                map.put(c,master);
            }
        }

        @Override
        public void remove(Key k, String str, int idx) {
            Character c = str.charAt(idx);
            Node node = map.get(c);

            if (node == null)
                return;
            node.remove(k,str,++idx);

            if (node.size() == 0){
                map.remove(c);
            }
        }

        @Override
        public List allKeys() {
            List res = new ArrayList<>();
            map.values().forEach(n -> {
                if (res.size() < resultMaxCount){
                    res.addAll(n.allKeys());
                }
            });

            return res;
        }

        @Override
        public int size() {
            return map.size();
        }
    }
}


你可能感兴趣的:(轻量级字符串搜索引擎)