转:Lucene 3 搜索引擎——索引建立、搜索、排序、分页、高亮显示,以及 IKAnalyzer 分词

package com.zjr.service.impl;  
  
import java.io.File;  
import java.io.IOException;  
import java.io.StringReader;  
import java.lang.reflect.InvocationTargetException;  
import java.util.ArrayList;  
import java.util.List;  
  
import org.apache.commons.beanutils.BeanUtils;  
import org.apache.commons.logging.Log;  
import org.apache.commons.logging.LogFactory;  
import org.apache.lucene.analysis.Analyzer;  
import org.apache.lucene.analysis.TokenStream;  
import org.apache.lucene.document.Document;  
import org.apache.lucene.document.Field;  
import org.apache.lucene.document.Field.Index;  
import org.apache.lucene.document.Field.Store;  
import org.apache.lucene.index.CorruptIndexException;  
import org.apache.lucene.index.IndexReader;  
import org.apache.lucene.index.IndexWriter;  
import org.apache.lucene.index.Term;  
import org.apache.lucene.search.BooleanClause;  
import org.apache.lucene.search.IndexSearcher;  
import org.apache.lucene.search.Query;  
import org.apache.lucene.search.ScoreDoc;  
import org.apache.lucene.search.Sort;  
import org.apache.lucene.search.SortField;  
import org.apache.lucene.search.TopDocs;  
import org.apache.lucene.search.TopScoreDocCollector;  
import org.apache.lucene.search.highlight.Highlighter;  
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;  
import org.apache.lucene.search.highlight.QueryScorer;  
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;  
import org.apache.lucene.store.Directory;  
import org.apache.lucene.store.FSDirectory;  
import org.wltea.analyzer.lucene.IKAnalyzer;  
import org.wltea.analyzer.lucene.IKQueryParser;  
import org.wltea.analyzer.lucene.IKSimilarity;  
  
import com.zjr.model.User;  
  
/**
 * Builds, searches and maintains a Lucene 3.x index of {@link User} records,
 * tokenised with IKAnalyzer.
 *
 * <p>Every search opens its own {@link IndexSearcher} and releases it (and the
 * underlying {@link Directory}) before returning, so a failed or paged search
 * no longer leaks open index files. Search methods keep their original
 * contract of returning {@code null} when the index directory is missing or
 * the search fails.
 *
 * <p>NOTE(review): highlighted fragments are wrapped in {@code <font>} tags and
 * the stored text is not HTML-escaped here; callers rendering the result as
 * HTML should confirm the stored content is trusted or escape it.
 */
public class UserIndexService {

    /** Index location used by the no-argument constructor. */
    private static final String DEFAULT_INDEX_DIR = "d:/temp/user";

    private final Log logger = LogFactory.getLog(UserIndexService.class);
    private final String dirPath;

    Analyzer analyzer = new IKAnalyzer();
    Directory directory = null;
    IndexWriter writer = null;
    /**
     * Retained so existing same-package code keeps compiling. Searches now use
     * a per-call searcher and no longer assign this field.
     */
    IndexSearcher indexSearcher = null;

    /** Creates a service that stores its index in the default directory. */
    public UserIndexService() {
        this(DEFAULT_INDEX_DIR);
    }

    /**
     * Creates a service that stores its index in the given directory.
     *
     * @param dirPath file-system path of the index directory
     * @throws IllegalArgumentException if {@code dirPath} is {@code null}
     */
    public UserIndexService(String dirPath) {
        if (dirPath == null) {
            throw new IllegalArgumentException("dirPath must not be null");
        }
        this.dirPath = dirPath;
    }

    /** Creates the index directory if needed and reports when it is unusable. */
    private void confirmDirs() {
        File indexDir = new File(dirPath);
        if (!indexDir.exists()) {
            indexDir.mkdirs();
        }
        // Report unconditionally: the original only logged this error when
        // debug logging happened to be enabled.
        if (!indexDir.exists() || !indexDir.canWrite()) {
            logger.error("索引文件目录创建失败或不可写入!");
        }
    }

    /**
     * Ensures the index directory exists and opens it into {@link #directory}.
     * On failure the error is logged and {@link #directory} is left unchanged.
     */
    public void init() {
        confirmDirs();
        try {
            directory = FSDirectory.open(new File(dirPath));
        } catch (IOException e) {
            logger.error("解除索引文件锁定失败!", e);
        }
    }

    /**
     * Rebuilds the whole index from the given users.
     *
     * <p>The writer is opened in "create" mode, so a successful run replaces
     * any existing index. If anything fails the writer is rolled back, which
     * discards the partial work and releases the write lock.
     *
     * @param userList users to index
     */
    public void createIndex(List<User> userList) {
        init();
        if (directory == null) {
            logger.error("Index directory could not be opened; index not created: " + dirPath);
            return;
        }
        IndexWriter indexWriter = null;
        boolean committed = false;
        try {
            // Arguments: where the index lives (FSDirectory = on disk,
            // RAMDirectory = in memory), the analyzer, true = build a brand-new
            // index / false = append incrementally, and the maximum field length.
            indexWriter = new IndexWriter(directory, analyzer, true,
                    IndexWriter.MaxFieldLength.LIMITED);
            writer = indexWriter;
            indexWriter.setMergeFactor(500);
            indexWriter.setMaxBufferedDocs(155);
            indexWriter.setMaxFieldLength(Integer.MAX_VALUE);
            addDocuments(indexWriter, userList);
            indexWriter.optimize();
            indexWriter.close();
            committed = true;
        } catch (IOException e) {
            logger.error("Failed to build index in " + dirPath, e);
        } finally {
            if (indexWriter != null && !committed) {
                try {
                    indexWriter.rollback();
                } catch (IOException e) {
                    logger.error("Failed to roll back index writer for " + dirPath, e);
                }
            }
        }
    }

    /**
     * Searches {@code userInfo} and {@code parameter1} for the keyword and
     * returns up to the 20 best-scoring users, with matches highlighted.
     *
     * <p>Other IKQueryParser forms that can be substituted for the query below:
     * {@code parse(field, keyword)} for a single field;
     * {@code parseMultiField(fields, keyword, flags)} to combine fields with
     * {@code BooleanClause.Occur} flags (MUST = and, MUST_NOT = not,
     * SHOULD = or); and {@code parseMultiField(fields, queries[, flags])} for a
     * separate query string per field.
     *
     * @param keyword text to search for
     * @return matching users, or {@code null} if the index directory does not
     *         exist or the search fails
     */
    public List<User> search(String keyword) {
        File indexFile = new File(dirPath);
        if (!indexFile.exists()) {
            return null;
        }
        Directory dir = null;
        IndexSearcher searcher = null;
        try {
            dir = FSDirectory.open(indexFile);
            searcher = openSearcher(dir);

            String[] fields = new String[] { "userInfo", "parameter1" };
            Query query = IKQueryParser.parseMultiField(fields, keyword);

            // Top 20 hits by relevance.
            ScoreDoc[] hits = searcher.search(query, 20).scoreDocs;
            return toUsers(searcher, hits, 0, hits.length, query);
        } catch (IOException e) {
            logger.error("Search failed for keyword: " + keyword, e);
        } catch (InvalidTokenOffsetsException e) {
            logger.error("Highlighting failed for keyword: " + keyword, e);
        } finally {
            release(searcher, dir);
        }
        return null;
    }

    /**
     * Adds one document per user to the writer. An I/O failure is logged and
     * stops the loop; it is not propagated.
     *
     * @param writer   open writer to add to
     * @param userList users to index
     */
    public void writeIndex(IndexWriter writer, List<User> userList) {
        try {
            addDocuments(writer, userList);
        } catch (IOException e) {
            logger.error("Failed to add documents to index", e);
        }
    }

    /** Adds one document per user, letting I/O failures reach the caller. */
    private void addDocuments(IndexWriter target, List<User> userList) throws IOException {
        for (User u : userList) {
            target.addDocument(getDoc(u));
        }
    }

    /** Converts a user into a Lucene document with every field stored. */
    private Document getDoc(User user) {
        if (logger.isDebugEnabled()) {
            logger.debug("用户ID 为" + user.getUserId() + " 索引被创建");
        }
        Document doc = new Document();
        // NOT_ANALYZED: indexed as a single term, not tokenised.
        addField2Doc(doc, user, "userId", Store.YES, Index.NOT_ANALYZED);
        addField2Doc(doc, user, "userName", Store.YES, Index.NOT_ANALYZED);
        addField2Doc(doc, user, "userAge", Store.YES, Index.NOT_ANALYZED);
        // ANALYZED: tokenised by the analyzer and indexed.
        addField2Doc(doc, user, "userInfo", Store.YES, Index.ANALYZED);
        addField2Doc(doc, user, "parameter1", Store.YES, Index.ANALYZED);
        addField2Doc(doc, user, "parameter2", Store.YES, Index.ANALYZED);
        addField2Doc(doc, user, "parameter3", Store.YES, Index.ANALYZED);
        addField2Doc(doc, user, "parameter4", Store.YES, Index.ANALYZED);
        return doc;
    }

    /**
     * Reads a bean property by name and adds it to the document as a field.
     * Properties whose value is {@code null} are skipped, so the resulting
     * document may lack that field.
     */
    private void addField2Doc(Document doc, Object bean, String name, Store s,
            Index i) {
        try {
            String value = BeanUtils.getProperty(bean, name);
            if (value != null) {
                doc.add(new Field(name, value, s, i,
                        Field.TermVector.WITH_POSITIONS_OFFSETS));
            }
        } catch (IllegalAccessException e) {
            logger.error("get bean property error", e);
        } catch (InvocationTargetException e) {
            logger.error("get bean property error", e);
        } catch (NoSuchMethodException e) {
            logger.error("get bean property error", e);
        }
    }

    /**
     * Relevance-ordered search with highlighting and paging.
     *
     * @param pageNo   1-based page number
     * @param pageSize number of rows per page
     * @param keyword  text to search for
     * @return the requested page (its list is empty when the page is out of
     *         range), or {@code null} if the index directory does not exist or
     *         the search fails
     */
    public PageBean getPageQuery(int pageNo, int pageSize, String keyword) {
        File indexFile = new File(dirPath);
        if (!indexFile.exists()) {
            return null;
        }
        Directory dir = null;
        IndexSearcher searcher = null;
        try {
            dir = FSDirectory.open(indexFile);
            searcher = openSearcher(dir);

            String[] fields = new String[] { "userInfo", "parameter1" };
            BooleanClause.Occur[] flags = new BooleanClause.Occur[] {
                    BooleanClause.Occur.MUST, BooleanClause.Occur.SHOULD };
            Query query = IKQueryParser.parseMultiField(fields, keyword, flags);

            // Only the hits up to the end of the requested page are needed;
            // the total hit count is tracked independently of this size.
            TopScoreDocCollector collector = TopScoreDocCollector.create(
                    hitsNeeded(pageNo, pageSize, searcher.maxDoc()), true);
            searcher.search(query, collector);
            ScoreDoc[] pageHits = collector.topDocs((pageNo - 1) * pageSize,
                    pageSize).scoreDocs;

            List<User> users = toUsers(searcher, pageHits, 0, pageHits.length, query);
            return toPage(pageNo, pageSize, collector.getTotalHits(), users);
        } catch (IOException e) {
            logger.error("Paged search failed for keyword: " + keyword, e);
        } catch (InvalidTokenOffsetsException e) {
            logger.error("Highlighting failed for keyword: " + keyword, e);
        } finally {
            release(searcher, dir);
        }
        return null;
    }

    /**
     * Sorted search (userId ascending, then userAge descending) with
     * highlighting and paging.
     *
     * @param pageNo   1-based page number
     * @param pageSize number of rows per page
     * @param keyword  text to search for
     * @return the requested page (its list is empty when the page is out of
     *         range), or {@code null} if nothing matches, the index directory
     *         does not exist, or the search fails
     */
    public PageBean getPageQuery2(int pageNo, int pageSize, String keyword) {
        File indexFile = new File(dirPath);
        if (!indexFile.exists()) {
            return null;
        }
        Directory dir = null;
        IndexSearcher searcher = null;
        try {
            dir = FSDirectory.open(indexFile);
            searcher = openSearcher(dir);

            String[] fields = new String[] { "userInfo", "parameter1" };
            BooleanClause.Occur[] flags = new BooleanClause.Occur[] {
                    BooleanClause.Occur.MUST, BooleanClause.Occur.SHOULD };
            Query query = IKQueryParser.parseMultiField(fields, keyword, flags);

            // Earlier sort fields take precedence. Third argument: false =
            // ascending, true = descending.
            Sort sort = new Sort(new SortField[] {
                    new SortField("userId", SortField.INT, false),
                    new SortField("userAge", SortField.INT, true) });

            TopDocs topDocs = searcher.search(query, null,
                    hitsNeeded(pageNo, pageSize, searcher.maxDoc()), sort);
            if (topDocs.totalHits == 0) {
                return null;
            }

            // Clamp the page window to the hits actually returned, so a
            // partial last page or an out-of-range page cannot index past the
            // end of the array.
            ScoreDoc[] hits = topDocs.scoreDocs;
            long firstRow = (long) (pageNo - 1) * pageSize;
            long endRow = (long) pageNo * pageSize;
            int from = (int) Math.max(0L, Math.min(firstRow, (long) hits.length));
            int to = (int) Math.max((long) from, Math.min(endRow, (long) hits.length));

            List<User> users = toUsers(searcher, hits, from, to, query);
            return toPage(pageNo, pageSize, topDocs.totalHits, users);
        } catch (IOException e) {
            logger.error("Sorted paged search failed for keyword: " + keyword, e);
        } catch (InvalidTokenOffsetsException e) {
            logger.error("Highlighting failed for keyword: " + keyword, e);
        } finally {
            release(searcher, dir);
        }
        return null;
    }

    /**
     * Deletes every document whose {@code userId} term equals the given id.
     * Failures are logged; the reader is always closed.
     *
     * @param userId id of the user to remove from the index
     */
    public void deleIndex(String userId) {
        IndexReader reader = null;
        try {
            directory = FSDirectory.open(new File(dirPath));
            reader = IndexReader.open(directory, false);
            reader.deleteDocuments(new Term("userId", userId));
        } catch (IOException e) {
            logger.error("Failed to delete index entries for userId " + userId, e);
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    logger.error("Failed to close index reader for " + dirPath, e);
                }
            }
        }
    }

    /** Opens a searcher over the directory using IK's similarity. */
    private IndexSearcher openSearcher(Directory dir) throws IOException {
        IndexSearcher searcher = new IndexSearcher(dir);
        searcher.setSimilarity(new IKSimilarity());
        return searcher;
    }

    /** Closes the searcher and directory, logging (not throwing) on failure. */
    private void release(IndexSearcher searcher, Directory dir) {
        if (searcher != null) {
            try {
                searcher.close();
            } catch (IOException e) {
                logger.warn("Failed to close index searcher", e);
            }
        }
        if (dir != null) {
            try {
                dir.close();
            } catch (IOException e) {
                logger.warn("Failed to close index directory", e);
            }
        }
    }

    /**
     * Number of top hits to collect so that the requested page is covered:
     * {@code pageNo * pageSize}, capped at {@code maxDoc} and never below 1.
     */
    private static int hitsNeeded(int pageNo, int pageSize, int maxDoc) {
        long throughPage = (long) pageNo * pageSize;
        long capped = Math.min(throughPage, (long) maxDoc);
        return (int) Math.max(1L, capped);
    }

    /** Packs one page of results into a PageBean. */
    private PageBean toPage(int pageNo, int pageSize, int totalHits, List<User> users) {
        PageBean page = new PageBean();
        page.setCurrentPage(pageNo);
        page.setPageSize(pageSize);
        page.setAllRow(totalHits);
        page.setList(users);
        return page;
    }

    /**
     * Loads {@code hits[from..to)} and converts each into a {@link User},
     * highlighting {@code userInfo} and {@code parameter1}.
     */
    private List<User> toUsers(IndexSearcher searcher, ScoreDoc[] hits, int from,
            int to, Query query) throws IOException, InvalidTokenOffsetsException {
        Highlighter highlighter = new Highlighter(
                new SimpleHTMLFormatter("<font color=\"red\">", "</font>"),
                new QueryScorer(query));
        List<User> users = new ArrayList<User>();
        for (int i = from; i < to; i++) {
            Document doc = searcher.doc(hits[i].doc);
            User u = new User();
            u.setUserId(Integer.parseInt(doc.get("userId")));
            u.setUserName(doc.get("userName"));
            u.setUserAge(Integer.parseInt(doc.get("userAge")));
            u.setUserInfo(highlight(highlighter, doc, "userInfo"));
            u.setParameter1(highlight(highlighter, doc, "parameter1"));
            u.setParameter2(doc.get("parameter2"));
            u.setParameter3(doc.get("parameter3"));
            u.setParameter4(doc.get("parameter4"));
            users.add(u);
        }
        return users;
    }

    /**
     * Returns the best highlighted fragment of a stored field, the stored text
     * itself when nothing in it matches, or {@code null} when the document has
     * no such field (null bean properties are never indexed).
     */
    private String highlight(Highlighter highlighter, Document doc, String field)
            throws IOException, InvalidTokenOffsetsException {
        String stored = doc.get(field);
        if (stored == null) {
            return null;
        }
        TokenStream tokens = analyzer.tokenStream(field, new StringReader(stored));
        String fragment = highlighter.getBestFragment(tokens, stored);
        return fragment != null ? fragment : stored;
    }

}

 

可以将高亮设置抽取成一个独立的方法:

/**
 * Highlights the query matches inside one stored field of a document.
 *
 * <p>Matches are wrapped in {@code <font color="red">…</font>}. The stored
 * text is returned unchanged when nothing in it matches or when highlighting
 * fails, so a caller always gets displayable text for a field that exists.
 *
 * @param query query whose terms should be highlighted
 * @param doc   document to read the stored field from
 * @param field name of the stored field
 * @return the best highlighted fragment, the stored text as a fallback, or
 *         {@code null} if the document has no stored value for {@code field}
 */
public String toHighlighter(Query query,Document doc,String field){
        // Read the stored value once; a missing field would otherwise make
        // StringReader throw a NullPointerException.
        String stored = doc.get(field);
        if (stored == null) {
            return null;
        }
        try {
            SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<font color=\"red\">", "</font>");
            Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
            TokenStream tokens = analyzer.tokenStream(field, new StringReader(stored));
            String best = highlighter.getBestFragment(tokens, stored);
            return best == null ? stored : best;
        } catch (IOException e) {
            // No logger is visible from this snippet; keep the original reporting.
            e.printStackTrace();
        } catch (InvalidTokenOffsetsException e) {
            e.printStackTrace();
        }
        // Highlighting failed: fall back to the plain stored text.
        return stored;
}
 

你可能感兴趣的:(apache,bean,搜索引擎,F#,Lucene)