Lucene 索引小示例

//---------------------------创建索引
/**
 * Builds the on-disk Lucene index from the records exposed by {@code CompanyService}.
 *
 * <p>The index is written into a staging directory
 * ({@code Common.INDEX_PATH + Common.INDEX_CREATE_DIRECTORY}) in batches of
 * {@code Common.RECORD_SIZE} records, and is then swapped into place by
 * {@link #replaceFolder()}.
 */
public class IndexWrite {

    /** Utility class; not instantiable. */
    private IndexWrite() {}

    /**
     * Creates a fresh index in the staging directory and promotes it to the live location.
     *
     * @return a human-readable status message: the staging directory already exists,
     *         there is nothing to index, or the elapsed build time in milliseconds
     * @throws IOException if the staging directory cannot be created or Lucene fails to write
     */
    public static String index () throws IOException {

        // Staging directory the new index is built in.
        String createPath = Common.INDEX_PATH + Common.INDEX_CREATE_DIRECTORY;
        File indexDirectory = new File(createPath);

        // Refuse to overwrite an existing staging directory.
        if (indexDirectory.exists()) {
            return "此目录 (" + createPath + ")已经存在,如果要重新创建索引,请删掉此目录!";
        }

        // Service used to read the records that will be indexed.
        CompanyService iCompany = (CompanyService) BeanFactory.getBean("iCompany");

        // Check the record count BEFORE creating the directory: otherwise an empty
        // result would leave an empty staging directory behind, and every later call
        // would stop at the "already exists" check above.
        int count = iCompany.queryCount();
        if (count <= 0) {
            return "没有要创建索引的记录。";
        }

        if (!indexDirectory.mkdirs() && !indexDirectory.isDirectory()) {
            throw new IOException("Unable to create index directory: " + createPath);
        }

        // The result set may be large, so it is indexed in batches of RECORD_SIZE.
        int batches = count / Common.RECORD_SIZE;
        if (count % Common.RECORD_SIZE > 0) {
            batches = batches + 1;
        }

        long starttime = System.currentTimeMillis();

        Directory directory = FSDirectory.open(indexDirectory);
        try {
            for (int batch = 1; batch <= batches; batch++) {
                // Disk writer: the first batch creates the index (create == true),
                // later batches append to it (create == false).
                IndexWriter diskWriter = new IndexWriter(directory, new IKAnalyzer(), batch == 1, IndexWriter.MaxFieldLength.LIMITED);
                try {
                    // Each batch is first indexed in memory ...
                    IndexWriter memoryWriter = new IndexWriter(new RAMDirectory(), new IKAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
                    Directory memoryDirectory;
                    try {
                        memoryDirectory = RAMIndexDirectory(memoryWriter, batch, count, iCompany);
                    } finally {
                        memoryWriter.close();
                    }

                    // ... and then merged into the on-disk index.
                    diskWriter.addIndexesNoOptimize(new Directory[] {memoryDirectory});
                } finally {
                    // Always release the writer, even when a batch fails.
                    diskWriter.close();
                }
            }
        } finally {
            directory.close();
        }

        // Swap the freshly built index into the live location.
        replaceFolder();
        long endtime = System.currentTimeMillis();
        return "创建索引耗时:" + (endtime - starttime) + "毫秒";
    }

    /**
     * Indexes one batch of records with the given in-memory writer.
     *
     * @param RAMWriter writer the batch's documents are added to; it is optimized but
     *                  NOT closed here
     * @param createNum 1-based batch number
     * @param count     total number of records, used to cap the end of the last batch
     * @param iCompany  service the batch is queried from
     * @return the directory backing {@code RAMWriter}
     * @throws CorruptIndexException if the index is corrupt
     * @throws IOException           on a low-level I/O error
     */
    public static Directory RAMIndexDirectory (IndexWriter RAMWriter, int createNum, int count, CompanyService iCompany) throws CorruptIndexException, IOException {

        // First record of this batch.
        // NOTE(review): every batch after the first starts at (n * RECORD_SIZE) + 1 while
        // the first starts at 0, so the windows are not uniform. Whether that skips or
        // duplicates a record depends on how CompanyService.query interprets
        // (start, end), which is not visible here - confirm against the service.
        int start = (createNum - 1) * Common.RECORD_SIZE;
        if (start != 0) {
            start = start + 1;
        }

        // Last record of this batch, capped at the total record count.
        int end = Math.min(start + Common.RECORD_SIZE, count);

        List<Company> list = iCompany.query(start, end);

        // Add every record of the batch. The previous loop printed "i++" for debugging,
        // which advanced the loop index a second time and skipped every other record.
        if (list != null) {
            for (Company company : list) {
                RAMWriter.addDocument(FileDocument.document(company.getTitle(), company.getContent(), company.getUrl(), Common.gmdate("yyyy-MM-dd", company.getLastdate(), "8")));
            }
        }

        RAMWriter.optimize();
        return RAMWriter.getDirectory();
    }

    /**
     * Promotes the staging index to the live location, keeping the previous live
     * index as a backup copy: any old backup is deleted, the live index is renamed
     * to the backup, and the staging index is renamed to live.
     *
     * <p>Failures are reported on {@code System.err}; no exception is thrown.
     */
    public static void replaceFolder () {

        // Backup of the previous live index.
        File indexCopy = new File(Common.INDEX_PATH + Common.INDEX_COPY_DIRECTORY);

        // Live index.
        File index = new File(Common.INDEX_PATH + Common.INDEX_DIRECTORY);

        // Freshly built (staging) index.
        File indexCreate = new File(Common.INDEX_PATH + Common.INDEX_CREATE_DIRECTORY);

        // Remove the old backup so the rename below has a free target.
        if (indexCopy.exists()) {
            deleteDiretory(indexCopy);
        }

        // On the very first run there is no live index to back up.
        if (index.exists() && !index.renameTo(indexCopy)) {
            System.err.println("replaceFolder: could not rename " + index + " to " + indexCopy);
        }

        if (!indexCreate.renameTo(index)) {
            System.err.println("replaceFolder: could not rename " + indexCreate + " to " + index);
        }
    }

    /**
     * Recursively deletes the given file or directory, including the directory itself.
     *
     * @param indexCopy file or directory to delete
     */
    public static void deleteDiretory (File indexCopy) {
        // listFiles() returns null for a plain file or on an I/O error.
        File[] files = indexCopy.listFiles();
        if (files != null) {
            for (File file : files) {
                if (file.isDirectory()) {
                    deleteDiretory(file);
                } else {
                    file.delete();
                }
            }
        }
        // Remove the (now empty) directory or the plain file itself.
        indexCopy.delete();
    }

}



//-------------------------------查询
/**
 * Runs keyword searches against the live Lucene index and publishes one page of
 * highlighted results as request attributes.
 */
public class IndexRead {

    /** Maximum number of characters shown when a field has no highlighted fragment. */
    private static final int SNIPPET_LENGTH = 50;

    /** Maximum number of highlighted fragments requested per field. */
    private static final int MAX_FRAGMENTS = 50;

    /** Fragment size handed to the highlighter's fragmenter. */
    private static final int FRAGMENT_SIZE = 100;

    /** Utility class; not instantiable. */
    private IndexRead () {}

    /**
     * Searches the "title" and "content" fields for {@code keyword} and stores the
     * requested result page on the request.
     *
     * <p>Attributes set: {@code keyword}, {@code page}, {@code pageStr}, {@code date}
     * (elapsed milliseconds), {@code result} (list of {@code Company}) and, when nothing
     * matches, {@code message}. When the keyword is empty only {@code message},
     * {@code keyword} and an empty {@code result} are set and no search is run.
     *
     * @param request request the results are attached to
     * @param keyword search terms; percent-encoded input is decoded first
     * @param pageNo  1-based page number as text; missing, non-numeric or
     *                non-positive values fall back to page 1
     * @throws CorruptIndexException        if the index is corrupt
     * @throws IOException                  on a low-level I/O error
     * @throws ParseException               if the keyword cannot be parsed as a query
     * @throws InvalidTokenOffsetsException if highlighting hits inconsistent token offsets
     */
    public static void search (HttpServletRequest request, String keyword, String pageNo) throws CorruptIndexException, IOException, ParseException, InvalidTokenOffsetsException {

        if (!Common.empty(keyword) && keyword.contains("%")) {
            keyword = Common.decode(keyword);
        }

        // When a page number is supplied the keyword is re-decoded from ISO-8859-1 to UTF-8.
        // NOTE(review): presumably paging links arrive as GET requests decoded with the
        // container's default charset - confirm. The empty check avoids an NPE on a
        // missing keyword.
        if (!Common.empty(pageNo) && !Common.empty(keyword)) {
            keyword = new String (keyword.getBytes("ISO-8859-1"), "utf-8");
        }

        // Nothing to search for: report "no results" instead of failing inside the parser.
        if (Common.empty(keyword)) {
            request.setAttribute("message", "没有您要查找的内容");
            request.setAttribute("keyword", keyword);
            request.setAttribute("result", new ArrayList<Company>());
            return;
        }

        // Current page; anything that is not a positive integer means page 1
        // (Integer.parseInt also rejects null with NumberFormatException).
        int pageNum = 1;
        try {
            int parsed = Integer.parseInt(pageNo);
            if (parsed > 0) {
                pageNum = parsed;
            }
        } catch (NumberFormatException ignored) {
            // keep the default page
        }

        long start = System.currentTimeMillis();

        // NOTE(review): IndexWrite publishes the live index under Common.INDEX_DIRECTORY;
        // the literal "index" here presumably names the same directory - confirm.
        Searcher searcher = new IndexSearcher(FSDirectory.open(new File(Common.INDEX_PATH + "index")));
        try {
            // One analyzer instance is reused for query parsing and highlighting.
            IKAnalyzer analyzer = new IKAnalyzer();

            // Parse the keyword against both searchable fields.
            QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_CURRENT, new String[] {"title", "content"}, analyzer);
            Query query = parser.parse(keyword);

            // NOTE(review): the original built a Sort on "lastTime" here but never passed
            // it to the search, so results were always ordered by score. That dead code
            // was removed; if date ordering is wanted it must be passed to a search
            // call - confirm the field name against what FileDocument indexes.

            // Collect every possible hit (at least 1, for an empty index). The boolean is
            // the collector's docsScoredInOrder hint, not a sort switch.
            TopScoreDocCollector collector = TopScoreDocCollector.create(Math.max(1, searcher.maxDoc()), false);
            searcher.search(query, collector);

            ScoreDoc[] hits = collector.topDocs().scoreDocs;
            int searchTotals = collector.getTotalHits();

            if (searchTotals <= 0) {
                request.setAttribute("message", "没有您要查找的内容");
            }

            // Paging over the full hit list.
            PageBean pageBean = new PageBean(pageNum, searchTotals, "ss.action?keyword="+(keyword.contains("%") ? keyword:Common.encode(keyword)), 0, 0);

            // Highlight matched terms in bold red.
            Highlighter highlighter = new Highlighter (new SimpleHTMLFormatter("<b><font color=\"red\">", "</font></b>"), new QueryScorer(query));
            highlighter.setTextFragmenter(new SimpleFragmenter(FRAGMENT_SIZE));

            List<Company> list = new ArrayList<Company>();

            // Clamp the page window to the hits actually collected.
            int firstNo = Math.max(0, pageBean.getStartRecord());
            int endNo = Math.min(Math.min(pageBean.getStartRecord()+pageBean.getPageSize(), searchTotals), hits.length);
            for (int i = firstNo; i < endNo; i++) {
                Document doc = searcher.doc(hits[i].doc);
                Company company = new Company();
                company.setTitle(highlight(highlighter, analyzer, "title", doc.get("title")));
                company.setContent(highlight(highlighter, analyzer, "content", doc.get("content")));
                company.setUrl(doc.get("url"));
                list.add(company);
            }

            request.setAttribute("keyword", keyword);
            request.setAttribute("page", pageBean);
            request.setAttribute("pageStr", pageBean.toString());
            request.setAttribute("date", System.currentTimeMillis() - start);
            request.setAttribute("result", list);
        } finally {
            // Release the searcher even when parsing or highlighting fails.
            searcher.close();
        }
    }

    /**
     * Returns the highlighted fragments of {@code text} joined together, or the first
     * {@link #SNIPPET_LENGTH} characters when nothing in the text matches the query.
     *
     * @param highlighter highlighter configured for the current query
     * @param analyzer    analyzer used to tokenize {@code text}
     * @param field       name of the field {@code text} was read from
     * @param text        stored field value; {@code null} yields an empty string
     * @return display text for the field, never {@code null}
     */
    private static String highlight (Highlighter highlighter, IKAnalyzer analyzer, String field, String text) throws IOException, InvalidTokenOffsetsException {
        if (text == null) {
            return "";
        }
        TokenStream tokens = analyzer.tokenStream(field, new StringReader(text));
        String[] fragments = highlighter.getBestFragments(tokens, text, MAX_FRAGMENTS);
        if (fragments.length == 0) {
            return text.substring(0, Math.min(SNIPPET_LENGTH, text.length()));
        }
        StringBuilder joined = new StringBuilder();
        for (String fragment : fragments) {
            joined.append(fragment);
        }
        return joined.toString();
    }

}

 

你可能感兴趣的:(jsp,Hibernate,F#,Lucene,J#)