IndexSearcher indexSearcher = new IndexSearcher(LuceneUtils.getDirectory()); // open a searcher on the index directory to be queried
这句会引发线程安全问题:在全局中 IndexSearcher 只能有一个对象才可以,所以在 ArticleDocumentUtils 中保存一个,并且引用它。
IndexWriter 为了提高效率,也是在内存中有缓存的,所以需要 commit 才能把改动写入索引库文件中。
数据库优化
每次添加数据都会在索引文件夹下产生很多小文件,可以通过合并这些小文件来提高效率:
// Optimize: merge the many small index files into one large file.
LuceneUtils.getIndexWriter().optimize();
// Configure how many small files may accumulate before they are automatically
// merged into one large file (minimum 2, default 10).
LuceneUtils.getIndexWriter().setMergeFactor(3);
当增加数据的时候自动触发。
LuceneUtils.java
package cn.itcast._util;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;

/**
 * Static holder for the Lucene objects shared by the application: the index
 * directory, the analyzer, and a single lazily created {@link IndexWriter}.
 */
public class LuceneUtils {

    private static Directory directory; // index directory
    private static Analyzer analyzer; // tokenizer used for indexing and query parsing

    // volatile so that the unsynchronized first check in getIndexWriter()
    // can never observe a partially published writer.
    private static volatile IndexWriter indexWriter;

    static {
        try {
            // The index location should eventually be read from a configuration file.
            directory = FSDirectory.open(new File("./indexDir"));
            analyzer = new StandardAnalyzer(Version.LUCENE_30);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns the single shared IndexWriter, creating it on first use.
     *
     * <p>When the writer is created, a JVM shutdown hook that closes it is
     * registered exactly once, inside the same critical section.
     *
     * @return the shared IndexWriter
     * @throws RuntimeException if the writer cannot be created
     */
    public static IndexWriter getIndexWriter() {
        IndexWriter writer = indexWriter;
        if (writer == null) {
            synchronized (LuceneUtils.class) {
                writer = indexWriter;
                if (writer == null) {
                    try {
                        writer = new IndexWriter(directory, analyzer, MaxFieldLength.LIMITED);
                        System.out.println("=== 已经初始化 IndexWriter ===");
                    } catch (Exception e) {
                        throw new RuntimeException(e);
                    }
                    // Register the hook only for the thread that actually created
                    // the writer, so it is never installed more than once.
                    registerCloseOnShutdown(writer);
                    indexWriter = writer;
                }
            }
        }
        return writer;
    }

    /** Registers a JVM shutdown hook that closes the given writer. */
    private static void registerCloseOnShutdown(final IndexWriter writer) {
        Runtime.getRuntime().addShutdownHook(new Thread() {
            @Override
            public void run() {
                try {
                    writer.close();
                    System.out.println("=== 已经关闭 IndexWriter ===");
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
            }
        });
    }

    /**
     * @return the directory that holds the index
     */
    public static Directory getDirectory() {
        return directory;
    }

    /**
     * @return the shared analyzer
     */
    public static Analyzer getAnalyzer() {
        return analyzer;
    }

}
ArticleDocumentUtils.java
1 package cn.itcast._util; 2 3 import org.apache.lucene.document.Document; 4 import org.apache.lucene.document.Field; 5 import org.apache.lucene.document.Field.Index; 6 import org.apache.lucene.document.Field.Store; 7 import org.apache.lucene.util.NumericUtils; 8 9 import cn.itcast._domain.Article; 10 11 public class ArticleDocumentUtils { 12 13 /** 14 * 把Article转为Document 15 * 16 * @param article 17 * @return 18 */ 19 public static Document articleToDocument(Article article) { 20 Document doc = new Document(); 21 22 String idStr = NumericUtils.intToPrefixCoded(article.getId()); // 一定要使用Lucene的工具类把数字转为字符串! 23 24 doc.add(new Field("id", idStr, Store.YES, Index.NOT_ANALYZED)); // 注意:唯一标示符一般选择Index.NOT_ANALYZED 25 doc.add(new Field("title", article.getTitle(), Store.YES, Index.ANALYZED)); 26 doc.add(new Field("content", article.getContent(), Store.YES, Index.ANALYZED)); 27 28 return doc; 29 } 30 31 /** 32 * 把Document转为Article 33 * 34 * @param doc 35 * @return 36 */ 37 public static Article documentToArticle(Document doc) { 38 Article article = new Article(); 39 40 Integer id = NumericUtils.prefixCodedToInt(doc.get("id")); // 一定要使用Lucene的工具类把字符串转为数字! 41 42 article.setId(id); 43 article.setTitle(doc.get("title")); 44 article.setContent(doc.get("content")); 45 46 return article; 47 } 48 49 }
QueryResult.java
package cn.itcast._domain;

import java.util.List;

/**
 * One page of query results together with the total number of matching records.
 *
 * @param <T> element type of the result list
 */
public class QueryResult<T> {
    private List<T> list; // one page (segment) of the data
    private int count; // total number of matching records

    /**
     * @param list  the page of results
     * @param count the total number of matching records
     */
    public QueryResult(List<T> list, int count) {
        this.list = list;
        this.count = count;
    }

    /**
     * @return the page of results
     */
    public List<T> getList() {
        return list;
    }

    /**
     * @param list the page of results
     */
    public void setList(List<T> list) {
        this.list = list;
    }

    /**
     * @return the total number of matching records
     */
    public int getCount() {
        return count;
    }

    /**
     * @param count the total number of matching records
     */
    public void setCount(int count) {
        this.count = count;
    }

}
ArticleIndexDao.java
package cn.itcast.b_indexdao;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.Version;

import cn.itcast._domain.Article;
import cn.itcast._domain.QueryResult;
import cn.itcast._util.ArticleDocumentUtils;
import cn.itcast._util.LuceneUtils;

/**
 * Index-side DAO for articles: add, delete, update and paged search.
 */
public class ArticleIndexDao {

    /**
     * Adds an article to the index and commits the change.
     *
     * @param article the article to index
     */
    public void save(Article article) {
        // Step 1: convert the Article into a Document.
        Document document = ArticleDocumentUtils.articleToDocument(article);

        // Step 2: add it to the index and commit.
        try {
            LuceneUtils.getIndexWriter().addDocument(document);
            LuceneUtils.getIndexWriter().commit();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Deletes every document whose "id" term matches the given id, then commits.
     *
     * <p>A Term is one keyword occurring in one field of the index.
     *
     * @param id the id of the article to remove
     */
    public void delete(Integer id) {
        try {
            // Numbers must be converted to strings with Lucene's own utility class.
            Term idTerm = new Term("id", NumericUtils.intToPrefixCoded(id));

            LuceneUtils.getIndexWriter().deleteDocuments(idTerm);
            LuceneUtils.getIndexWriter().commit();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Replaces the indexed document of the given article and commits.
     *
     * @param article the article carrying the new data
     */
    public void update(Article article) {
        try {
            // Numbers must be converted to strings with Lucene's own utility class.
            String encodedId = NumericUtils.intToPrefixCoded(article.getId());
            Term idTerm = new Term("id", encodedId);
            Document replacement = ArticleDocumentUtils.articleToDocument(article);

            // An update is a delete of the old document followed by an add of the new one.
            LuceneUtils.getIndexWriter().updateDocument(idTerm, replacement);
            LuceneUtils.getIndexWriter().commit();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Paged search over the "title" and "content" fields.
     *
     * @param queryString the query expression
     * @param first       index in the result list at which to start
     * @param max         maximum number of items to return (fewer if not enough remain)
     * @return one page of articles plus the total number of matching records
     */
    public QueryResult search(String queryString, int first, int max) {
        IndexSearcher searcher = null;
        try {
            // Step 1: turn the query string into a Query over title and content.
            String[] searchedFields = new String[] { "title", "content" };
            QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_30, searchedFields, LuceneUtils.getAnalyzer());
            Query query = parser.parse(queryString);

            // Step 2: run the query; enough hits must be requested to reach the end of the page.
            searcher = new IndexSearcher(LuceneUtils.getDirectory());
            int requested = first + max;
            TopDocs hits = searcher.search(query, requested);
            int total = hits.totalHits; // total number of matching records

            // Step 3: convert only the requested slice of hits.
            List<Article> page = new ArrayList<Article>();
            int stop = Math.min(requested, hits.scoreDocs.length);
            int position = first;
            while (position < stop) {
                // Load the real Document through its internal number.
                Document stored = searcher.doc(hits.scoreDocs[position].doc);
                page.add(ArticleDocumentUtils.documentToArticle(stored));
                position++;
            }

            // Step 4: wrap and return.
            return new QueryResult(page, total);

        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            closeSearcher(searcher);
        }
    }

    /** Closes the searcher if one was opened, rethrowing I/O failures unchecked. */
    private static void closeSearcher(IndexSearcher searcher) {
        if (searcher == null) {
            return;
        }
        try {
            searcher.close();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}
不分页的查询
LuceneUtils.getIndexWriter()
1 public List<Article> searchArticle(String condition) { 2 // 执行搜索 3 List<Article> list = new ArrayList<Article>(); 4 IndexSearcher indexSearcher = null; 5 try { 6 // 1,把查询字符串转为Query对象(默认只从title中查询) 7 QueryParser queryParser = new MultiFieldQueryParser( 8 Version.LUCENE_30, new String[] { "title", "content" }, 9 LuceneUtils.getAnalyzer()); 10 Query query = queryParser.parse(condition); 11 12 // 2,执行查询,得到中间结果 13 //indexSearcher = new IndexSearcher(LuceneUtils.getDirectory()); // 指定所用的索引库,会引发线程安全问题
indexSearcher=LuceneUtils.getIndexWriter();
14 TopDocs topDocs = indexSearcher.search(query, 1000); // 最多返回前n条结果 15 int count = topDocs.totalHits; 16 System.out.println("scoreDocs.length"+topDocs.scoreDocs.length); //一样 17 System.out.println("count"+count); //一样 18 ScoreDoc[] scoreDocs = topDocs.scoreDocs; 19 20 // 3,处理结果 21 for (int i = 0; i < scoreDocs.length; i++) { 22 ScoreDoc scoreDoc = scoreDocs[i]; 23 float score = scoreDoc.score; // 相关度得分 24 int docId = scoreDoc.doc; // Document的内部编号 25 26 // 根据编号拿到Document数据 27 Document document = indexSearcher.doc(docId); 28 29 // 把Document转为Article 30 Article article=ArticleDocumentUtils.documentToArticle(document); 31 32 list.add(article); 33 } 34 } catch (Exception e) { 35 throw new RuntimeException(); 36 } finally { 37 try { 38 if (null != indexSearcher) 39 indexSearcher.close(); 40 } catch (Exception e) { 41 e.printStackTrace(); 42 } 43 } 44 return list; 45 }
ArticleIndexDaoTest.java
package cn.itcast.b_indexdao;

import java.util.List;

import org.junit.Test;

import cn.itcast._domain.Article;
import cn.itcast._domain.QueryResult;

/**
 * Exercises ArticleIndexDao: save, delete, update and paged search.
 */
public class ArticleIndexDaoTest {

    private ArticleIndexDao indexDao = new ArticleIndexDao();

    /** Creates an article fixture with the given id, title and content. */
    private static Article newArticle(int id, String title, String content) {
        Article fixture = new Article();
        fixture.setId(id);
        fixture.setTitle(title);
        fixture.setContent(content);
        return fixture;
    }

    /** Indexes a single article. */
    @Test
    public void testSave() {
        Article article = newArticle(1, "准备Lucene的开发环境",
                "如果信息检索系统在用户发出了检索请求后再去互联网上找答案,根本无法在有限的时间内返回结果。");

        indexDao.save(article);
    }

    /** Indexes 25 articles with ids 1 to 25 so that paging can be tried out. */
    @Test
    public void testSave_25() {
        int id = 1;
        while (id <= 25) {
            Article article = newArticle(id, "准备Lucene的开发环境",
                    "如果信息检索系统在用户发出了检索请求后再去互联网上找答案,根本无法在有限的时间内返回结果。");

            indexDao.save(article);
            id++;
        }
    }

    /** Removes the article with id 1 from the index. */
    @Test
    public void testDelete() {
        indexDao.delete(1);
    }

    /** Replaces the indexed data of the article with id 1. */
    @Test
    public void testUpdate() {
        Article article = newArticle(1, "准备Lucene的开发环境", "这是更新后的内容");

        indexDao.update(article);
    }

    /** Paged search: prints the total hit count and the requested page. */
    @Test
    public void testSearch() {
        // Query condition ("hibernate" is another value worth trying).
        String queryString = "lucene";

        // Other pages at 10 per page: (0, 10) is page 1 and (10, 10) is page 2;
        // (0, 10000) fetches everything. Below is page 3.
        QueryResult qr = indexDao.search(queryString, 20, 10);

        // Show the result.
        System.out.println("总结果数:" + qr.getCount());
        @SuppressWarnings("unchecked")
        List<Article> articles = (List<Article>) qr.getList();
        for (Article found : articles) {
            System.out.println("------------------------------");
            System.out.println("id = " + found.getId());
            System.out.println("title = " + found.getTitle());
            System.out.println("content = " + found.getContent());
        }
    }

}
不分页查询测试
1 @Test 2 public void testSearchArticle() { 3 // 准备查询条件 4 String queryString = "lucene的"; 5 // String queryString = "hibernate"; 6 7 // 执行搜索 8 List<Article> list =dao.searchArticle(queryString); 9 10 // 显示结果 11 System.out.println("总结果数:" + list.size()); 12 for (Article a : list) { 13 System.out.println("------------------------------"); 14 System.out.println("id = " + a.getId()); 15 System.out.println("title = " + a.getTitle()); 16 System.out.println("content = " + a.getContent()); 17 } 18 }