1. 在eclipse中安装配置lucene
a) 下载lucene-core-2.3.2.jar
b) 新建工程:File -> New -> Java Project;然后在工程上右键选择 Properties -> Java Build Path -> Libraries -> Add External JARs,添加 lucene-core-2.3.2.jar
c) 为支持中文搜索,和导入lucene-core-2.3.2.jar相同的方式导入je-analysis-1.5.3.jar
在电脑中D盘建立lucene/docs的文件夹,其中包含要被检索的文档集合
在电脑中D盘建立lucene/index的文件夹,用于存储索引(与下面程序中的路径 D://lucene/index 保持一致)
2. 索引相关的程序
a) 建立索引
package index; import java.io.File; import java.io.FileReader; import java.io.IOException; import jeasy.analysis.MMAnalyzer; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.store.LockObtainFailedException; public class Create { static String docDirPath = "D://lucene/docs"; static String indexDirPath = "D://lucene/index"; /** * @param args */ public static void main(String[] args) { // TODO Auto-generated method stub File docDir = new File(docDirPath); File indexDir = new File(indexDirPath); File[] docFiles = docDir.listFiles(); //StandardAnalyzer analyzer = new StandardAnalyzer(); Analyzer analyzer = new MMAnalyzer(); try { IndexWriter writer = new IndexWriter(indexDir,analyzer,true); for(int i=0; i<docFiles.length; i++){ if(docFiles[i].isFile() && docFiles[i].getName().endsWith(".txt")){ Document document = new Document(); document.add(new Field("path", docFiles[i].getCanonicalPath(), Field.Store.YES, Field.Index.UN_TOKENIZED)); document.add(new Field("contents",new FileReader(docFiles[i]))); //设置此参数,表明该文档比其他文档更重要,这时该document的所有字段都被增量,可以对field进行相同的操作以增量某个字段 document.setBoost((float)1.50); writer.addDocument(document); } } writer.optimize(); writer.close(); } catch (CorruptIndexException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (LockObtainFailedException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } }
b) 删除索引中包含某个term的document
package index; import java.io.IOException; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.StaleReaderException; import org.apache.lucene.index.Term; import org.apache.lucene.store.LockObtainFailedException; public class Delete_index_term { static String indexDirPath = "D://lucene/index"; /** * @param args */ public static void main(String[] args) { // TODO Auto-generated method stub try { //索引中term的删除 IndexReader reader = IndexReader.open(indexDirPath); //此处标记删除索引中带有“liu”的文档 reader.deleteDocuments(new Term("contents", "liu")); reader.close(); //索引中document的更新:先删除该document,再添加该document } catch (CorruptIndexException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (StaleReaderException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (LockObtainFailedException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } }
3. 查询相关的程序
a) Term查询、phrase查询
package search; import java.io.File; import java.io.IOException; import org.apache.lucene.document.Document; import org.apache.lucene.index.Term; import org.apache.lucene.search.Hits; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.FSDirectory; public class Term_Phrase_Search { /** * @param args */ public static void main(String[] args) { // TODO Auto-generated method stub File indexDir = new File("D://lucene/index"); try { if (!indexDir.exists()) { System.out.println("The Lucene index is not exist"); return; } FSDirectory directory = FSDirectory.getDirectory(indexDir, false); IndexSearcher searcher = new IndexSearcher(directory); /*// 严格包含词语“北京邮电” PhraseQuery query = new PhraseQuery(); query.add(new Term("contents", "北京")); query.add(new Term("contents", "邮电"));*/ // 查询包含term=“北京” String queryStr = "liu"; Term term = new Term("contents",queryStr); TermQuery query = new TermQuery(term); Hits hits = searcher.search(query); if(hits.length() == 0){ System.out.println("no result matches"); return; } for (int i = 0; i < hits.length(); i++) { Document document = hits.doc(i); System.out.println("File: " + document.get("path")); } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } }
b) Boolean查询
package search; import java.io.File; import java.io.IOException; import jeasy.analysis.MMAnalyzer; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.Hits; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; public class Boolean_search { /** * @param args */ public static void main(String[] args) { // TODO Auto-generated method stub try { File indexDir = new File("D://lucene/index"); if (!indexDir.exists()) { System.out.println("The Lucene index is not exist"); return; } Directory dir = FSDirectory.getDirectory(indexDir, false); IndexSearcher indexSearcher = new IndexSearcher(dir); Analyzer analyzer = new MMAnalyzer(); // Analyzer analyzer = new StandardAnalyzer(); QueryParser qp = new QueryParser("contents", analyzer); // queryParser默认是求并搜索,此处设置为求交搜索 qp.setDefaultOperator(QueryParser.AND_OPERATOR); Query query; String[] searchWords = { "北京 liu" }; for (int i = 0; i < searchWords.length; i++) { query = qp.parse(searchWords[i]); Hits results = indexSearcher.search(query); System.out.println(results.length() + " search results for query " + searchWords[i]); } } catch (CorruptIndexException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (ParseException e) { // TODO Auto-generated catch block e.printStackTrace(); } } }