The previous two posts already introduced Lucene, so by now you should have a rough idea of what it is. Here I'd like to share some real examples from my own project, covering adding to, deleting from, updating and searching the index, plus keyword highlighting~~~
These examples are built on Lucene 3.0 and the index is created over plain txt files.
OK, here comes the code~~
Index creation class:
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Date;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;

public class Index {

    /**
     * @param args
     * @throws IOException
     */
    public static void main(String[] args) throws IOException {
        // Where the index files are stored
        String indexDir = "C:\\Users\\ZEQ\\Desktop\\ZZUluceneTest1\\index";
        // Where the TXT files to be indexed live
        String dateDir = "C:\\Users\\ZEQ\\Desktop\\ZZUluceneTest1\\data";
        IndexWriter indexWriter = null;
        // Create the Directory object; an FSDirectory means the index lives on disk
        Directory dir = new SimpleFSDirectory(new File(indexDir));
        // Create the IndexWriter. The first argument is the Directory, the second the analyzer,
        // the third whether to create a new index (false means open and modify the existing one),
        // and the fourth caps how many terms are indexed per field; e.g. new MaxFieldLength(2)
        // would keep only the first two terms. UNLIMITED removes the cap.
        indexWriter = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_30),
                true, IndexWriter.MaxFieldLength.UNLIMITED);
        File[] files = new File(dateDir).listFiles();
        for (int i = 0; i < files.length; i++) {
            Document doc = new Document();
            // Create the Field objects and put them into the Document
            doc.add(new Field("id", i + "", Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.add(new Field("contents", new FileReader(files[i])));
            doc.add(new Field("path", files[i].getAbsolutePath(), Field.Store.YES, Field.Index.ANALYZED));
            doc.add(new Field("indexDate", DateTools.dateToString(new Date(), DateTools.Resolution.DAY),
                    Field.Store.YES, Field.Index.NOT_ANALYZED));
            // Hand the document over to the IndexWriter
            indexWriter.addDocument(doc);
        }
        // How many documents the IndexWriter now holds
        System.out.println("Total ------>" + indexWriter.numDocs());
        indexWriter.close();
    }
}
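One thing worth watching in this class: the third IndexWriter argument is true, which recreates the index and discards whatever was already under indexDir. Below is a minimal sketch of picking that flag automatically, assuming you would rather append to an existing index than wipe it; it uses IndexReader.indexExists, so add import org.apache.lucene.index.IndexReader.

// Minimal sketch: only create a fresh index when none exists yet,
// otherwise open the existing one for appending.
boolean create = !IndexReader.indexExists(dir);
IndexWriter indexWriter = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_30),
        create, IndexWriter.MaxFieldLength.UNLIMITED);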
Search class:
import java.io.File;
import java.util.Date;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * @author ht
 * Search
 */
public class Seacher {

    private static String INDEX_DIR = "C:\\Users\\ZEQ\\Desktop\\ZZUluceneTest1\\index"; // where the index lives
    private static String KEYWORD = " ";  // the keyword (put a real search term here)
    private static int TOP_NUM = 10;      // show the top 10 results

    public static void main(String[] args) throws Exception {
        File indexDir = new File(INDEX_DIR);
        if (!indexDir.exists() || !indexDir.isDirectory()) {
            throw new Exception(indexDir + " does not exist~~");
        }
        search(indexDir, KEYWORD); // run the query
    }

    /**
     * Search
     * @param indexDir
     * @param q
     * @throws Exception
     */
    public static void search(File indexDir, String q) throws Exception {
        IndexSearcher is = new IndexSearcher(FSDirectory.open(indexDir), true); // read-only
        String field = "contents";
        QueryParser parser = new QueryParser(Version.LUCENE_30, field,
                new StandardAnalyzer(Version.LUCENE_30)); // API changed in 3.0
        Query query = parser.parse(q);
        TopScoreDocCollector collector = TopScoreDocCollector.create(TOP_NUM, false); // API changed in 3.0

        long start = new Date().getTime(); // start time
        is.search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        for (int i = 0; i < hits.length; i++) {
            Document doc = is.doc(hits[i].doc); // new method is.doc()
            System.out.println("Path: " + doc.get("path") + "---------------------" + doc.getField("path")
                    + "\nIndexed on: " + doc.get("indexDate"));
        }
        long end = new Date().getTime(); // end time
        System.out.println("\nFound " + collector.getTotalHits() + " result(s), took "
                + (end - start) + " ms");
    }
}
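QueryParser is the flexible route, but if the keyword is a single exact term you can skip the parser entirely. Here is a minimal sketch of the same lookup with a programmatic TermQuery, meant to drop into the search() method above; the term "java" is only an illustrative value, and it needs import org.apache.lucene.index.Term, org.apache.lucene.search.TermQuery and org.apache.lucene.search.TopDocs.

// Minimal sketch: exact single-term search without a QueryParser.
// "java" is just an example term, not a value from this post.
Query query = new TermQuery(new Term("contents", "java"));
TopDocs topDocs = is.search(query, TOP_NUM);   // IndexSearcher.search(Query, int)
for (ScoreDoc sd : topDocs.scoreDocs) {
    Document doc = is.doc(sd.doc);
    System.out.println("Path: " + doc.get("path"));
}

A TermQuery bypasses the analyzer, so it only matches terms exactly as they were written into the index.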
Adding to the index:
import java.io.File;
import java.io.IOException;
import java.util.Date;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/*
 * Add a document to an existing index
 */
public class addIndex {

    public static void main(String s[]) throws IOException, ParseException {
        Directory dir = FSDirectory.open(new File("C:\\Users\\ZEQ\\Desktop\\ZZUluceneTest1\\index"));
        // Note: when modifying an existing index the "create" flag must be false,
        // otherwise the other documents are lost
        // (true = rebuild the index from scratch, false = open the existing one and append).
        IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_30),
                false, IndexWriter.MaxFieldLength.UNLIMITED);

        Document doc = new Document();
        doc.add(new Field("contents", "java", Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field("path", "新添加的路径www.baidu.com", Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field("indexDate", DateTools.dateToString(new Date(), DateTools.Resolution.DAY),
                Field.Store.YES, Field.Index.NOT_ANALYZED));
        iw.addDocument(doc);
        iw.commit();
        iw.close();

        // Open the reader only after the writer has committed, so the new document is visible
        IndexReader id = IndexReader.open(dir);
        System.out.println("Total documents: " + id.maxDoc());
        System.out.println("Deleted documents: " + id.numDeletedDocs());
        id.close();
    }
}
Deleting from the index:
import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/*
 * Delete documents from the index
 */
public class deleteIndex {

    public static void main(String s[]) throws IOException, ParseException {
        Directory dir = FSDirectory.open(new File("C:\\Users\\ZEQ\\Desktop\\ZZUluceneTest1\\index"));
        // Note: when deleting from an existing index the "create" flag must be false,
        // otherwise the whole index is rebuilt and the other documents are lost
        // (true = rebuild the index, false = open the existing one).
        IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_30),
                false, IndexWriter.MaxFieldLength.UNLIMITED);

        // Every document matching this query will be deleted
        QueryParser qp = new QueryParser(Version.LUCENE_30, "contents",
                new StandardAnalyzer(Version.LUCENE_30));
        Query p = qp.parse("新添加的contents");
        iw.deleteDocuments(p);
        iw.commit();
        iw.close();

        IndexReader id = IndexReader.open(dir);
        System.out.println("Total documents: " + id.maxDoc());
        System.out.println("Deleted documents: " + id.numDeletedDocs());
        id.close();
    }
}
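Deleting through a parsed query works, but since the creation class stores a unique, un-analyzed "id" field, deleting by an exact Term is often the safer option: nothing gets re-analyzed on the way, so you remove exactly the document you mean to. A minimal sketch under that assumption; the id value "3" is made up for illustration, and it needs import org.apache.lucene.index.Term.

// Minimal sketch: delete by exact term on the un-analyzed "id" field.
// "3" is only an illustrative id value.
IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_30),
        false, IndexWriter.MaxFieldLength.UNLIMITED);
iw.deleteDocuments(new Term("id", "3"));   // removes only the document whose id is exactly "3"
iw.commit();
iw.close();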
Updating the index:
import java.io.File;
import java.io.IOException;
import java.util.Date;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/*
 * Update the index
 */
public class updateIndex {

    public static void main(String s[]) throws IOException, ParseException {
        Directory dir = FSDirectory.open(new File("C:\\Users\\ZEQ\\Desktop\\ZZUluceneTest1\\index"));
        IndexSearcher is = new IndexSearcher(dir, true); // read-only
        // Note: the "create" flag must be false here as well, otherwise the rest of the index is lost
        // (true = rebuild the index, false = open the existing one).
        IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_30),
                false, IndexWriter.MaxFieldLength.UNLIMITED);

        // Build the replacement document
        Document doc = new Document();
        doc.add(new Field("contents", "android", Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field("path", "新添加的路径www.baidu.com android", Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field("indexDate", DateTools.dateToString(new Date(), DateTools.Resolution.DAY),
                Field.Store.YES, Field.Index.ANALYZED));

        /*
         * IndexWriter.updateDocument did not behave as I expected here: it only added
         * the new document and never replaced the old one.
         * So the update is done as "delete first, then add" instead.
         */
        long start = new Date().getTime(); // start time

        // Find every document that should be replaced
        QueryParser qp = new QueryParser(Version.LUCENE_30, "contents",
                new StandardAnalyzer(Version.LUCENE_30));
        Query p = qp.parse("java");

        // Search for the documents that need updating
        TopScoreDocCollector collector = TopScoreDocCollector.create(10, false); // API changed in 3.0
        is.search(p, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;

        // // Delete everything that matched but add only one replacement
        // if (hits.length > 0) { // only act if there is something to update
        //     /*
        //      * Caveat: if the search finds several documents to update, this deletes all of them
        //      * but adds only a single replacement. A unique id field solves this, or you can loop
        //      * over the hits array and add one replacement per hit, as in variant 1 below.
        //      * With a database-backed index this rarely comes up, because the table has an id
        //      * column you can query directly, so there are no duplicate matches in the first place.
        //      */
        //     // delete
        //     iw.deleteDocuments(p);
        //     // add
        //     iw.addDocument(doc);
        // }

        /* Variant 1: delete n matches, add n replacements */
        if (hits.length > 0) { // only act if there is something to update
            iw.deleteDocuments(p); // one call is enough, it deletes every match
            for (int i = 0; i < hits.length; i++) {
                iw.addDocument(doc);
            }
        }

        iw.optimize(); // optimize the index
        iw.close();

        IndexReader id = IndexReader.open(dir);
        System.out.println("Total documents: " + id.maxDoc());
        long end = new Date().getTime(); // end time
        System.out.println("Deleted documents: " + id.numDeletedDocs() + ", total time: " + (end - start) + " ms");
    }
}
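About the note in the code that updateDocument "only adds and never updates": updateDocument(Term, Document) deletes the documents containing the exact, un-analyzed term and then adds the new document, so the Term has to point at a NOT_ANALYZED field. Keyed on an analyzed field like contents, the term often matches nothing and the call degenerates into a plain add. Below is a minimal sketch keyed on the "id" field from the creation class; the id "0" and the field values are illustrative only, and it needs import org.apache.lucene.index.Term.

// Minimal sketch: delete-then-add in one call, keyed on the un-analyzed "id" field.
// Assumes every document carries a unique NOT_ANALYZED "id"; "0" and the values are made up.
IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_30),
        false, IndexWriter.MaxFieldLength.UNLIMITED);

Document doc = new Document();
doc.add(new Field("id", "0", Field.Store.YES, Field.Index.NOT_ANALYZED)); // keep the key on the new doc
doc.add(new Field("contents", "android", Field.Store.YES, Field.Index.ANALYZED));
doc.add(new Field("path", "updated path", Field.Store.YES, Field.Index.ANALYZED));

iw.updateDocument(new Term("id", "0"), doc);  // deletes every doc whose id is exactly "0", then adds doc
iw.commit();
iw.close();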
Keyword highlighting:
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.Date;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class HighLighter {

    private static String INDEX_DIR = "C:\\Users\\ZEQ\\Desktop\\ZZUluceneTest1\\index"; // where the index lives
    private static String KEYWORD = "android"; // the keyword
    private static int TOP_NUM = 100;          // show the top 100 results

    /**
     * Highlight the keyword inside a stored field
     *
     * @param query
     * @param doc
     * @param field
     * @return
     */
    private static String toHighlighter(Query query, Document doc, String field) {
        try {
            SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter(
                    "<font color=\"red\">", "</font>");
            Highlighter highlighter = new Highlighter(simpleHtmlFormatter, new QueryScorer(query));
            StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
            TokenStream tokenStream1 = analyzer.tokenStream(field, new StringReader(doc.get(field)));
            String highlighterStr = highlighter.getBestFragment(tokenStream1, doc.get(field));
            // If nothing matched, fall back to the original field value
            return highlighterStr == null ? doc.get(field) : highlighterStr;
        } catch (IOException e) {
            System.out.println(e);
        } catch (InvalidTokenOffsetsException e) {
            System.out.println(e);
        }
        return null;
    }

    public static void main(String[] args) throws Exception {
        File indexDir = new File(INDEX_DIR);
        if (!indexDir.exists() || !indexDir.isDirectory()) {
            throw new Exception(indexDir + " does not exist~~");
        }
        search(indexDir, KEYWORD); // run the query
    }

    /**
     * Search
     *
     * @param indexDir
     * @param q
     * @throws Exception
     */
    public static void search(File indexDir, String q) throws Exception {
        IndexSearcher is = new IndexSearcher(FSDirectory.open(indexDir), true); // read-only
        String field = "contents";
        QueryParser parser = new QueryParser(Version.LUCENE_30, field,
                new StandardAnalyzer(Version.LUCENE_30)); // API changed in 3.0
        Query query = parser.parse(q);
        TopScoreDocCollector collector = TopScoreDocCollector.create(TOP_NUM, false); // API changed in 3.0

        long start = new Date().getTime(); // start time
        is.search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        for (int i = 0; i < hits.length; i++) {
            Document doc = is.doc(hits[i].doc); // new method is.doc()
            // System.out.println("Path: " + doc.get("path") + "---------------------" + doc.getField("path") + "\nIndexed on: " + doc.get("indexDate"));
            System.out.println("Highlighted----------------" + toHighlighter(query, doc, "path")
                    + "\n Indexed on: " + doc.get("indexDate"));
        }
        long end = new Date().getTime(); // end time
        System.out.println("\nFound " + collector.getTotalHits() + " result(s), took "
                + (end - start) + " ms");
    }
}
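One detail about this class: the query matches on contents, but the highlight is applied to path. That is because the creation class feeds contents from a FileReader, so that field is not stored and doc.get("contents") comes back null. If you want the file text itself highlighted, a sketch of the change to the indexing loop is shown below, assuming the txt files are small enough to read into memory; readFileToString is a hypothetical helper, not a Lucene API, and it needs java.io.BufferedReader.

// Minimal sketch: store the file text so doc.get("contents") works and the
// contents field can be highlighted. In the indexing loop of the creation class, replace
//     doc.add(new Field("contents", new FileReader(files[i])));
// with:
String text = readFileToString(files[i]);   // hypothetical helper, shown below
doc.add(new Field("contents", text, Field.Store.YES, Field.Index.ANALYZED));

// Plain java.io helper used above:
private static String readFileToString(File f) throws IOException {
    StringBuilder sb = new StringBuilder();
    BufferedReader reader = new BufferedReader(new FileReader(f));
    try {
        String line;
        while ((line = reader.readLine()) != null) {
            sb.append(line).append('\n');
        }
    } finally {
        reader.close();
    }
    return sb.toString();
}

With contents stored this way, toHighlighter(query, doc, "contents") returns the matching fragment of the file text instead of the path.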
The code above is what I put together myself, and I have run all of it personally, so it should work without problems~~~ For the update operation I still don't have a really good approach, so comments and corrections are very welcome!
Here I'm sharing the source code, the jar packages, and a sample index for download~~~~
http://download.csdn.net/detail/zeq9069/6571589