Lucene简单使用
最近看了一下Lucene的东西,把它写下来,方便以后查看。Lucene的结构和工作原理我就不说了,网上有很多资料。
我的环境是Lucene2.0
写一个简单使用Lucene的示例。此类首先创建索引,然后显示索引文档的情况,最后进行搜索(先只在content里找,再在title或content里找)。
package
net.blogjava.chenlb.lucene;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
/**
 * Simple Lucene usage example.
 *
 * <p>The program builds an index with three documents, prints every indexed
 * document, then runs a single-field search followed by a multi-field search
 * and prints the hits together with the elapsed search time.
 *
 * @author chenlb 2008-3-8 11:42:55 PM
 */
public class LuceneUse {

    /**
     * Runs the whole example: index, dump, single-field search, multi-field search.
     *
     * @param args unused
     * @throws Exception if indexing, reading, query parsing or searching fails
     */
    public static void main(String[] args) throws Exception {
        LuceneUse liu = new LuceneUse();
        // One analyzer instance is shared by indexing and by both query parsers.
        StandardAnalyzer analyzer = new StandardAnalyzer();

        // Indexing
        IndexWriter iw = new IndexWriter( " index " , analyzer, true );
        try {
            // Add the Lucene documents to be indexed
            Document doc = liu.createDoc( " Lucene创建索引示例 " , " chenlb " , " 2008-03-08 " , " Lucene索引的内容在这里,这些内容不被存储. " );
            iw.addDocument(doc);
            doc = liu.createDoc( " 文档2 " , " bin " , " 2007-10-03 " , " 这是索引的另一个文档 " );
            iw.addDocument(doc);
            doc = liu.createDoc( " 学习内容 " , " chenlb " , " 2008-3-3 " , " 要努力奋斗,祝网友们天天快乐 " );
            iw.addDocument(doc);
            iw.optimize(); // optimize the index
        } finally {
            // Always release the writer, even when adding a document fails.
            iw.close();
        }

        // Read back what was indexed
        System.out.println( " ===========索引文档内容============= " );
        IndexReader reader = IndexReader.open( " index " );
        try {
            int total = reader.numDocs();
            for (int i = 0; i < total; i++) {
                liu.printDoc(reader.document(i));
            }
        } finally {
            // The original never closed the reader; close it to release index files.
            reader.close();
        }

        System.out.println( " ===========以下是单域查找'天天'结果============ " );
        IndexSearcher searcher = new IndexSearcher( " index " );
        try {
            // Single-field search
            QueryParser parser = new QueryParser( " content " , analyzer);
            Query q = parser.parse( " 天天 " );
            long start = System.currentTimeMillis();
            Hits hits = searcher.search(q);
            long end = System.currentTimeMillis();
            for (int i = 0; i < hits.length(); i++) {
                liu.printDoc(hits.doc(i));
            }
            System.out.println( " 共找到: " + hits.length() + " 个文档,花了: " + (end - start) + " ms " );

            // Multi-field search
            System.out.println( " ===========以下多域是查找'内容'结果============ " );
            // Match in either the title field or the content field (both clauses are SHOULD)
            q = MultiFieldQueryParser.parse( " 内容 " , new String[] { " title " , " content " }, new BooleanClause.Occur[] {BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD}, analyzer);
            start = System.currentTimeMillis();
            hits = searcher.search(q);
            end = System.currentTimeMillis();
            for (int i = 0; i < hits.length(); i++) {
                liu.printDoc(hits.doc(i));
            }
            System.out.println( " 共找到: " + hits.length() + " 个文档,花了: " + (end - start) + " ms " );
        } finally {
            // The original never closed the searcher; hits are read before this point.
            searcher.close();
        }
    }

    /**
     * Prints the title, author, date and content fields of a document on one line.
     *
     * @param d the document whose stored field values are printed
     */
    private void printDoc(Document d) {
        System.out.println( " 标题: " + d.get( " title " ) + " , 作者: " + d.get( " author " ) + " , 日期: " + d.get( " date " ) + " , 内容: " + d.get( " content " ));
    }

    /**
     * Creates a Lucene document with four stored fields.
     *
     * <p>Title and content are tokenized for searching; author and date are
     * stored only and not indexed.
     *
     * @param title   value of the title field
     * @param author  value of the author field
     * @param date    value of the date field
     * @param content value of the content field
     * @return the populated document
     */
    private Document createDoc(String title, String author, String date, String content) {
        Document doc = new Document();
        doc.add( new Field( " title " , title, Field.Store.YES, Field.Index.TOKENIZED));
        doc.add( new Field( " author " , author, Field.Store.YES, Field.Index.NO));
        doc.add( new Field( " date " , date, Field.Store.YES, Field.Index.NO));
        // Content is stored (Store.YES) so that printDoc can display it.
        doc.add( new Field( " content " , content, Field.Store.YES, Field.Index.TOKENIZED));
        return doc;
    }
}
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
/**
 * Simple Lucene usage example.
 *
 * <p>The program builds an index with three documents, prints every indexed
 * document, then runs a single-field search followed by a multi-field search
 * and prints the hits together with the elapsed search time.
 *
 * @author chenlb 2008-3-8 11:42:55 PM
 */
public class LuceneUse {

    /**
     * Runs the whole example: index, dump, single-field search, multi-field search.
     *
     * @param args unused
     * @throws Exception if indexing, reading, query parsing or searching fails
     */
    public static void main(String[] args) throws Exception {
        LuceneUse liu = new LuceneUse();
        // One analyzer instance is shared by indexing and by both query parsers.
        StandardAnalyzer analyzer = new StandardAnalyzer();

        // Indexing
        IndexWriter iw = new IndexWriter( " index " , analyzer, true );
        try {
            // Add the Lucene documents to be indexed
            Document doc = liu.createDoc( " Lucene创建索引示例 " , " chenlb " , " 2008-03-08 " , " Lucene索引的内容在这里,这些内容不被存储. " );
            iw.addDocument(doc);
            doc = liu.createDoc( " 文档2 " , " bin " , " 2007-10-03 " , " 这是索引的另一个文档 " );
            iw.addDocument(doc);
            doc = liu.createDoc( " 学习内容 " , " chenlb " , " 2008-3-3 " , " 要努力奋斗,祝网友们天天快乐 " );
            iw.addDocument(doc);
            iw.optimize(); // optimize the index
        } finally {
            // Always release the writer, even when adding a document fails.
            iw.close();
        }

        // Read back what was indexed
        System.out.println( " ===========索引文档内容============= " );
        IndexReader reader = IndexReader.open( " index " );
        try {
            int total = reader.numDocs();
            for (int i = 0; i < total; i++) {
                liu.printDoc(reader.document(i));
            }
        } finally {
            // The original never closed the reader; close it to release index files.
            reader.close();
        }

        System.out.println( " ===========以下是单域查找'天天'结果============ " );
        IndexSearcher searcher = new IndexSearcher( " index " );
        try {
            // Single-field search
            QueryParser parser = new QueryParser( " content " , analyzer);
            Query q = parser.parse( " 天天 " );
            long start = System.currentTimeMillis();
            Hits hits = searcher.search(q);
            long end = System.currentTimeMillis();
            for (int i = 0; i < hits.length(); i++) {
                liu.printDoc(hits.doc(i));
            }
            System.out.println( " 共找到: " + hits.length() + " 个文档,花了: " + (end - start) + " ms " );

            // Multi-field search
            System.out.println( " ===========以下多域是查找'内容'结果============ " );
            // Match in either the title field or the content field (both clauses are SHOULD)
            q = MultiFieldQueryParser.parse( " 内容 " , new String[] { " title " , " content " }, new BooleanClause.Occur[] {BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD}, analyzer);
            start = System.currentTimeMillis();
            hits = searcher.search(q);
            end = System.currentTimeMillis();
            for (int i = 0; i < hits.length(); i++) {
                liu.printDoc(hits.doc(i));
            }
            System.out.println( " 共找到: " + hits.length() + " 个文档,花了: " + (end - start) + " ms " );
        } finally {
            // The original never closed the searcher; hits are read before this point.
            searcher.close();
        }
    }

    /**
     * Prints the title, author, date and content fields of a document on one line.
     *
     * @param d the document whose stored field values are printed
     */
    private void printDoc(Document d) {
        System.out.println( " 标题: " + d.get( " title " ) + " , 作者: " + d.get( " author " ) + " , 日期: " + d.get( " date " ) + " , 内容: " + d.get( " content " ));
    }

    /**
     * Creates a Lucene document with four stored fields.
     *
     * <p>Title and content are tokenized for searching; author and date are
     * stored only and not indexed.
     *
     * @param title   value of the title field
     * @param author  value of the author field
     * @param date    value of the date field
     * @param content value of the content field
     * @return the populated document
     */
    private Document createDoc(String title, String author, String date, String content) {
        Document doc = new Document();
        doc.add( new Field( " title " , title, Field.Store.YES, Field.Index.TOKENIZED));
        doc.add( new Field( " author " , author, Field.Store.YES, Field.Index.NO));
        doc.add( new Field( " date " , date, Field.Store.YES, Field.Index.NO));
        // Content is stored (Store.YES) so that printDoc can display it.
        doc.add( new Field( " content " , content, Field.Store.YES, Field.Index.TOKENIZED));
        return doc;
    }
}