Lucene 是目前功能较为强大的 Java 全文检索引擎库,其使用主要分为两步:先为数据建立索引,再在索引上执行搜索。
一、建立索引
package builder;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import java.sql.Connection;
import java.sql.Statement;
import java.sql.ResultSet;
import java.sql.DriverManager;
/**
 * Builds a Lucene full-text index from the rows of the {@code article} table
 * of a local MySQL database.
 *
 * <p>Each row becomes one document with three fields: {@code id} (stored, not
 * tokenized), {@code title} (stored, tokenized) and {@code content} (tokenized
 * only). The index is written to {@code c:/index}.
 */
public class Builder {
    /** Result set of the article query; assigned while {@link #main} runs. */
    public static ResultSet rs = null;
    /** Statement used to run the article query; assigned while {@link #main} runs. */
    public static Statement statement = null;
    /** Connection to the MySQL database; assigned while {@link #main} runs. */
    public static Connection con = null;

    /** Directory on disk that holds the Lucene index. */
    private static final String INDEX_PATH = "c:/index";

    // NOTE(review): credentials are hard-coded in the URL; consider moving them to configuration.
    private static final String JDBC_URL =
            "jdbc:mysql://localhost:3306/testlucene?user=root&password=admin&useUnicode=true&characterEncoding=gbk";

    public Builder() throws Exception {
    }

    /**
     * Reads every article from the database and adds it to the index, then
     * optimizes the index.
     *
     * <p>The JDBC resources and the index writer are released in {@code finally}
     * blocks, so a failure part-way through leaves neither the database
     * connection open nor the index write lock held.
     *
     * @param args unused
     * @throws Exception if the driver cannot be loaded, the database cannot be
     *         read, or the index cannot be written
     */
    public static void main(String[] args) throws Exception {
        /* Build the index. */
        // The third constructor argument is "create": false appends to an existing
        // index, true starts a new one. Appending fails when no index exists yet,
        // so create one only in that case.
        boolean createNew = !IndexReader.indexExists(INDEX_PATH);
        IndexWriter indexWriter = new IndexWriter(INDEX_PATH, new StandardAnalyzer(), createNew);
        try {
            // Maximum number of terms indexed per field; terms beyond it are ignored.
            indexWriter.setMaxFieldLength(250000);
            // Documents buffered in memory before a new segment is written;
            // a larger value speeds up indexing.
            indexWriter.setMaxBufferedDocs(100);
            // Maximum number of documents a segment may hold;
            // a smaller value favours incremental (append) indexing speed.
            indexWriter.setMaxMergeDocs(100);
            // How often segments are merged; a larger value makes indexing
            // faster (the original author notes the default is 10).
            indexWriter.setMergeFactor(100);

            try {
                indexArticles(indexWriter);
            } finally {
                // Release the database before the potentially long optimize step.
                closeJdbcResources();
            }

            indexWriter.optimize();
            System.out.println("结束取数据");
        } finally {
            indexWriter.close();
        }
        System.out.println("已生成所有数据索引");
    }

    /** Placeholder kept for compatibility; currently does nothing. */
    public static void getCon() throws Exception {
    }

    /** Placeholder kept for compatibility; currently does nothing. */
    public static void getStatement() throws Exception {
    }

    /** Queries all articles and adds one document per row to {@code indexWriter}. */
    private static void indexArticles(IndexWriter indexWriter) throws Exception {
        System.out.println("开始取数据");
        Class.forName("com.mysql.jdbc.Driver");
        System.out.println("已装载驱动");
        con = DriverManager.getConnection(JDBC_URL);
        System.out.println("已连接上数据库");
        // The rows are only read once, front to back, so a forward-only,
        // read-only statement is sufficient.
        statement = con.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
        rs = statement.executeQuery("select id,title,content from article");
        System.out.println("已读取数据集");

        int added = 0;
        while (rs.next()) {
            // Field values must not be null, so missing columns become "".
            String id = nullToEmpty(rs.getString(1));
            String title = nullToEmpty(rs.getString(2));
            String content = nullToEmpty(rs.getString(3));

            Document doc = new Document();
            doc.add(new Field("id", id, Field.Store.YES, Field.Index.UN_TOKENIZED));
            doc.add(new Field("title", title, Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.YES));
            doc.add(new Field("content", content, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.YES));
            // A compressed, stored "date" field could be added the same way
            // (Field.Store.COMPRESS with Field.Index.TOKENIZED).
            indexWriter.addDocument(doc);
            System.out.println("已添加"+(++added)+"doc,其ID为:"+id);
        }
    }

    /** Closes the result set, statement and connection, attempting each even if an earlier close fails. */
    private static void closeJdbcResources() throws Exception {
        try {
            if (rs != null) {
                rs.close();
            }
        } finally {
            try {
                if (statement != null) {
                    statement.close();
                }
            } finally {
                if (con != null) {
                    con.close();
                }
            }
        }
    }

    /** Returns {@code value}, or the empty string when {@code value} is {@code null}. */
    private static String nullToEmpty(String value) {
        return value == null ? "" : value;
    }
}
二、搜索
package search;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Searches the Lucene index at {@code c:/index} for a fixed query on the
 * {@code content} field and prints the {@code title} of every hit.
 */
public class Search {
    /** Directory on disk that holds the Lucene index. */
    private static final String indexPath = "c:/index";

    /**
     * Opens the index, runs the query and prints the titles of the matching
     * documents.
     *
     * <p>The searcher, reader and directory are closed in {@code finally}
     * blocks, innermost first, so they are released even when parsing or
     * searching fails. The reader is closed explicitly because a searcher
     * built from a caller-supplied reader does not close it.
     *
     * @param args unused
     * @throws Exception if the index cannot be opened or the query cannot be
     *         parsed or executed
     */
    public static void main(String[] args) throws Exception {
        // The boolean is the "create" flag: false opens the existing directory
        // without erasing its contents.
        Directory directory = FSDirectory.getDirectory(indexPath, false);
        try {
            /* Search. */
            IndexReader reader = IndexReader.open(directory);
            try {
                IndexSearcher indexSearcher = new IndexSearcher(reader);
                try {
                    System.out.println("建立搜索引擎");
                    // Unqualified query terms are matched against the "content" field.
                    QueryParser parser = new QueryParser("content", new StandardAnalyzer());
                    System.out.println("建立搜索域和分析器");
                    Query query = parser.parse("好孩子");
                    System.out.println("提供搜索内容");
                    Hits hits = indexSearcher.search(query);
                    System.out.println("进行搜索并返回数据集");
                    for (int i = 0; i < hits.length(); i++) {
                        Document doc = hits.doc(i);
                        System.out.println(doc.get("title"));
                    }
                    System.out.println("取得查询数据标题");
                } finally {
                    indexSearcher.close();
                }
            } finally {
                reader.close();
            }
        } finally {
            // Closed last: the reader and searcher read through this directory.
            directory.close();
        }
        System.out.println("关闭搜索器");
    }
}