搜索引擎主要包含以下三点:
1.索引
2.分词
3.搜索
作为一个网站,它还包括前台和后台,Lucene的开发属于后台应用。
以下是我写的一个小程序:
package org.itat.test;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
/**
 * Minimal Lucene 3.5 example covering the three basic parts of a search application:
 * <ol>
 * <li>building an index,</li>
 * <li>analyzing (tokenizing) text,</li>
 * <li>searching the index.</li>
 * </ol>
 */
public class hellolucene {
    /** On-disk location of the index; written by {@link #index()} and read by {@link #searcher()}. */
    private static final String INDEX_PATH = "d:/lucene/index01";

    /** Directory whose regular files are added to the index. */
    private static final String SOURCE_PATH = "d:/lucene/example";

    /**
     * Builds the index: every regular file directly inside {@code SOURCE_PATH} becomes one
     * document with three fields: {@code content} (the file's text, supplied through a reader),
     * {@code filename} and {@code path} (both stored, not analyzed).
     *
     * <p>I/O failures are reported with {@code printStackTrace()} and not rethrown. If the source
     * directory cannot be listed, a message is written to {@code System.err} and nothing is added.
     * The writer and the directory are always closed before returning.
     *
     * <p>NOTE(review): no open mode is set on the {@code IndexWriterConfig}, so this presumably
     * appends to an existing index and running it twice would add the same files twice. Confirm
     * against the Lucene 3.5 defaults.
     */
    public void index(){
        Directory directory = null;
        IndexWriter writer = null;
        try {
            // 1. Open the Directory on disk (a RAMDirectory would keep the index in memory instead).
            directory = FSDirectory.open(new File(INDEX_PATH));
            // 2. Create the IndexWriter.
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35));
            writer = new IndexWriter(directory, iwc);
            // 3. Enumerate the files to index. listFiles() returns null when the path is not a
            //    readable directory, so guard against that instead of failing with an NPE.
            File[] files = new File(SOURCE_PATH).listFiles();
            if (files == null) {
                System.err.println("Cannot list files in " + SOURCE_PATH);
                return;
            }
            for (File file : files) {
                // Sub-directories cannot be opened with FileReader; skipping them keeps one
                // stray folder from aborting the whole run.
                if (!file.isFile()) {
                    continue;
                }
                // 4./5. Build the Document and add it to the index.
                addFile(writer, file);
            }
        } catch (IOException e) {
            // CorruptIndexException and LockObtainFailedException are IOException subclasses,
            // so this single handler covers everything the original three handlers did.
            e.printStackTrace();
        } finally {
            // Close the writer before the directory it writes into.
            try {
                if (writer != null) {
                    writer.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
            try {
                if (directory != null) {
                    directory.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Adds one file to the index as a single document and releases the file handle afterwards.
     *
     * @param writer open index writer that receives the document
     * @param file   regular file to index
     * @throws IOException if the file cannot be opened or the document cannot be added
     */
    private void addFile(IndexWriter writer, File file) throws IOException {
        // FileReader decodes using the platform default charset, exactly as the original code did.
        FileReader content = new FileReader(file);
        try {
            Document doc = new Document();
            doc.add(new Field("content", content));
            doc.add(new Field("filename", file.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.add(new Field("path", file.getAbsolutePath(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            writer.addDocument(doc);
        } finally {
            // Closing here guarantees the handle is released even when addDocument fails.
            content.close();
        }
    }

    /**
     * Searches the {@code content} field of the index at {@code INDEX_PATH} for a fixed query
     * string and prints {@code filename[path]} for each of the top 10 hits to standard output.
     *
     * <p>I/O and query-parse failures are reported with {@code printStackTrace()} and not
     * rethrown. The reader and the directory are always closed before returning.
     */
    public void searcher(){
        Directory directory = null;
        IndexReader reader = null;
        try {
            // 1. Open the Directory.
            directory = FSDirectory.open(new File(INDEX_PATH));
            // 2. Open an IndexReader on it.
            reader = IndexReader.open(directory);
            // 3. Create the IndexSearcher on top of the reader.
            IndexSearcher searcher = new IndexSearcher(reader);
            // 4. Build the Query; the parser decides which field is searched and how the text is analyzed.
            QueryParser parser = new QueryParser(Version.LUCENE_35, "content", new StandardAnalyzer(Version.LUCENE_35));
            Query query = parser.parse("事件");
            // 5. Run the search and keep the 10 best hits.
            TopDocs tds = searcher.search(query, 10);
            // 6. Walk the ScoreDoc entries of the result.
            for (ScoreDoc sd : tds.scoreDocs) {
                // 7. Load the stored Document for this hit.
                Document d = searcher.doc(sd.doc);
                // 8. Print the stored field values.
                System.out.println(d.get("filename")+"["+d.get("path")+"]");
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        } finally {
            // Release the reader and the directory on both the success and the failure path.
            try {
                if (reader != null) {
                    reader.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
            try {
                if (directory != null) {
                    directory.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
package org.itat.test;
import org.junit.Test;
/**
 * JUnit entry points that drive the two steps of {@code hellolucene}.
 * Neither test asserts on a result; each one only invokes the corresponding method.
 */
public class testlucene {
    /** Invokes the indexing step on a fresh {@code hellolucene} instance. */
    @Test
    public void testIndex(){
        new hellolucene().index();
    }

    /** Invokes the search step on a fresh {@code hellolucene} instance. */
    @Test
    public void testSearch(){
        new hellolucene().searcher();
    }
}