lucene增量索引的简单实现

用lucene来建立搜索程序,检索效率大大地提高了,但这是以建立索引为代价的。建立索引本身就是个耗内存大、时间长的过程(前提是数据量比较大;数据少的话何必用lucene来建立全文检索,个人拙见),因此索引的建立就成了瓶颈。如果我们建立好索引之后,每次更新数据都把全部索引重新建立一遍,无疑是不合理的:为什么不能在原先索引文件的基础上,只把新更新的数据加进去呢?增量索引就是在建完索引之后,将数据库最后一条记录的ID存储起来;下次建立索引时先取出这个ID,据此查出这个ID之后新增的数据,并把这些数据的索引追加到原先的索引文件里面。下面来看个简单的例子。
数据库有两个字段id和title,话不多说,直接上代码,一看便知

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.Statement;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;

/**
 * Minimal demonstration of incremental Lucene indexing.
 *
 * <p>The id of the last indexed database row is persisted in a small text
 * file. On each run only rows whose id is greater than the persisted value
 * are fetched and added to the index, and the persisted id is then advanced.
 * When the id file has no content the index is created from scratch.
 */
public class Index {

    /**
     * Runs one indexing pass: reads the stored id, fetches newer rows,
     * indexes them, and prints the id stored after the pass.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        ResultSet rs = null;
        try {
            Index index = new Index();
            String path = "d:\\index";// directory that holds the index files
            String storeIdPath = "d:\\storeId.txt";// file that holds the last indexed id
            String storeId = getStoreId(storeIdPath);
            rs = index.getResult(storeId);
            index.indexBuilding(path, storeIdPath, rs);
            storeId = getStoreId(storeIdPath);
            System.out.println(storeId);// print the id stored by this run
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // getResult() hands out a live ResultSet; release the JDBC
            // resources behind it once indexing has finished.
            closeQuietly(rs);
        }
    }

    /**
     * Queries the {@code newitem} table for rows whose id is greater than
     * the given value, ordered by id.
     *
     * <p>The returned {@link ResultSet} keeps its statement and connection
     * open; the caller is responsible for closing them (for example via
     * {@code rs.getStatement().getConnection()}).
     *
     * @param storeId last indexed id; bound as a string parameter
     * @return the open result set
     * @throws Exception if the driver cannot be loaded or the query fails
     */
    public ResultSet getResult(String storeId) throws Exception {
        Class.forName("com.mysql.jdbc.Driver").newInstance();
        String url = "jdbc:mysql://localhost:3306/ding";
        String userName = "root";
        String password = "ding";
        Connection conn = DriverManager.getConnection(url, userName, password);
        try {
            // Bind the id as a parameter instead of concatenating it into
            // the SQL text (the value comes from a file on disk).
            PreparedStatement stmt = conn
                    .prepareStatement("select * from newitem where id > ? order by id");
            stmt.setString(1, storeId);
            return stmt.executeQuery();
        } catch (Exception e) {
            // Do not leak the connection when the query cannot be issued.
            try {
                conn.close();
            } catch (Exception ignored) {
                // the original failure is the one worth reporting
            }
            throw e;
        }
    }

    /**
     * Adds every row of {@code rs} to the index at {@code path} and, if at
     * least one row was indexed, stores the id of the last row in
     * {@code storeIdPath}. The same approach works if the rows come from a
     * list instead of a result set.
     *
     * <p>The index is created anew when the id file has no first line, and
     * appended to otherwise.
     *
     * @param path        index directory
     * @param storeIdPath file holding the last indexed id
     * @param rs          rows to index; must expose {@code id} and {@code title} columns
     * @return {@code true} if indexing completed, {@code false} if an exception occurred
     */
    public boolean indexBuilding(String path, String storeIdPath, ResultSet rs) {
        IndexWriter writer = null;
        try {
            Analyzer luceneAnalyzer = new StandardAnalyzer();
            // Decide between a fresh index and an incremental one based on
            // whether an id has been stored before.
            boolean isEmpty = !hasStoredId(storeIdPath);

            // create == false means: append to the existing index
            writer = new IndexWriter(path, luceneAnalyzer, isEmpty);
            String storeId = "";
            boolean indexFlag = false;
            while (rs.next()) {
                String id = rs.getString("id");
                String title = rs.getString("title");
                writer.addDocument(Document(id, title));
                // Remember the id of the last row seen. Relies on the rows
                // arriving ordered by id; kept simple for the demonstration.
                storeId = id;
                indexFlag = true;
            }
            writer.optimize();
            writer.close();
            writer = null;
            if (indexFlag) {
                // persist the id of the last indexed row
                writeStoreId(storeIdPath, storeId);
            }
            return true;
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println("出错了" + e.getClass() + "\n 错误信息为: "
                    + e.getMessage());
            return false;
        } finally {
            // Close the writer on failure paths too, so it is not left open
            // when indexing is interrupted.
            if (writer != null) {
                try {
                    writer.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Builds the Lucene document for one row.
     *
     * @param id    row id, stored in field {@code ID}
     * @param title row title, stored in field {@code TITLE}; {@code null} is indexed as an empty string
     * @return the document with both fields stored and tokenized
     */
    public static Document Document(String id, String title) {
        Document doc = new Document();
        doc.add(new Field("ID", id, Field.Store.YES, Field.Index.TOKENIZED));
        // A NULL title column would otherwise be passed to Field as null and
        // abort the whole indexing run.
        String safeTitle = (title == null) ? "" : title;
        doc.add(new Field("TITLE", safeTitle, Field.Store.YES,
                Field.Index.TOKENIZED));
        return doc;
    }

    /**
     * Reads the stored id from disk, creating the file if it does not exist.
     *
     * @param path file holding the last indexed id
     * @return the first line of the file, {@code "0"} if the file has no
     *         first line or the line is empty, or {@code ""} if reading failed
     */
    public static String getStoreId(String path) {
        String storeId = "";
        BufferedReader br = null;
        try {
            File file = new File(path);
            if (!file.exists()) {
                file.createNewFile();
            }
            br = new BufferedReader(new FileReader(path));
            storeId = br.readLine();
            // Compare by content; == on strings only compares references.
            if (storeId == null || storeId.length() == 0) {
                storeId = "0";
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeQuietly(br);
        }
        return storeId;
    }

    /**
     * Writes the id to disk, replacing the previous file content.
     *
     * @param path    file holding the last indexed id
     * @param storeId id to store
     * @return {@code true} if the id was written without error
     */
    public static boolean writeStoreId(String path, String storeId) {
        boolean b = false;
        PrintWriter out = null;
        try {
            File file = new File(path);
            if (!file.exists()) {
                file.createNewFile();
            }
            out = new PrintWriter(new FileWriter(path));
            out.write(storeId);
            // PrintWriter swallows IOExceptions; checkError() flushes and
            // reports whether the write actually succeeded.
            b = !out.checkError();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (out != null) {
                out.close();
            }
        }
        return b;
    }

    /**
     * Tells whether the id file already has a first line, creating the file
     * if it does not exist. An I/O failure is reported and treated as
     * "no id stored".
     */
    private static boolean hasStoredId(String storeIdPath) {
        BufferedReader br = null;
        try {
            File file = new File(storeIdPath);
            if (!file.exists()) {
                file.createNewFile();
            }
            br = new BufferedReader(new FileReader(storeIdPath));
            return br.readLine() != null;
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        } finally {
            closeQuietly(br);
        }
    }

    /** Closes a reader, reporting but not propagating a failure. */
    private static void closeQuietly(BufferedReader reader) {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Closes a result set together with the statement and connection behind it. */
    private static void closeQuietly(ResultSet rs) {
        if (rs == null) {
            return;
        }
        Statement stmt = null;
        Connection conn = null;
        try {
            // Look these up before closing rs; a closed result set may
            // refuse to hand out its statement.
            stmt = rs.getStatement();
            if (stmt != null) {
                conn = stmt.getConnection();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        try {
            rs.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
        if (stmt != null) {
            try {
                stmt.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        if (conn != null) {
            try {
                conn.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
}

这里代码写得比较简单,还有很多需要改进的地方,读者可以自行改进;这里只是为了说明增量索引的原理,望指正。
转自:[url]http://hi.baidu.com/lewutian/blog/item/e98c63d3f10f7ed3a9ec9ae2.html[/url]


这只是一种实现增量索引的方式,此方法可行,但是有人说这个很烂,我还会继续关注增量索引的。

你可能感兴趣的:(lucene)