来自于本人博客: lucene构建restful风格的简单搜索引擎服务
本人的博客现在也要改成使用lucene进行全文检索的功能,因此在这里把代码贴出来与大家分享
一,目录结构:
二,配置文件:
总共有四个配置文件:bonecp-config.xml,IKAnalyzer.cfg.xml,log4j.properties,system-config.xml
1.bonecp-config.xml是配置jdbc连接池用的,不用这个配置也行,bonecp包有默认配置
2.IKAnalyzer.cfg.xml是IKAnalyzer分词要用的字典配置文件
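这里也可以不用配置:
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
<properties>
    <comment>IK Analyzer 扩展配置</comment>
    <entry key="ext_dict">/data/lucene/dict/1_dict.txt;/data/lucene/dict/2_dict.txt;/data/lucene/dict/3_dict.txt;/data/lucene/dict/4_dict.txt;/data/lucene/dict/5_dict.txt;/data/lucene/dict/6_dict.txt;/data/lucene/dict/7_dict.txt;/data/lucene/dict/8_dict.txt;</entry>
</properties>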
3.log4j.properties这个不用多说了
4.system-config.xml是一些系统的配置参数
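(根元素名称不限,程序只读取其下的mysql和search两个节点;host节点里填写的是完整的jdbc连接串)
<?xml version="1.0" encoding="UTF-8"?>
<config>
    <mysql>
        <port>3306</port>
        <user>test</user>
        <password>test</password>
        <partitionCount>6</partitionCount>
        <maxWait>3600</maxWait>
        <driverClass>com.mysql.jdbc.Driver</driverClass>
        <idleMaxAge>1800</idleMaxAge>
        <idleConnectionTestPeriod>300</idleConnectionTestPeriod>
        <host>jdbc:mysql://localhost/blog?characterEncoding=UTF-8</host>
    </mysql>
    <search>
        <indexPath>/data/lucene/index</indexPath>
        <recommendNetIndexPath>/data/lucene/index/recommendNet</recommendNetIndexPath>
        <searcNum>10</searcNum>
        <resultNum>10000</resultNum>
    </search>
</config>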
三,监听器SystemStartupListener,实现了ServletContextListener
package com.blog.listener;
import java.io.File;
import java.net.URL;
import java.sql.SQLException;
import java.util.List;
import javax.servlet.ServletContextEvent;
import javax.servlet.ServletContextListener;
import org.apache.log4j.Logger;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
import com.blog.db.DBFactory;
import com.blog.search.BlogSearch;
import com.blog.search.index.BlogIndex;
/**
 * Servlet context listener that bootstraps the application from the
 * {@code system-config.xml} classpath resource.
 *
 * <p>On startup it reads the {@code mysql} section and hands the values to
 * {@link DBFactory#init}, then reads the {@code search} section, publishes its
 * values as JVM system properties, asks {@link BlogIndex#buildIndex} to prepare
 * the index directory and finally calls {@link BlogSearch#init()}. On shutdown
 * it calls {@link DBFactory#shutDown()}.
 *
 * <p>Startup failures are logged and not rethrown, so the web application
 * still deploys when the configuration is broken.
 */
public class SystemStartupListener implements ServletContextListener {
    private static Logger log = Logger.getLogger(SystemStartupListener.class);

    /** Name of the classpath resource that holds the configuration. */
    private static final String CONFIG_RESOURCE = "system-config.xml";

    /**
     * Shuts the database layer down when the web application stops.
     *
     * @param arg0 the context event (unused)
     */
    public void contextDestroyed(ServletContextEvent arg0) {
        DBFactory.shutDown();
    }

    /**
     * Loads the configuration and initialises the database and search layers.
     *
     * @param arg0 the context event (unused)
     */
    public void contextInitialized(ServletContextEvent arg0) {
        try {
            URL url = this.getClass().getClassLoader().getResource(CONFIG_RESOURCE);
            if (url == null) {
                throw new RuntimeException("找不到配置文件: " + CONFIG_RESOURCE);
            }
            // Read through the URL instead of new File(url.getFile()): getFile()
            // returns a URL-encoded path, which breaks when the deployment
            // directory contains spaces or non-ASCII characters.
            Document doc = new SAXReader().read(url);
            Element rootEle = doc.getRootElement();

            initDatabase(rootEle);
            initSearch(rootEle);

            log.info("初始化搜索.....");
            BlogSearch.init();
        } catch (DocumentException e) {
            log.error("解析配置文件出错.....", e);
        } catch (Exception e) {
            log.error("出现未知错误....", e);
        }
    }

    /** Reads the {@code mysql} section and initialises {@link DBFactory} with it. */
    private static void initDatabase(Element rootEle) {
        Element mysqlEle = requiredChild(rootEle, "mysql");
        // The "host" element is passed to DBFactory as-is; the "port" element
        // is not consumed by DBFactory.init and is therefore not read here.
        String host = mysqlEle.elementText("host");
        String user = mysqlEle.elementText("user");
        String password = mysqlEle.elementText("password");
        String driverClass = mysqlEle.elementText("driverClass");
        Integer partitionCount = Integer.parseInt(mysqlEle.elementText("partitionCount"));
        Integer maxWait = Integer.parseInt(mysqlEle.elementText("maxWait"));
        Integer idleMaxAge = Integer.parseInt(mysqlEle.elementText("idleMaxAge"));
        Integer idleConnectionTestPeriod = Integer.parseInt(mysqlEle.elementText("idleConnectionTestPeriod"));
        DBFactory.init(driverClass, host, user, password, partitionCount, maxWait, idleMaxAge, idleConnectionTestPeriod);
    }

    /**
     * Reads the {@code search} section, publishes it as system properties and
     * prepares the index directory.
     */
    private static void initSearch(Element rootEle) {
        Element searchEle = requiredChild(rootEle, "search");
        String indexPath = searchEle.elementText("indexPath"); // where the index files live
        String searcNum = searchEle.elementText("searcNum"); // hits returned per page
        String resultNum = searchEle.elementText("resultNum");
        String recommendNetIndexPath = searchEle.elementText("recommendNetIndexPath");

        // The properties must be set before BlogIndex, BlogSearch and Constant
        // are first loaded: those classes read them in static initialisers.
        System.setProperty("searcNum", searcNum);
        System.setProperty("resultNum", resultNum);
        System.setProperty("indexFilePath", indexPath);
        System.setProperty("recommendNetIndexPath", recommendNetIndexPath);

        // NOTE(review): this prepares recommendNetIndexPath, while BlogSearch.init()
        // opens indexFilePath - confirm that this is the intended directory.
        BlogIndex.buildIndex(recommendNetIndexPath);
    }

    /**
     * Returns the first child element called {@code name}.
     *
     * @throws RuntimeException if {@code parent} has no such child
     */
    private static Element requiredChild(Element parent, String name) {
        List<?> children = parent.elements(name);
        if (children.isEmpty()) {
            throw new RuntimeException("初始化失败....");
        }
        return (Element) children.get(0);
    }
}
四,util包中的Constant常量类
package com.blog.util;
/**
 * Paging limits for the search service, resolved once from JVM system
 * properties when this class is first loaded.
 *
 * <p>Both properties must already hold a decimal integer at that moment;
 * otherwise class initialisation fails with a {@link NumberFormatException}.
 */
public class Constant {
    /** Value of the {@code searcNum} system property. */
    public static final Integer searcNum;

    /** Value of the {@code resultNum} system property. */
    public static final Integer resultNum;

    static {
        searcNum = intProperty("searcNum");
        resultNum = intProperty("resultNum");
    }

    /** Parses the system property {@code key} as a decimal integer. */
    private static Integer intProperty(String key) {
        return Integer.valueOf(System.getProperty(key));
    }
}
util包中的DataToJson类:
package com.blog.util;
import java.util.List;
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
/**
 * Serialises a page of search hits to the JSON shape returned by the service.
 */
public class DataToJson {
    /**
     * Builds {@code {"totalCount": n, "data": [{"id": ...}, ...]}}.
     *
     * @param ids        document ids of the current page; {@code null} is
     *                   treated like an empty list
     * @param totalCount total number of hits for the query
     * @return the JSON text
     */
    public static String parseDataToJson(List<Long> ids, int totalCount) {
        JsonObject json = new JsonObject();
        json.addProperty("totalCount", totalCount);

        JsonArray array = new JsonArray();
        if (ids != null) {
            // The element type must be declared: iterating a raw List as Long
            // does not compile.
            for (Long id : ids) {
                JsonObject obj = new JsonObject();
                obj.addProperty("id", id);
                array.add(obj);
            }
        }
        json.add("data", array);
        return json.toString();
    }
}
五,entity包中的实体类:
Dashboard:
package com.blog.search.entity;
/**
 * Mutable bean holding the three values that are indexed for one entry:
 * an id, a title and a content text. All properties start out as {@code null}.
 */
public class Dashboard {
    private Long id;
    private String title;
    private String content;

    /** @return the entry id, or {@code null} if none was set */
    public Long getId() {
        return this.id;
    }

    /** @param id the entry id */
    public void setId(Long id) {
        this.id = id;
    }

    /** @return the title, or {@code null} if none was set */
    public String getTitle() {
        return this.title;
    }

    /** @param title the title */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the content text, or {@code null} if none was set */
    public String getContent() {
        return this.content;
    }

    /** @param content the content text */
    public void setContent(String content) {
        this.content = content;
    }
}
六,lucene相关的索引和检索类:
index包中的BlogIndex:
package com.blog.search.index;
import java.io.File;
import java.io.IOException;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
import com.blog.search.entity.Dashboard;
/**
 * Write side of the Lucene index: creates an empty index and adds, updates
 * and deletes documents built from {@link Dashboard} objects.
 *
 * <p>Every operation opens its own {@link IndexWriter} on the directory named
 * by the {@code indexFilePath} system property and always closes it again, so
 * the index write lock is released even when the operation fails.
 */
public class BlogIndex {
    // Read once when the class is loaded; the property has to be set before that.
    private static final String indexFilePath = System.getProperty("indexFilePath");
    private static Logger log = Logger.getLogger(BlogIndex.class);

    public BlogIndex() {
    }

    /**
     * Creates an empty index in {@code path} if that path is an existing, empty
     * directory; does nothing otherwise. Meant to be called during start-up
     * when no index exists yet. I/O failures are logged, not thrown.
     *
     * @param path directory that should contain the index
     */
    public static void buildIndex(String path) {
        File file = new File(path);
        // listFiles() can return null on an I/O error even for a directory.
        File[] children = file.isDirectory() ? file.listFiles() : null;
        if (children == null || children.length != 0) {
            return;
        }
        IndexWriter writer = null;
        try {
            Directory dir = FSDirectory.open(file);
            IndexWriterConfig iwc = newWriterConfig();
            iwc.setOpenMode(OpenMode.CREATE);
            writer = new IndexWriter(dir, iwc);
            writer.deleteAll();
        } catch (IOException e) {
            log.error("创建空索引出错.....", e);
        } finally {
            // close() commits the empty index and releases the write lock.
            closeQuietly(writer);
        }
    }

    /**
     * Converts a dashboard into a Lucene document: {@code title} is analysed
     * and stored, {@code content} is analysed but not stored, and {@code id}
     * is stored as a single untokenised term.
     */
    @SuppressWarnings("deprecation")
    private Document getDocument(Dashboard dashboard) throws Exception {
        Document doc = new Document();
        doc.add(new Field("title", dashboard.getTitle(), Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field("content", dashboard.getContent(), Field.Store.NO, Field.Index.ANALYZED));
        Field idField = new StringField("id", dashboard.getId().toString(), Field.Store.YES);
        doc.add(idField);
        return doc;
    }

    /**
     * Adds the dashboard to the index as a new document and commits.
     *
     * @param dashboard the entry to index; id, title and content must be set
     * @throws Exception if the document cannot be built, the writer cannot be
     *                   opened, or the write fails
     */
    public void writeToIndex(Dashboard dashboard) throws Exception {
        Document doc = getDocument(dashboard);
        // A failure to open the writer now propagates directly instead of
        // being swallowed and resurfacing as a NullPointerException.
        IndexWriter writer = openWriter();
        try {
            writer.addDocument(doc);
            writer.commit();
        } finally {
            closeQuietly(writer);
        }
    }

    /**
     * Deletes every document whose {@code id} term equals the given id.
     * Failures are logged and not rethrown.
     *
     * @param id id of the entry to remove
     */
    public void deleteIndex(Long id) {
        IndexWriter writer = null;
        try {
            writer = openWriter();
            writer.deleteDocuments(new Term("id", id.toString()));
            writer.commit();
        } catch (Exception e) {
            log.error("删除索引出错.....", e);
        } finally {
            closeQuietly(writer);
        }
    }

    /**
     * Replaces the document(s) with the dashboard's id by a freshly built
     * document and commits.
     *
     * @param dashboard the new state of the entry; id, title and content must be set
     * @throws Exception if the document cannot be built, the writer cannot be
     *                   opened, or the write fails
     */
    public void updateIndex(Dashboard dashboard) throws Exception {
        Document doc = getDocument(dashboard);
        IndexWriter writer = openWriter();
        try {
            writer.updateDocument(new Term("id", dashboard.getId().toString()), doc);
            writer.commit();
        } finally {
            closeQuietly(writer);
        }
    }

    /** Creates the writer configuration shared by all operations. */
    private static IndexWriterConfig newWriterConfig() {
        Analyzer analyzer = new IKAnalyzer(true);
        return new IndexWriterConfig(Version.LUCENE_43, analyzer);
    }

    /** Opens a writer on the main index directory with the default open mode. */
    private static IndexWriter openWriter() throws IOException {
        Directory dir = FSDirectory.open(new File(indexFilePath));
        return new IndexWriter(dir, newWriterConfig());
    }

    /** Closes the writer if it was opened; a failure to close is only logged. */
    private static void closeQuietly(IndexWriter writer) {
        if (writer == null) {
            return;
        }
        try {
            writer.close();
        } catch (IOException e) {
            log.error("关闭IndexWriter出错.....", e);
        }
    }
}
七,search包下面的BlogSearch类:
package com.blog.search;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.queryparser.classic.QueryParser.Operator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
import com.blog.util.Constant;
import com.blog.util.DataToJson;
/**
 * Read side of the Lucene index: parses a keyword into a query over the
 * {@code title} and {@code content} fields and returns the matching ids.
 *
 * <p>One {@link IndexReader} is cached for all requests and refreshed when the
 * index has changed. Each search takes its own reference on the reader
 * ({@code incRef}) and gives it back when done ({@code decRef}), so a reader
 * that is replaced by a concurrent request stays usable until the last search
 * on it has finished.
 */
public class BlogSearch {
    private static Logger log = Logger.getLogger(BlogSearch.class);
    // Read once when the class is loaded; the property has to be set before that.
    private static final String indexFilePath = System.getProperty("indexFilePath");
    private static final String READER_KEY = "blogsearch";
    private static String[] field = {"title", "content"};
    // Caches the opened IndexReader so the index is not reopened on every request.
    private static Map<String, IndexReader> readers = new ConcurrentHashMap<String, IndexReader>();
    private static Object lock = new Object();

    /**
     * Opens the index and caches the reader. A reader cached by an earlier
     * call is closed. I/O failures are logged, not thrown.
     */
    public static void init() {
        try {
            IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexFilePath)));
            IndexReader previous;
            synchronized (lock) {
                previous = readers.put(READER_KEY, reader);
            }
            if (previous != null) {
                previous.close();
            }
            log.info(readers.toString());
        } catch (IOException e) {
            log.error("初始化搜索器出错.......", e);
        }
    }

    /**
     * Runs the keyword query and returns at most {@code Constant.resultNum} hits.
     *
     * @param keyword query text in Lucene query-parser syntax
     * @return the hits, or {@code null} if parsing or searching failed
     */
    public TopDocs search(String keyword) {
        IndexReader reader = null;
        try {
            Query query = parseQuery(keyword);
            reader = acquireReader();
            return new IndexSearcher(reader).search(query, Constant.resultNum);
        } catch (Exception e) {
            log.error("搜索关键字出错......", e);
            return null;
        } finally {
            releaseReader(reader);
        }
    }

    /**
     * Searches for the keyword and returns one page of ids as JSON.
     *
     * <p>If the search fails, the failure is logged and an empty result with
     * {@code totalCount} 0 is returned.
     *
     * @param keyword  query text in Lucene query-parser syntax
     * @param pageSize despite its name, the 1-based page number; values below
     *                 1 are treated as page 1
     * @return JSON text produced by {@link DataToJson#parseDataToJson}
     */
    public String getResult(String keyword, int pageSize) {
        List<Long> ids = new ArrayList<Long>();
        int totalCount = 0;
        IndexReader reader = null;
        try {
            Query query = parseQuery(keyword);
            // The reader is held until the stored fields have been loaded.
            reader = acquireReader();
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs td = searcher.search(query, Constant.resultNum);
            totalCount = td.totalHits;
            collectPage(searcher, td, pageSize, ids);
        } catch (Exception e) {
            log.error("搜索关键字出错......", e);
        } finally {
            releaseReader(reader);
        }
        return DataToJson.parseDataToJson(ids, totalCount);
    }

    /** Loads the ids of the requested page from {@code td} into {@code ids}. */
    private void collectPage(IndexSearcher searcher, TopDocs td, int pageSize, List<Long> ids) {
        ScoreDoc[] hits = td.scoreDocs;
        if (hits.length == 0) {
            log.debug("no result data");
            return;
        }
        int totalCount = td.totalHits;
        int page = Math.max(pageSize, 1);
        // long arithmetic so that a huge page number cannot overflow
        long start = (long) Constant.searcNum * (page - 1);
        long end = start + Constant.searcNum;
        if (start >= totalCount) {
            // Pre-existing behaviour: a page beyond the last one falls back to
            // returning all retrieved hits.
            start = 0;
            end = totalCount;
        }
        // scoreDocs holds at most Constant.resultNum entries, which can be
        // fewer than totalHits, so the array length is the real upper bound.
        end = Math.min(end, hits.length);
        for (long i = start; i < end; i++) {
            try {
                Document doc = searcher.doc(hits[(int) i].doc);
                ids.add(Long.parseLong(doc.get("id")));
            } catch (Exception e) {
                log.error("start=" + start + ", end=" + end + ", " + hits.length, e);
            }
        }
    }

    /** Parses the keyword into a query in which all terms are required (AND). */
    private static Query parseQuery(String keyword) throws Exception {
        Analyzer analyzer = new IKAnalyzer(true);
        QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_43, field, analyzer);
        parser.setDefaultOperator(Operator.AND);
        return parser.parse(keyword);
    }

    /**
     * Returns the current reader with its reference count incremented,
     * refreshing the cached reader first if the index has changed. Callers
     * must pass the result to {@link #releaseReader} when done.
     */
    private static IndexReader acquireReader() throws IOException {
        synchronized (lock) {
            IndexReader current = readers.get(READER_KEY);
            if (current == null) {
                throw new IllegalStateException("搜索器尚未初始化");
            }
            // openIfChanged returns null when the index has not changed.
            IndexReader refreshed = DirectoryReader.openIfChanged((DirectoryReader) current);
            if (refreshed != null) {
                readers.put(READER_KEY, refreshed);
                // Drops only the cache's reference; searches still running on
                // the old reader keep it open through their own reference.
                current.close();
                current = refreshed;
            }
            current.incRef();
            return current;
        }
    }

    /** Gives back a reference taken by {@link #acquireReader}; null is ignored. */
    private static void releaseReader(IndexReader reader) {
        if (reader == null) {
            return;
        }
        try {
            reader.decRef();
        } catch (IOException e) {
            log.error("释放IndexReader出错.....", e);
        }
    }
}
八,service包下的BlogSearchService,这是jersey的入口,由这个类向外界提供api:
package com.blog.search.service;
import javax.ws.rs.FormParam;
import javax.ws.rs.GET;
import javax.ws.rs.POST;
import javax.ws.rs.Path;
import javax.ws.rs.Produces;
import javax.ws.rs.QueryParam;
import javax.ws.rs.core.MediaType;
import com.blog.search.BlogSearch;
import com.blog.search.entity.Dashboard;
import com.blog.search.index.BlogIndex;
import com.google.gson.JsonObject;
@Path("/blogSearch/")
public class BlogSearchService {
@GET
@Path("/queryByKeyword")
@Produces(MediaType.APPLICATION_JSON)
public String queryIdsByKeyword(@QueryParam("keyword") String keyword, @QueryParam("pageSize") Integer pageSize) {
return new BlogSearch().getResult(keyword, pageSize);
}
@POST
@Path("/buildByContent")
@Produces(MediaType.APPLICATION_JSON)
public String buildIndexByContent(@FormParam("content") String content,@FormParam("title")String title, @FormParam("id") Long id) {
BlogIndex bi = new BlogIndex();
Dashboard dashboard = new Dashboard();
dashboard.setContent(content);
dashboard.setTitle(title);
dashboard.setId(id);
JsonObject json = new JsonObject();
try {
bi.writeToIndex(dashboard);
json.addProperty("result", "200");
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
json.addProperty("result", "500");
} finally {
//index();
return json.toString();
}
}
@POST
@Path("/deleteById")
@Produces(MediaType.APPLICATION_JSON)
public String deleteIndexById(@FormParam("id") Long id) {
BlogIndex bi = new BlogIndex();
JsonObject json = new JsonObject();
try {
bi.deleteIndex(id);
json.addProperty("result", 200);
} catch(Exception e) {
json.addProperty("result", 500);
} finally {
return json.toString();
}
}
@POST
@Path("/update")
@Produces(MediaType.APPLICATION_JSON)
public String updateIndex(@FormParam("id") Long id, @FormParam("content") String content, @FormParam("title") String title) {
BlogIndex bi = new BlogIndex();
JsonObject json = new JsonObject();
try {
Dashboard dashboard = new Dashboard();
dashboard.setContent(content);
dashboard.setTitle(title);
dashboard.setId(id);
bi.updateIndex(dashboard);
json.addProperty("result", 200);
} catch(Exception e) {
json.addProperty("result", 500);
} finally {
return json.toString();
}
}
}
九,web.xml的配置:
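<web-app>
    <welcome-file-list>
        <welcome-file>index.jsp</welcome-file>
    </welcome-file-list>
    <servlet>
        <servlet-name>JerseyServlet</servlet-name>
        <servlet-class>com.sun.jersey.spi.container.servlet.ServletContainer</servlet-class>
        <init-param>
            <param-name>com.sun.jersey.config.property.packages</param-name>
            <param-value>com.blog.search.service</param-value>
        </init-param>
        <load-on-startup>1</load-on-startup>
    </servlet>
    <servlet-mapping>
        <servlet-name>JerseyServlet</servlet-name>
        <url-pattern>/search/*</url-pattern>
    </servlet-mapping>
    <listener>
        <listener-class>com.blog.listener.SystemStartupListener</listener-class>
    </listener>
</web-app>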
十,程序依赖包:
slf4j-nop-1.7.5.jar
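好了,完成之后,配置好tomcat并发布项目(例如使用myeclipse自带的tomcat)。建立索引时,向http://localhost:port/项目名称/search/blogSearch/buildByContent发送POST请求,以表单参数传递id,title和content;deleteById和update同样是POST接口。查询则使用GET请求,url类似:http://localhost:port/项目名称/search/blogSearch/queryByKeyword?keyword=关键字&pageSize=页码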
就这样,我们创建了一个简单的restful风格的简单搜索引擎,里面的配置大家按照自己的需求改改就好