一、下载lucene4.7的jar包:
lucene-analyzers-common-4.7.0.jar
lucene-analyzers-smartcn-4.7.0.jar
lucene-core-4.7.0.jar
lucene-facet-4.7.0.jar
lucene-highlighter-4.7.0.jar
lucene-queries-4.7.0.jar
lucene-queryparser-4.7.0.jar
把以上jar包导入项目中
二、创建索引
*************************************************1.创建索引配置****************************************
因为创建索引是针对表的,所以定义配置文件,配置需要创建索引的SQL
index.xml:
<?xml version='1.0' encoding='UTF-8'?>
<!--
Index definitions read by IndexConfigMgr. Each <index> describes one Lucene index:
name   : index name (also used as the sub-directory of the index root)
all    : SQL used for a full rebuild
add    : SQL selecting rows created since the last run (ID greater than the stored max ID)
update : SQL selecting already-indexed rows changed since the last run
delete : SQL selecting already-indexed rows flagged as removed since the last run
blob   : value handed to the query service as its "blob" argument
{?#ID#} and {?#UPDATE_TIME#} are placeholders bound from the checkpoint written by CreateIndexJob.
The update/delete queries use "<=" so that the row whose ID equals the stored max ID is covered
as well; with "<" that row would match neither add (ID > max) nor update/delete (ID < max).
-->
<indexs>
<index>
<name>riskRule</name>
<all>
<![CDATA[
select ID,NAME,BODY,DOCUMENT_TYPE from ARMS.T_RISK_RULES
where remove_flag = 0
ORDER BY ID
]]>
</all>
<add>
<![CDATA[
SELECT ID,NAME,BODY,DOCUMENT_TYPE from ARMS.T_RISK_RULES
WHERE remove_flag = 0
and ID > {?#ID#}
AND UPDATE_TIMESTAMP > {?#UPDATE_TIME#}
]]>
</add>
<update>
<![CDATA[
SELECT ID,NAME,BODY,DOCUMENT_TYPE from ARMS.T_RISK_RULES
WHERE remove_flag = 0
and ID <= {?#ID#}
AND UPDATE_TIMESTAMP > {?#UPDATE_TIME#}
]]>
</update>
<delete>
<![CDATA[
SELECT ID,NAME,BODY,DOCUMENT_TYPE from ARMS.T_RISK_RULES
WHERE remove_flag = 1
AND ID <= {?#ID#}
AND UPDATE_TIMESTAMP > {?#UPDATE_TIME#}
]]>
</delete>
<blob>BODY:DOCUMENT_TYPE</blob>
</index>
<index>
<name>riskProblem</name>
<all>
<![CDATA[
SELECT ID,TITLE,CONTENTS,PUNISH,CRITERION_CONTENT FROM ARMS.T_RISK
WHERE REMOVE_FLAG = 0
ORDER BY ID
]]>
</all>
<add>
<![CDATA[
SELECT ID,TITLE,CONTENTS,PUNISH,CRITERION_CONTENT FROM ARMS.T_RISK
WHERE REMOVE_FLAG = 0
AND ID > {?#ID#}
AND UPDATE_TIME > {?#UPDATE_TIME#}
]]>
</add>
<update>
<![CDATA[
SELECT ID,TITLE,CONTENTS,PUNISH,CRITERION_CONTENT FROM ARMS.T_RISK
WHERE REMOVE_FLAG = 0
AND ID <= {?#ID#}
AND UPDATE_TIME > {?#UPDATE_TIME#}
]]>
</update>
<delete>
<![CDATA[
SELECT ID,TITLE,CONTENTS,PUNISH,CRITERION_CONTENT FROM ARMS.T_RISK
WHERE REMOVE_FLAG = 1
AND ID <= {?#ID#}
AND UPDATE_TIME > {?#UPDATE_TIME#}
]]>
</delete>
<blob></blob>
</index>
</indexs>
读取解析index.xml的工具类
package com.lhzq.ibms.lucene.util;
import com.htsc.abms.lucene.model.Index;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
/**
* Created with IntelliJ IDEA.
* User: 陈圣林
* Date: 14-5-10
* Time: 上午9:28
* 索引配置解析
*/
/**
 * Singleton that parses the classpath resource {@code index/index.xml} and exposes one
 * {@link Index} per {@code <index>} element.
 *
 * <p>The file is re-read on every {@link #getInstance()} call. Reloading happens under the
 * singleton lock and publishes a freshly built list, so a list previously returned by
 * {@link #getTableConfigs()} is never modified while a caller iterates over it.
 */
public class IndexConfigMgr
{
    /** The single instance, created on first use. */
    private static IndexConfigMgr indexConfigMgr;

    /** Lock guarding instance creation and reloads. */
    private static final Object obj = new Object();

    /** Parsed index definitions; replaced as a whole on each successful reload. */
    private volatile List<Index> tableConfigs;

    /** Logger. */
    private static Logger logger= LoggerFactory.getLogger(IndexConfigMgr.class);

    /** Classpath-relative location of the index configuration file. */
    private static final String INDEX_DIR = "index/index.xml";

    /** Element names used in the configuration file. */
    private static final String INDEX_NODE_NAME = "index";
    private static final String NAME_NODE_NAME = "name";
    private static final String ALL_NODE_NAME = "all";
    private static final String ADD_NODE_NAME = "add";
    private static final String UPDATE_NODE_NAME = "update";
    private static final String DELETE_NODE_NAME = "delete";
    private static final String BLOB_NODE_NAME = "blob";

    /**
     * Private constructor; starts with an empty configuration list.
     */
    private IndexConfigMgr()
    {
        tableConfigs = new ArrayList<Index>();
    }

    /**
     * Returns the singleton after reloading the configuration file.
     *
     * @return the shared instance
     */
    public static IndexConfigMgr getInstance()
    {
        synchronized (obj)
        {
            if(null == indexConfigMgr)
            {
                indexConfigMgr = new IndexConfigMgr();
            }
            // Reload inside the lock: two concurrent callers must not rebuild the list at once.
            indexConfigMgr.load();
            return indexConfigMgr;
        }
    }

    /**
     * Reads and parses the configuration file. On failure the error is logged and the
     * previously loaded configuration is kept.
     */
    private void load()
    {
        String path = WorkSpaceCenter.getClassPath(INDEX_DIR);
        try
        {
            Document doc = getDocumentByPath(path);
            loadIndexes(doc);
        } catch (Exception e) {
            logger.error("加载index.xml文件失败",e);
        }
    }

    /**
     * Parses the XML file at the given path into a DOM document.
     *
     * @param path file system path of the XML file
     * @return the parsed document
     * @throws Exception if the parser cannot be created or the file cannot be read/parsed
     */
    private Document getDocumentByPath(String path) throws Exception
    {
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        DocumentBuilder db = dbf.newDocumentBuilder();
        return db.parse(new File(path));
    }

    /**
     * Converts every {@code <index>} element of the document into an {@link Index} and
     * publishes the result as the new configuration list.
     *
     * @param doc parsed configuration document
     */
    private void loadIndexes(Document doc)
    {
        NodeList indexNodes = doc.getElementsByTagName(INDEX_NODE_NAME);
        // Build into a new list and swap at the end instead of clearing the shared one.
        List<Index> loaded = new ArrayList<Index>();
        for (int i = 0; i < indexNodes.getLength() ; i++)
        {
            Node node = indexNodes.item(i);
            if(!node.hasChildNodes())
            {
                continue;
            }
            Index index = newIndex(node);
            // An <index> without any filled-in child yields no definition; do not store null.
            if(null != index)
            {
                loaded.add(index);
            }
        }
        tableConfigs = loaded;
    }

    /**
     * Builds one {@link Index} from the children of an {@code <index>} element.
     * Children without content (whitespace text nodes, empty elements) are ignored.
     *
     * @param parent the {@code <index>} element
     * @return the index definition, or {@code null} when no child carried any content
     */
    private Index newIndex(Node parent)
    {
        String name = null;
        String all = null;
        String add = null;
        String update = null;
        String delete = null;
        String blob = null;
        boolean populated = false;
        NodeList nodes = parent.getChildNodes();
        for(int i = 0; i < nodes.getLength(); i++)
        {
            Node node = nodes.item(i);
            if(!node.hasChildNodes())
            {
                continue;
            }
            populated = true;
            String nodeName = node.getNodeName();
            if(NAME_NODE_NAME.equals(nodeName))
            {
                name = node.getTextContent().trim();
            }
            else if(ALL_NODE_NAME.equals(nodeName))
            {
                all = node.getTextContent().trim();
            }
            else if(ADD_NODE_NAME.equals(nodeName))
            {
                add = node.getTextContent().trim();
            }
            else if(UPDATE_NODE_NAME.equals(nodeName))
            {
                update = node.getTextContent().trim();
            }
            else if(DELETE_NODE_NAME.equals(nodeName))
            {
                delete = node.getTextContent().trim();
            }
            else if(BLOB_NODE_NAME.equals(nodeName))
            {
                blob = node.getTextContent().trim();
            }
        }
        // Construct once, after all children have been read.
        return populated ? new Index(name,all,add,update,delete,blob) : null;
    }

    /**
     * Returns the index definitions loaded by the most recent successful reload.
     *
     * @return list of index definitions (possibly empty)
     */
    public List<Index> getTableConfigs()
    {
        return tableConfigs;
    }
}
*************************************************2.定时创建索引****************************************
如果需要索引的数据量较大,创建索引会花费很长时间,建议通过定时任务来创建索引。
第一次需要全量创建索引;之后只需增量更新索引(新增、更新、删除),不必每次全量重建。
因此需要记录已索引数据的最大ID和上一次更新索引的时间。
1>////////////////////////创建索引的定时任务CreateIndexJob.java:
package com.lhzq.ibms.lucene.job;
import com.htsc.abms.lucene.model.Index;
import com.htsc.abms.lucene.service.CreateIndexService;
import com.lhzq.ibms.commons.util.Configuration;
import com.lhzq.ibms.lucene.util.*;
import com.lhzq.leap.core.utils.DateUtils;
import com.lhzq.leap.core.utils.FileUtility;
import com.lhzq.leap.core.utils.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.io.File;
import java.io.IOException;
import java.math.BigDecimal;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* 创建索引的定时任务
*/
/**
 * Job that builds the Lucene indexes configured in index.xml, either from scratch
 * ({@link #loadIndex()}) or incrementally ({@link #updateIndex()}).
 *
 * <p>After each index is processed, a checkpoint "maxId,time" is written through
 * {@link IndexLog}; the incremental run reads it back and binds it as the {@code ID} and
 * {@code UPDATE_TIME} query parameters.
 */
@Service("createIndexJob")
public class CreateIndexJob
{
    /** Logger. */
    private static Logger logger = LoggerFactory.getLogger(CreateIndexJob.class);

    /** Service that runs the configured SQL. */
    @Autowired
    private CreateIndexService indexService;

    /**
     * Deletes the whole index directory and rebuilds every configured index.
     *
     * @return a human-readable progress log (also contains the error message on failure)
     */
    public String loadIndex()
    {
        StringBuilder message = new StringBuilder();
        message.append("["+DateUtils.now()+"]:开始创建索引***********!\r\n");
        logger.info("开始创建索引***************************");
        long begin=System.currentTimeMillis();
        List<Index> indexes = IndexConfigMgr.getInstance().getTableConfigs();
        try
        {
            // Start from an empty index directory.
            String indexPath = Configuration.getLuceneIndexDir();
            FileUtility.deleteDir(indexPath);
            message.append("删除index目录成功!\r\n");
            logger.info("删除index目录成功**********");
            // Re-create the directory and the (empty) creation log file.
            CreateLog.init();
            for(Index index : indexes)
            {
                message.append("开始创建["+index.getName()+"]模块的索引!\r\n");
                logger.info("开始创建["+index.getName()+"]索引======");
                // Taken before the first query so that rows changed while the rebuild is
                // running are picked up again by the next incremental run.
                Date checkpointTime = new Date();
                BuildIndex buildIndex = new BuildIndex(index.getName());
                IndexLog indexLog;
                BigDecimal maxId;
                try
                {
                    indexLog = new IndexLog(index.getName());
                    buildIndex.setDocType(BuildIndex.DOC_TYPE_CREATE);
                    maxId = pageAddDoc(buildIndex,index,new HashMap<String,Object>());
                }
                finally
                {
                    // Always release the writer (and its write lock), even on failure.
                    buildIndex.close();
                }
                // Only advance the checkpoint after the writer closed successfully.
                after(indexLog,maxId,checkpointTime);
                message.append("创建["+index.getName()+"]模块索引完成!\r\n");
                logger.info("创建["+index.getName()+"]索引完成=======");
            }
            message.append("["+DateUtils.now()+"]:创建索引完成***********!\r\n");
            long end=System.currentTimeMillis();
            message.append("创建索引一共花费:"+(float)(end-begin)/1000+"秒");
            logger.info("创建索引完成********************************");
        } catch (Exception e) {
            message.append("创建索引异常:"+e.getMessage());
            logger.error("加载所有的索引失败", e);
        }
        return message.toString();
    }

    /**
     * Incrementally updates every configured index: adds new rows, re-indexes changed rows
     * and removes rows flagged as deleted, based on the last written checkpoint.
     *
     * @return a human-readable progress log (also contains the error message on failure)
     */
    public String updateIndex()
    {
        StringBuilder message = new StringBuilder();
        long begin=System.currentTimeMillis();
        message.append("["+DateUtils.now()+"]:开始更新索引***********!\r\n");
        List<Index> indexes = IndexConfigMgr.getInstance().getTableConfigs();
        try
        {
            for(Index index : indexes)
            {
                message.append("开始更新["+index.getName()+"]模块的索引!\r\n");
                // Taken before the first query; see loadIndex().
                Date checkpointTime = new Date();
                BuildIndex buildIndex = new BuildIndex(index.getName());
                IndexLog indexLog;
                BigDecimal maxId;
                try
                {
                    indexLog = new IndexLog(index.getName());
                    HashMap<String,Object> params = before(indexLog);
                    // Previous max ID; stays in effect when no new rows are found.
                    maxId =(BigDecimal)params.get("ID");
                    // New rows.
                    buildIndex.setDocType(BuildIndex.DOC_TYPE_ADD);
                    BigDecimal addMaxId = pageAddDoc(buildIndex,index,params);
                    if(null != addMaxId){
                        maxId = addMaxId;
                    }
                    // Changed rows.
                    buildIndex.setDocType(BuildIndex.DOC_TYPE_UPDATE);
                    pageAddDoc(buildIndex,index,params);
                    // Removed rows.
                    buildIndex.setDocType(BuildIndex.DOC_TYPE_DELETE);
                    pageAddDoc(buildIndex,index,params);
                }
                finally
                {
                    // Always release the writer (and its write lock), even on failure.
                    buildIndex.close();
                }
                after(indexLog,maxId,checkpointTime);
                message.append("更新["+index.getName()+"]模块索引完!\r\n");
            }
            message.append("["+DateUtils.now()+"]:更新索引完成***********!\r\n");
            long end=System.currentTimeMillis();
            message.append("更新索引花费了时间:" + (float)(end-begin)/1000+"秒");
        } catch (Exception e) {
            message.append("更新索引异常:" + e.getMessage());
            logger.error("更新索引失败", e);
        }
        return message.toString();
    }

    /**
     * Reads the checkpoint ("maxId,time") and turns it into query parameters.
     *
     * @param indexLog checkpoint file of the index
     * @return map with keys {@code ID} and {@code UPDATE_TIME}; empty when no checkpoint exists
     */
    private HashMap<String,Object> before(IndexLog indexLog)
    {
        HashMap<String,Object> params = new HashMap<String, Object>();
        String content = indexLog.readText();
        if(!StringUtils.isEmpty(content))
        {
            String[] parts = content.split(",");
            String id = parts[0];
            String now = parts[1];
            params.put("ID", new BigDecimal(id));
            params.put("UPDATE_TIME",DateUtils.toDate(now));
            logger.info("索引库中最大的ID:"+ id+",上次更新时间:"+now);
        }
        return params;
    }

    /**
     * Writes the new checkpoint. Nothing is written when no max ID is known.
     *
     * @param indexLog       checkpoint file of the index
     * @param maxId          largest indexed ID, may be {@code null}
     * @param checkpointTime time to record as "last update" (start of the run)
     */
    private void after(IndexLog indexLog,BigDecimal maxId,Date checkpointTime)
    {
        if(null == maxId){
            return;
        }
        String now = DateUtils.toString(checkpointTime);
        indexLog.WriteText(maxId + "," +now );
        logger.info("写入最大的ID:"+ maxId+",记录更新时间:"+now);
    }

    /**
     * Runs the SQL that belongs to the builder's current operation type page by page and
     * applies each page to the index.
     *
     * @param buildIndex index writer wrapper; its doc type selects SQL and operation
     * @param index      index definition holding the SQL statements
     * @param param      query parameters
     * @return the max ID reported by the pager for create/add operations, otherwise {@code null}
     * @throws IOException if writing to the index fails
     */
    private BigDecimal pageAddDoc(BuildIndex buildIndex,Index index,Map param) throws IOException {
        int docType = buildIndex.getDocType();
        String sql;
        switch (docType)
        {
            case BuildIndex.DOC_TYPE_CREATE:
                sql = index.getAll();
                break;
            case BuildIndex.DOC_TYPE_ADD:
                sql = index.getAdd();
                break;
            case BuildIndex.DOC_TYPE_UPDATE:
                sql = index.getUpdate();
                break;
            case BuildIndex.DOC_TYPE_DELETE:
                sql = index.getDelete();
                break;
            default:
                return null;
        }
        DataPage dataPage = new DataPage(this.indexService,index.getBlob(),param);
        dataPage.setBaseSql(sql);
        // Integer ceiling division; float arithmetic loses precision for large row counts.
        int total = dataPage.getCount();
        int count = total / DataPage.PAGE_SIZE + (total % DataPage.PAGE_SIZE == 0 ? 0 : 1);
        dataPage.setTotalPage(count);
        for(int i = 1; i <= count; i++)
        {
            List<Map<String,Object>> rows = dataPage.queryPage(i);
            switch (docType)
            {
                case BuildIndex.DOC_TYPE_CREATE:
                    buildIndex.addDoc(rows);
                    break;
                case BuildIndex.DOC_TYPE_ADD:
                    // Replace-by-ID instead of a plain add: if a previous run failed half way
                    // (pages written, checkpoint not advanced), re-running must not create
                    // duplicate documents. For an ID not yet indexed this is a plain add.
                    buildIndex.updateDoc(rows);
                    break;
                case BuildIndex.DOC_TYPE_UPDATE:
                    buildIndex.updateDoc(rows);
                    break;
                default:
                    buildIndex.deleteDoc(rows);
                    break;
            }
        }
        boolean tracksMaxId = docType == BuildIndex.DOC_TYPE_CREATE
                || docType == BuildIndex.DOC_TYPE_ADD;
        return tracksMaxId ? dataPage.getMaxId() : null;
    }

    public CreateIndexService getIndexService() {
        return indexService;
    }

    public void setIndexService(CreateIndexService indexService) {
        this.indexService = indexService;
    }
}
2>.//////////////////////////创建索引工具类BuildIndex.java:
package com.lhzq.ibms.lucene.util;
import com.lhzq.ibms.commons.util.Configuration;
import com.lhzq.leap.core.config.CommonConfig;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
/**
* Created with IntelliJ IDEA.
* User: 陈圣林
* Date: 14-5-12
* Time: 下午4:06
* Lucene创建索引工具类
*/
/**
 * Thin wrapper around a Lucene {@link IndexWriter} for one named index located under the
 * configured index root directory. Rows are passed in as column-name to value maps; every
 * column becomes a stored, analyzed text field. Documents are addressed by their {@code ID} field.
 *
 * <p>Call {@link #close()} when done; it closes the writer and the underlying directory.
 */
public class BuildIndex
{
    /** Operation types selectable through {@link #setDocType(int)}. */
    public static final int DOC_TYPE_CREATE = 0;
    public static final int DOC_TYPE_ADD = 1;
    public static final int DOC_TYPE_UPDATE = 2;
    public static final int DOC_TYPE_DELETE = 3;

    /** Directory the writer operates on; kept so it can be closed together with the writer. */
    private Directory directory;

    /** The index writer. */
    private IndexWriter indexWriter;

    /** Currently selected operation type. */
    private int docType;

    /**
     * Opens a writer on {@code <index root>/<name>}.
     *
     * @param name index name (sub-directory of the index root)
     * @throws IOException if the directory or the writer cannot be opened
     */
    public BuildIndex(String name) throws IOException {
        String indexPath = Configuration.getLuceneIndexDir();
        indexWriter = getIndexWriter(indexPath + "/" + name);
    }

    /**
     * Opens the directory and creates the writer on it.
     *
     * @param indexDir path of the index directory
     * @return the new writer
     * @throws IOException if opening fails; the directory is closed again in that case
     */
    private IndexWriter getIndexWriter(String indexDir) throws IOException {
        Directory dir = DirCenter.getDir(indexDir);
        Analyzer analyzer = AnalyzerCenter.getAnalyzer();
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47, analyzer);
        IndexWriter writer;
        try {
            writer = new IndexWriter(dir, config);
        } catch (IOException e) {
            // Do not leak the directory when the writer cannot be created (e.g. lock held).
            dir.close();
            throw e;
        }
        this.directory = dir;
        return writer;
    }

    /**
     * Converts one row into a document. A {@code null} column value is stored as an empty
     * string, so the field is still present but the literal text "null" is not indexed.
     *
     * @param record column name to value
     * @return the document
     */
    private Document createDoc(Map<String, Object> record) {
        Document doc = new Document();
        for (Map.Entry<String, Object> column : record.entrySet()) {
            Object raw = column.getValue();
            String value = (raw == null) ? "" : String.valueOf(raw);
            doc.add(new Field(column.getKey(), value, TextField.TYPE_STORED));
        }
        return doc;
    }

    /**
     * Adds every row as a new document.
     *
     * @param data rows to add
     * @throws IOException if the writer fails
     */
    public void addDoc(List<Map<String, Object>> data) throws IOException {
        for (Map<String, Object> record : data) {
            indexWriter.addDocument(createDoc(record));
        }
    }

    /**
     * Replaces, for every row, the documents whose {@code ID} term equals the row's ID.
     *
     * @param data rows to re-index
     * @throws IOException if the writer fails
     */
    public void updateDoc(List<Map<String, Object>> data) throws IOException {
        for (Map<String, Object> record : data) {
            Term term = new Term("ID", "" + record.get("ID"));
            indexWriter.updateDocument(term, createDoc(record));
        }
    }

    /**
     * Deletes, for every row, the documents whose {@code ID} term equals the row's ID.
     *
     * @param data rows whose documents are to be removed
     * @throws IOException if the writer fails
     */
    public void deleteDoc(List<Map<String, Object>> data) throws IOException {
        for (Map<String, Object> record : data) {
            Term term = new Term("ID", "" + record.get("ID"));
            indexWriter.deleteDocuments(term);
        }
    }

    /**
     * Closes the writer and then the directory. Safe to call more than once.
     *
     * @throws IOException if closing fails
     */
    public void close() throws IOException {
        if (null != this.indexWriter) {
            this.indexWriter.close();
            this.indexWriter = null;
        }
        // Closing the writer does not close the directory it was given.
        if (null != this.directory) {
            this.directory.close();
            this.directory = null;
        }
    }

    public int getDocType() {
        return docType;
    }

    public void setDocType(int docType) {
        this.docType = docType;
    }
}
3>.////////////////////////////单例拿到解析器
package com.lhzq.ibms.lucene.util;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.util.Version;
/**
* Created with IntelliJ IDEA.
* User: 陈圣林
* Date: 14-5-15
* Time: 上午10:21
* 单例模式 获取解析器
*/
/**
 * Provides the single shared {@link Analyzer} (a {@link StandardAnalyzer}) used for both
 * indexing and searching.
 */
public class AnalyzerCenter
{
    private AnalyzerCenter(){}

    /**
     * Holder class: the JVM initializes it exactly once, on first access, which gives a
     * lazily created instance without the unsynchronized null check.
     */
    private static final class Holder
    {
        private static final Analyzer INSTANCE = new StandardAnalyzer(Version.LUCENE_47);
    }

    /**
     * @return the shared analyzer
     */
    public static Analyzer getAnalyzer()
    {
        return Holder.INSTANCE;
    }
}
4>.///////////////////////////////打开一个索引目录工具类
package com.lhzq.ibms.lucene.util;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import java.io.File;
import java.io.IOException;
/**
* Created with IntelliJ IDEA.
* User: 陈圣林
* Date: 14-5-15
* Time: 上午10:31
* 打开一个目录
*/
/**
 * Opens a file-system based Lucene {@link Directory}, creating the folder first if needed.
 */
public class DirCenter
{
    private DirCenter(){}

    /**
     * Opens the index directory at the given path.
     *
     * @param path file system path of the index directory
     * @return the opened directory, or {@code null} when {@code path} is {@code null}
     * @throws IOException if the folder cannot be created or the directory cannot be opened
     */
    public static Directory getDir(String path) throws IOException
    {
        if(null == path)
        {
            return null;
        }
        File indexDir = new File(path);
        // mkdirs() also creates missing parent folders; re-check isDirectory() because
        // another thread may have created the folder in the meantime.
        if(!indexDir.exists() && !indexDir.mkdirs() && !indexDir.isDirectory())
        {
            throw new IOException("Cannot create index directory: " + indexDir.getAbsolutePath());
        }
        return FSDirectory.open(indexDir);
    }
}
5>./////////////////////////////一次创建索引太多,会导致内存溢出,需要分页创建
package com.lhzq.ibms.lucene.util;
import com.htsc.abms.lucene.service.CreateIndexService;
import java.math.BigDecimal;
import java.util.List;
import java.util.Map;
/**
* Created with IntelliJ IDEA.
* User: 陈圣林
* Date: 14-6-3
* Time: 下午4:09
* 包装分页查询数据
*/
/**
 * Pages through the result of a base SQL statement (Oracle ROWNUM pagination) so that large
 * tables can be indexed without loading everything at once, and tracks the largest
 * {@code ID} value seen in the pages fetched so far.
 */
public class DataPage
{
    /** Rows per page. */
    public static final int PAGE_SIZE = 20;

    /** Service that executes the SQL. */
    private CreateIndexService indexService;

    /** The un-paged SQL statement. */
    private String baseSql;

    /** Blob column specification passed through to the service. */
    private String blob;

    /** Query parameters. */
    private Map param;

    /** Largest ID seen so far, {@code null} until a row with an ID was fetched. */
    private BigDecimal maxId;

    /** Total number of pages, as set by the caller. */
    private Integer totalPage;

    /**
     * @param indexService service that executes the SQL
     * @param blob         blob column specification
     * @param param        query parameters
     */
    public DataPage(CreateIndexService indexService,String blob,Map param)
    {
        this.indexService = indexService;
        this.blob = blob;
        this.param = param;
    }

    /**
     * Fetches one page of rows and folds their IDs into the running maximum.
     *
     * <p>The maximum is computed over all fetched rows rather than taken from the last row
     * of the last page: that is also correct for statements without ORDER BY and does not
     * fail when a page comes back empty.
     *
     * @param pageNo 1-based page number
     * @return the rows of that page, as returned by the service
     */
    public List<Map<String,Object>> queryPage(int pageNo)
    {
        String sql = "SELECT * FROM (SELECT A.*,ROWNUM RN FROM ("+this.baseSql+") A WHERE ROWNUM <= "+pageNo * PAGE_SIZE+")"
                +" WHERE RN >= "+((pageNo-1) * PAGE_SIZE + 1);
        List<Map<String,Object>> data = indexService.queryPageData(sql,this.blob,this.param);
        if(null != data)
        {
            for(Map<String,Object> row : data)
            {
                Object rawId = row.get("ID");
                if(null == rawId)
                {
                    continue;
                }
                BigDecimal id = (BigDecimal)rawId;
                if(null == this.maxId || id.compareTo(this.maxId) > 0)
                {
                    this.maxId = id;
                }
            }
        }
        return data;
    }

    /**
     * Counts the rows produced by the base SQL.
     *
     * @return row count as reported by the service
     */
    public Integer getCount()
    {
        String sql = "SELECT COUNT(*) CNT FROM ("+this.baseSql+") A";
        return indexService.getCount(sql,this.param);
    }

    /**
     * @return the largest ID among the rows fetched so far, or {@code null} if none
     */
    public BigDecimal getMaxId() {
        return maxId;
    }

    public void setBaseSql(String baseSql) {
        this.baseSql = baseSql;
    }

    public void setTotalPage(Integer totalPage) {
        this.totalPage = totalPage;
    }
}
6>.///////////////////////////////记录上一次更新索引的时间和最大ID,方便更新索引
package com.lhzq.ibms.lucene.util;
import com.lhzq.ibms.commons.util.Configuration;
import com.lhzq.leap.core.config.CommonConfig;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.*;
/**
* Created with IntelliJ IDEA.
* User: 陈圣林
* Date: 14-5-13
* Time: 下午1:25
* 更新索引时,修改保存最大Id和更新时间
*/
/**
 * Small text file ({@code <index root>/<name>/<name>.txt}) holding the checkpoint of an
 * index: the text written by the last {@link #WriteText(String)} call.
 */
public class IndexLog
{
    /** Logger. */
    private static Logger logger = LoggerFactory.getLogger(IndexLog.class);

    /** The checkpoint file. */
    private File logFile;

    /**
     * Makes sure the index folder and the checkpoint file exist.
     *
     * @param dir index name; used as folder name and as file base name
     * @throws IOException if the file cannot be created
     */
    public IndexLog(String dir) throws IOException
    {
        String indexPath = Configuration.getLuceneIndexDir()+"/" + dir;
        File fileDir = new File(indexPath);
        if(!fileDir.exists()){
            // mkdirs(): the index root itself may not exist yet.
            fileDir.mkdirs();
        }
        File file = new File(fileDir,dir + ".txt");
        if(!file.exists()){
            file.createNewFile();
        }
        logFile = file;
    }

    /**
     * Reads the first line of the checkpoint file.
     *
     * @return the first line, or {@code null} when the file is empty or cannot be read
     */
    public String readText()
    {
        BufferedReader br = null;
        String content = null;
        try
        {
            br = new BufferedReader(new FileReader(logFile));
            content = br.readLine();
        }
        catch (IOException e)
        {
            logger.error("读取最大ID和上次更新索引时间失败", e);
        }
        finally
        {
            if (br != null) {
                try {
                    br.close();
                } catch (IOException e) {
                    logger.error("读取最大ID和上次更新索引时,关闭IO失败", e);
                }
            }
        }
        return content;
    }

    /**
     * Overwrites the checkpoint file with the given text. Errors are logged, not thrown.
     *
     * @param text new file content
     */
    public void WriteText(String text)
    {
        BufferedWriter bw = null;
        try {
            bw = new BufferedWriter(new FileWriter(logFile));
            bw.write(text);
        } catch (IOException e) {
            logger.error("写入最大ID和上次更新索引失败", e);
        }
        finally
        {
            if (bw != null) {
                try {
                    bw.close();
                } catch (IOException e) {
                    logger.error("写入最大ID和上次更新索引时,关闭IO失败", e);
                }
            }
        }
    }
}
***********************************************************3.手动创建索引******************************************
1>.////////////////////////页面:
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
<%@ page contentType="text/html;charset=UTF-8" language="java" %>
<%@include file="/modules/comm/loadingData.jsp"%>
<%@include file="/common/path_header.jsp" %>
<%@include file="/common/jqgrid_header.jsp" %>
<!--dwr-->
<script type="text/javascript" src="<%=path%>/dwr/engine.js"></script>
<script type="text/javascript" src="<%=path%>/dwr/util.js"></script>
<script type="text/javascript" src="<%=path%>/dwr/interface/dwrIndexManage.js"></script>
<html>
<head>
<title>索引维护</title>
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
<style type="text/css">
.indexInfo{
width: 800px;
height: 200px;
border: 2px solid #E5E5E5;
}
</style>
</head>
<body>
<div style="text-align: center">
<ul style="text-align:left;list-style-type:none;">
<li>
<a class="button glow button-rounded button-flat-primary button-tiny" onclick="createIndex();"> 创建索引 </a>
</li>
<li>
<div class="indexInfo" id="createInfo">创建索引日志...</div>
</li>
<li style="margin-top: 10px;">
<a class="button glow button-rounded button-flat-primary button-tiny" onclick="updateIndex();"> 更新索引 </a>
</li>
<li>
<div class="indexInfo" id="updateInfo">更新索引日志...</div>
</li>
</ul>
</div>
</body>
</html>
<script type="text/javascript">
// Handle of the repeating poll that asks the server for the create-index result.
var interval = null;
// Handle of the timer that delays the start of that poll.
var time = null;

// Starts a full index rebuild on the server and, after one minute, begins polling
// once per second for the resulting log text.
function createIndex(){
    // Drop timers left over from an earlier click so that only one poll is active.
    stopPolling();
    // Show the loading overlay (createDiv/removeDiv are defined outside this script).
    createDiv();
    dwrIndexManage.createIndex();
    time = setTimeout(function(){
        interval = setInterval(showCtResult, 1000);
    },60000);
}

// Cancels the delayed start and the running poll, if any.
function stopPolling(){
    if(time != null){
        clearTimeout(time);
        time = null;
    }
    if(interval != null){
        clearInterval(interval);
        interval = null;
    }
}

// Poll callback: once the server returns a non-empty log, stop polling and show it.
function showCtResult(){
    dwrIndexManage.queryCtResult(function(data){
        if(null!=data&&data!=''){
            stopPolling();
            setValue("createInfo",data);
            removeDiv();
        }
    });
}

// Runs an incremental index update and shows the returned log.
function updateIndex(){
    createDiv();
    dwrIndexManage.updateIndex({
        // Called with the log text returned by the server.
        callback: function(data){
            setValue("updateInfo",data);
            removeDiv();
        },
        // Timeout in milliseconds; 0 disables it.
        timeout: 0,
        // Called when the remote call fails; also remove the overlay so the page stays usable.
        errorHandler:function(message) {
            removeDiv();
            alert(message);
        }
    });
}

// Shows plain text inside the element with the given id, preserving line breaks.
function setValue(id,data){
    // The log may contain exception messages; escape it so it is rendered as text, not markup.
    var escapeHtml = function(text){
        return String(text)
            .replace(/&/g, "&amp;")
            .replace(/</g, "&lt;")
            .replace(/>/g, "&gt;");
    };
    document.getElementById(id).innerHTML="<pre>"+escapeHtml(data)+"</pre>";
}
</script>
2>.////////////////////////////////DWR操作
package com.lhzq.ibms.lucene.dwr;
import com.lhzq.ibms.lucene.job.CreateIndexJob;
import com.lhzq.ibms.lucene.util.CreateLog;
import org.springframework.beans.factory.annotation.Autowired;
/**
* Created with IntelliJ IDEA.
* User: 陈圣林
* Date: 14-6-18
* Time: 下午5:24
* 手动索引的创建和更新
*/
/**
 * Remote facade used by the index maintenance page to trigger index creation/update
 * manually and to fetch the creation log.
 */
public class DwrIndexManage
{
    /** Job that does the actual indexing work. */
    @Autowired
    private CreateIndexJob indexJob;

    /**
     * Rebuilds all indexes and stores the resulting log text via {@link CreateLog}.
     */
    public void createIndex()
    {
        CreateLog.write(indexJob.loadIndex());
    }

    /**
     * Returns the stored creation log.
     *
     * @return trimmed content read through {@link CreateLog#read()}
     */
    public String queryCtResult()
    {
        String stored = CreateLog.read();
        return stored.trim();
    }

    /**
     * Runs an incremental index update.
     *
     * @return the log text produced by the update
     */
    public String updateIndex()
    {
        String updateLog = indexJob.updateIndex();
        return updateLog;
    }
}
3>.////////////////////////////页面返回创建操作日志:
package com.lhzq.ibms.lucene.util;
import com.lhzq.ibms.commons.util.Configuration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.*;
/**
* Created with IntelliJ IDEA.
* User: 陈圣林
* Date: 14-5-13
* Time: 下午1:25
* 记录创建和更新索引的操作日志
*/
/**
 * Reads and writes the text file {@code <index root>/createLog.txt} that carries the log
 * of the last manual index creation to the maintenance page.
 */
public class CreateLog
{
    /** Logger. */
    private static Logger logger = LoggerFactory.getLogger(CreateLog.class);

    /** Location of the log file. */
    private static String path = Configuration.getLuceneIndexDir()+"/createLog.txt";

    /**
     * Creates the index root folder and an empty log file when they do not exist yet.
     * Errors are logged, not thrown.
     */
    public static void init(){
        try {
            File indexDir = new File(Configuration.getLuceneIndexDir());
            if(!indexDir.exists())
            {
                // mkdirs(): the configured root may be nested below folders that are missing.
                indexDir.mkdirs();
            }
            File createLogFile = new File(path);
            if(!createLogFile.exists()){
                createLogFile.createNewFile();
            }
        } catch (IOException e) {
            logger.error("创建日志文件失败",e);
        }
    }

    /**
     * Reads the whole log file.
     *
     * @return the file content with each line terminated by CRLF; the part read so far
     *         (possibly empty) when reading fails
     */
    public static String read()
    {
        BufferedReader br = null;
        StringBuilder log = new StringBuilder();
        try
        {
            br = new BufferedReader(new FileReader(path));
            String line = null;
            while((line = br.readLine())!=null)
            {
                log.append(line).append("\r\n");
            }
        }
        catch (IOException e)
        {
            logger.error("创建索引读取日志异常", e);
        }
        finally
        {
            if (br != null) {
                try {
                    br.close();
                } catch (IOException e) {
                    logger.error("创建索引读取日志,关闭IO失败", e);
                }
            }
        }
        return log.toString();
    }

    /**
     * Overwrites the log file with the given text. Errors are logged, not thrown.
     *
     * @param log new file content
     */
    public static void write(String log)
    {
        BufferedWriter bw = null;
        try {
            bw = new BufferedWriter(new FileWriter(path));
            bw.write(log);
        } catch (IOException e) {
            logger.error("创建索引写入日志错误", e);
        }
        finally
        {
            if (bw != null) {
                try {
                    bw.close();
                } catch (IOException e) {
                    logger.error("创建索引写入日志,关闭IO失败", e);
                }
            }
        }
    }
}
三、查询索引
1>.///////////////////////////////////页面
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
<%@ page contentType="text/html;charset=UTF-8" language="java" %>
<%@ taglib prefix="struts" uri="/struts-tags" %>
<!--引入path java中的path和js中的path-->
<%@include file="/common/path_header.jsp" %>
<%@include file="/common/jqgrid_header.jsp" %>
<html>
<head>
<title>全文检索</title>
<script type="text/javascript" src="${path}/script/common/rims.js"></script>
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
</head>
<body style="text-align: center">
<!--查询条件-->
<form id="riskReferencePoint" method="post">
<div style="text-align: center">
<div style="overflow:auto;zoom:1;padding:10px 0px 5px 0px;">
<ul style="text-align:left;list-style-type:none;">
<li style="float:left;">
检索信息:
<input type="text" id="KEYWORD" name="keyword" onkeydown="if(event.keyCode==13){ enterSearch();}" size="80" />
</li>
<li style="float:left;">
<a href="javascript:search();" class="button glow button-rounded button-flat-primary button-tiny" id="save"> 检索 </a>
</li>
</ul>
</div>
</div>
</form>
<!--检索规章制度结果集-->
<table id="riskRolesGrid">
</table>
<br>
<!--检索风险问题结果集-->
<table id="riskProblemGrid">
</table>
</body>
<script type="text/javascript">
// Invoked once while the page loads.
no_data();

// Reads the keyword box and returns; as written it has no further effect, whether or not
// a keyword is present.
function no_data() {
    var keyword = $("#KEYWORD").val();
    var isBlank = keyword == undefined || keyword == null || keyword == '';
    if (isBlank) {
        return;
    }
}
// Result grid for matching regulations. Rows are supplied by the class named in
// _gridDatasourceClass; the KEYWORD input is posted along with each request.
// loadDataFlag is false — presumably this keeps the grid from loading on page open (confirm in AbmsGrid).
new AbmsGrid('riskRolesGrid',{
colNames:['id','标题','内容','文档下载'],
colModel:[
{
name:'ID'
,key:true
,width:55
,hidden:true
}
,{
name:'NAME'
,width:100
}
,{
name:'BODY'
,width:400
,formatter:function(value){
return "<pre>"+trimToSummary(value)+"</pre>";
}
}
,{
width:60
,align:'center'
,formatter:function( value,options,rowData ){
// Custom cell renderer: show '--' when the row has no BODY, otherwise a download link for the row's ID
if(rowData.BODY==undefined||rowData.BODY==null||rowData.BODY==''){
return '--';
}
return '<a href="javascript:uploadRiskRules('+rowData.ID+');" style="color:#fff" class="button glow button-rounded button-flat-primary button-tiny">文档下载</a>';
}
}
],
postParamNames:['KEYWORD'],
_gridDatasourceClass:'com.htsc.abms.auditrisk.web.RiskRuleDatasource',
showPagerTool:true,
loadDataFlag:false,
caption:"风险规章制度"
});
// Result grid for matching risk problems. Rows are supplied by the class named in
// _gridDatasourceClass; the KEYWORD input is posted along with each request.
new AbmsGrid('riskProblemGrid',{
colNames:['id','风险问题', '审计意见','处罚意见','详细信息'],// ,'处罚内容' (column currently disabled, see the commented-out model entry below)
colModel:[
{
name:'ID'
,key:true
,width:55
,hidden:true
},
{
name:'TITLE'
,width:220
},
{
name:'CONTENTS'
,width:250
},
{
name:'PUNISH'
,width:200
}
// ,{
// name:'CRITERION_CONTENT'
// ,width:200
// }
,{
width:60
,align:'center'
,formatter:function( value,options,rowData ){
// Custom cell renderer: a link that opens the detail view for the row's ID
return '<a href="javascript:findRiskProblem('+rowData.ID+');" style="color:#fff" class="button glow button-rounded button-flat-primary button-tiny">详细信息</a>';
}
}
],
postParamNames:['KEYWORD'],
_gridDatasourceClass:'com.htsc.abms.auditrisk.web.RiskProblemDatasource',
showPagerTool:true,
loadDataFlag:false,
caption:"风险问题"
});
// Run the search: reload both result grids, regulations first, then risk problems.
function search(){
    var gridIds = ["riskRolesGrid", "riskProblemGrid"];
    for (var i = 0; i < gridIds.length; i++) {
        $("#" + gridIds[i]).trigger("reloadGrid");
    }
}
// Posts the given regulation id to riskRules/uploadRiskRules.do through a temporary form.
// (The grid uses this for its "document download" link.)
function uploadRiskRules(id)
{
    // Use the page's ${path} like the other URLs on this page instead of a hard-coded
    // context path, and set the value through jQuery so it is never parsed as markup.
    var form = jQuery('<form method="post"></form>').attr('action', "${path}/riskRules/uploadRiskRules.do");
    jQuery('<input type="hidden" name="id"/>').val(id).appendTo(form);
    form.appendTo('body').submit().remove();
}
// Open the detail view of one risk problem in a 900x900 window.
function findRiskProblem(id) {
    var width = 900;
    var height = 900;
    rims.window.showWindow("${path}/risk/viewRiskById.do?riskId=" + id, width, height, null);
}
// Enter-key handler of the keyword box: take focus off the input, then search.
function enterSearch(){
    var keywordInput = $("#KEYWORD");
    keywordInput.blur();
    search();
}
// Shorten a text to at most 30 characters for display, appending '...' when it was cut.
// Null, undefined and empty values are returned unchanged.
function trimToSummary(str){
    var maxLength = 30;
    var isBlank = (null == str) || (str == '');
    if (isBlank || !(str.length > maxLength)) {
        return str;
    }
    return str.substring(0, maxLength) + '...';
}
</script>
</html>
2>./////////////////////////////////////后台数据读取
package com.htsc.abms.auditrisk.web;
import com.htsc.abms.jqgrid.model.GridData;
import com.htsc.abms.jqgrid.model.GridPostParam;
import com.htsc.abms.jqgrid.util.GridDatasourceInterface;
import com.lhzq.ibms.lucene.util.Searcher;
import com.lhzq.leap.core.utils.AppUtils;
import com.lhzq.leap.core.utils.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import java.util.List;
import java.util.Map;
/**
* User: 陈圣林
* Date: 14-5-27
* Time: 下午2:19
* 风险问题查选检索
*/
/**
 * Grid data source that answers keyword searches against the "riskProblem" Lucene index.
 */
@Component
public class RiskProblemDatasource implements GridDatasourceInterface {
    /** Logger. */
    private static Logger logger = LoggerFactory.getLogger(RiskProblemDatasource.class);

    /** Fields of the index that are searched. */
    private static final String[] INDEX_FIELDS = {"ID", "TITLE", "CONTENTS", "PUNISH", "CRITERION_CONTENT"};

    /**
     * Searches the index for the posted keyword(s).
     *
     * @param gridPostParam request parameters of the grid (keyword, page, page size)
     * @return grid data holding the hits; an empty {@link GridData} when no keyword was
     *         given, the search failed or nothing matched
     */
    public GridData getGridData(GridPostParam gridPostParam) {
        String keyword = (String) gridPostParam.getParamMap().get("KEYWORD");
        if (StringUtils.isEmpty(keyword)) {
            return new GridData();
        }

        // Paging information requested by the grid.
        Integer page = gridPostParam.getPage();
        Integer rowsPerPage = gridPostParam.getPageSize();

        // Each whitespace-separated keyword is turned into a quoted phrase.
        String[] phrases = AppUtils.keywords(keyword);

        List<Map<String, String>> hits = null;
        try {
            hits = new Searcher("riskProblem").search(phrases, INDEX_FIELDS);
        } catch (Exception e) {
            logger.error("全文检索异常", e);
        }

        return AppUtils.isBlank(hits)
                ? new GridData()
                : new GridData(rowsPerPage, page, hits);
    }
}
StandardAnalyzer 会把中文按单个字切分,因此默认是按单个字匹配的;为了按词组(短语)查询,需要把每个关键字用双引号括起来,作为短语查询来处理:
package com.lhzq.leap.core.utils;
import java.io.Serializable;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collection;
import java.util.GregorianCalendar;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import javax.servlet.http.Cookie;
import javax.servlet.http.HttpServletRequest;
import org.apache.commons.beanutils.BeanUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* 应用帮助工具
*/
/**
 * Application helper utilities.
 */
public class AppUtils{
    /**
     * Splits the input on whitespace and wraps every piece in double quotes so that a
     * query parser treats it as one phrase instead of separate characters/words.
     *
     * <p>Backslashes and double quotes inside a piece are escaped with a backslash;
     * otherwise a quote typed by the user would end the phrase early and produce an
     * unparsable query.
     *
     * @param keyword raw user input, may be {@code null}
     * @return one quoted phrase per whitespace-separated piece; an empty array for
     *         {@code null} input
     */
    public static String[] keywords(String keyword) {
        if (keyword == null) {
            return new String[0];
        }
        String[] keywords = keyword.trim().split("\\s+");
        for (int i = 0; i < keywords.length; i++) {
            keywords[i] = "\"" + escapePhrase(keywords[i]) + "\"";
        }
        return keywords;
    }

    /**
     * Escapes the characters that are special inside a quoted phrase.
     *
     * @param text phrase content
     * @return the text with every backslash and double quote preceded by a backslash
     */
    private static String escapePhrase(String text) {
        StringBuilder escaped = new StringBuilder(text.length() + 4);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            if (c == '\\' || c == '"') {
                escaped.append('\\');
            }
            escaped.append(c);
        }
        return escaped.toString();
    }
}
3>.////////////////////////封装的查询器
package com.lhzq.ibms.lucene.util;
import com.lhzq.ibms.commons.util.Configuration;
import com.lhzq.leap.core.config.CommonConfig;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Created with IntelliJ IDEA.
* User: 陈圣林
* Date: 14-5-12
* Time: 下午5:40
* 全文索引收索工具类
*/
/**
 * Full-text search helper for one named Lucene index below the configured index root.
 *
 * <p>Every instance opens its own index reader. Call {@link #close()} when the instance
 * is no longer needed, otherwise the reader's file handles stay open.
 */
public class Searcher implements java.io.Closeable
{
    /**
     * Maximum number of hits fetched per search; when more documents match, only the
     * best-scoring {@code MAX_COUNT} are returned.
     */
    private static final int MAX_COUNT = 1000;

    /** Directory of the index; kept so it can be closed. */
    private Directory indexDir = null;

    /** Reader on the index; kept so it can be closed. */
    private IndexReader reader = null;

    /** Searcher working on {@link #reader}. */
    private IndexSearcher indexSearcher = null;

    /**
     * Opens the index {@code <index root>/<name>} for searching.
     *
     * @param name index name (sub-directory of the index root)
     * @throws java.io.IOException if the index cannot be opened
     */
    public Searcher(String name) throws IOException
    {
        String indexPath = Configuration.getLuceneIndexDir() + "/" + name;
        indexDir = DirCenter.getDir(indexPath);
        try
        {
            reader = DirectoryReader.open(indexDir);
        }
        catch (IOException e)
        {
            // Nothing to search (e.g. no index yet): release the directory again.
            indexDir.close();
            indexDir = null;
            throw e;
        }
        indexSearcher = new IndexSearcher(reader);
    }

    /**
     * Searches with a single query string across the given fields.
     *
     * @param keywords    query string in query-parser syntax
     * @param indexFields fields to search
     * @return one map (field name to stored value) per hit
     * @throws Exception if the query cannot be parsed or the search fails
     */
    public List<Map<String,String>> search(String keywords,String []indexFields) throws Exception
    {
        Analyzer analyzer = AnalyzerCenter.getAnalyzer();
        MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_47,indexFields,analyzer);
        Query query = parser.parse(keywords);
        return search(query);
    }

    /**
     * Searches with several query strings that must all match (AND).
     *
     * @param keywords    query strings in query-parser syntax
     * @param indexFields fields to search
     * @return one map (field name to stored value) per hit
     * @throws Exception if a query cannot be parsed or the search fails
     */
    public List<Map<String,String>> search(String [] keywords,String []indexFields) throws Exception
    {
        Analyzer analyzer = AnalyzerCenter.getAnalyzer();
        MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_47,indexFields,analyzer);
        BooleanQuery bq = new BooleanQuery();
        for(String keyword : keywords)
        {
            // MUST: every keyword has to match.
            bq.add(parser.parse(keyword), BooleanClause.Occur.MUST);
        }
        return search(bq);
    }

    /**
     * Runs the query and converts the hits.
     *
     * @param query query to run
     * @return one map per hit, at most {@link #MAX_COUNT} entries
     * @throws Exception if the search fails
     */
    private List<Map<String,String>> search(Query query)throws Exception
    {
        ScoreDoc[] hits = indexSearcher.search(query, null, MAX_COUNT).scoreDocs;
        List<Map<String,String>> data = new ArrayList<Map<String,String>>(hits.length);
        for (int i = 0; i < hits.length; i++) {
            Document hitDoc = indexSearcher.doc(hits[i].doc);
            data.add(getDocsItem(hitDoc));
        }
        return data;
    }

    /**
     * Converts a hit document into a map of field name to string value.
     *
     * @param hitDoc the document of a hit
     * @return field name to value
     * @throws java.io.IOException declared for compatibility
     */
    private Map<String,String> getDocsItem(Document hitDoc) throws IOException
    {
        Map<String,String> record = new HashMap<String, String>();
        for(IndexableField field : hitDoc.getFields())
        {
            record.put(field.name(), field.stringValue());
        }
        return record;
    }

    /**
     * Closes the index reader and the directory. Safe to call more than once; the
     * instance must not be used for searching afterwards.
     *
     * @throws IOException if closing fails
     */
    public void close() throws IOException
    {
        indexSearcher = null;
        if (null != reader)
        {
            reader.close();
            reader = null;
        }
        if (null != indexDir)
        {
            indexDir.close();
            indexDir = null;
        }
    }
}