一个搜索功能:要求将所有包含搜索关键字的文章的标题列出来(文章的内容存储在Oracle的CLOB字段中),也就是要用Lucene实现对数据库大字段的索引和搜索。Lucene索引的创建通过Java定时任务来完成。
定时调用建立索引方法
package com.qqw.index;
import java.util.Timer;
public class IndexerServer {
    /**
     * Stand-alone entry point: selects the configuration file, then schedules
     * the index-building task at a fixed rate, starting immediately.
     *
     * @param args command-line arguments (not used)
     * @author matieli
     * @create 2011-10-23
     */
    public static void main(String[] args) {
        Config.setConfigFileName("directory.properties");

        Timer scheduler = new Timer();
        LuceneDBIndexerTask indexTask = LuceneDBIndexerTask.getInstance();

        // Repeat interval, taken from the configured create_index_sleep_time value.
        long periodMillis = DataTypeUtil.toLong(Constant.CREATE_INDEX_SLEEP_TIME);
        scheduler.scheduleAtFixedRate(indexTask, 0, periodMillis);
    }
}
建立索引的核心实现
package com.qqw.index;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.StringWriter;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Date;
import java.util.TimerTask;
import oracle.sql.CLOB;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * Timer task that rebuilds the Lucene full-text index from the database.
 *
 * <p>Each run executes {@code Constant.DB_QUERY_STRING}, converts every row into
 * a Lucene document (the {@code CONTENT} CLOB is read fully into a string) and
 * rewrites the index from scratch. A time-stamped directory is also created
 * under the configured parent directory, and only the newest
 * {@link #MAX_KEPT_DIRS} of those directories are kept.
 *
 * @author matieli
 * @create 2011-10-23
 */
public class LuceneDBIndexerTask extends TimerTask {
    /** Fallback parent directory used when no store directory is configured. */
    private static final String DEFAULT_INDEX_DIR = "C:\\IndexDB";

    /**
     * Directory the Lucene index is actually written to.
     * NOTE(review): this path is hard-coded and unrelated to {@link #parentDir};
     * the time-stamped directories created under parentDir receive no index
     * files. Confirm which location the search side reads before changing it.
     */
    private static final String BLOG_INDEX_DIR = "d:\\fileIndex\\blogs";

    /** Number of time-stamped directories kept under the parent directory. */
    private static final int MAX_KEPT_DIRS = 3;

    /** Parent of the time-stamped directories created on every run. */
    private File parentDir = null;

    /** The single shared task instance. */
    private static LuceneDBIndexerTask index = new LuceneDBIndexerTask();

    /**
     * Resolves the parent directory from {@code Constant.INDEX_STORE_DIRECTORY}
     * (falling back to {@link #DEFAULT_INDEX_DIR}) and creates it if missing.
     */
    private LuceneDBIndexerTask() {
        String dirStr = Constant.INDEX_STORE_DIRECTORY;
        if (dirStr != null && !"".equals(dirStr)) {
            this.parentDir = new File(dirStr);
        } else {
            this.parentDir = new File(DEFAULT_INDEX_DIR);
        }
        // mkdirs (not mkdir): the configured path may be several levels deep.
        if (!this.parentDir.exists() && !this.parentDir.mkdirs()) {
            System.err.println("Could not create index parent directory " + this.parentDir);
        }
    }

    /**
     * Returns the shared task instance.
     *
     * @return the singleton task
     */
    public static LuceneDBIndexerTask getInstance() {
        return index;
    }

    /**
     * Rebuilds the index from the rows returned by the configured query.
     *
     * <p>Failures are logged and never propagated: an exception escaping a
     * {@link TimerTask} terminates the timer thread and would silently stop
     * all future index builds.
     */
    @Override
    public void run() {
        System.out.println("====LuceneDBIndexerTask$run()===============");
        System.out.println("~~~开始建立索引文件~~~~~~~~~~~~~~~");
        Connection conn = null;
        Statement stmt = null;
        ResultSet rs = null;
        IndexWriter writer = null;
        File indexDir = new File(BLOG_INDEX_DIR);
        Analyzer analyzer = new IKAnalyzer();
        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_33, analyzer);
        conf.setOpenMode(OpenMode.CREATE);
        try {
            Class.forName(Constant.DB_DRIVER_STRING).newInstance();
            conn = DriverManager.getConnection(Constant.DB_URI_STRING, Constant.DB_USERNAME, Constant.DB_PWD);
            stmt = conn.createStatement();
            rs = stmt.executeQuery(Constant.DB_QUERY_STRING);

            File file = new File(parentDir, new SimpleDateFormat("yyyyMMddHHmmss").format(new Date()));
            if (!file.exists()) {
                file.mkdirs();
            }

            writer = new IndexWriter(FSDirectory.open(indexDir), conf);
            long startTime = System.currentTimeMillis();
            // One formatter for the whole run; it is only used on this thread.
            SimpleDateFormat dayFormat = new SimpleDateFormat("yyyy-MM-dd");
            while (rs.next()) {
                writer.addDocument(buildDocument(rs, dayFormat));
            }
            writer.optimize();
            writer.close();
            writer = null; // committed and closed; nothing left to roll back

            // Report how long indexing took.
            long endTime = System.currentTimeMillis();
            System.out.println("索引文件" + file.getPath() + "建立成功...");
            System.out.println("这花费了" + (endTime - startTime) + " 毫秒来把文档增加到索引里面去!");
            // Keep only the newest time-stamped directories.
            checkFiles(parentDir);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (SQLException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } catch (InstantiationException e) {
            e.printStackTrace();
        } catch (IllegalAccessException e) {
            e.printStackTrace();
        } catch (RuntimeException e) {
            // Must not escape: it would kill the scheduling Timer thread.
            e.printStackTrace();
        } finally {
            rollbackQuietly(writer);
            closeJdbc(rs, stmt, conn);
        }
    }

    /** Builds the Lucene document for the current row of {@code rs}. */
    private Document buildDocument(ResultSet rs, SimpleDateFormat dayFormat)
            throws SQLException, IOException {
        Document doc = new Document();
        doc.add(new Field("ARTICLEID", nullToEmpty(rs.getString("ARTICLEID")), Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field("TITLE", nullToEmpty(rs.getString("TITLE")), Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field("USERNAME", nullToEmpty(rs.getString("USERNAME")), Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field("USERID", nullToEmpty(rs.getString("USERID")), Field.Store.YES, Field.Index.ANALYZED));
        // Index the creation date as a yyyy-MM-dd string.
        Date created = rs.getTimestamp("CREATEDATE");
        String createdate = (created == null) ? "" : dayFormat.format(created);
        doc.add(new Field("CREATEDATE", createdate, Field.Store.YES, Field.Index.ANALYZED));
        // Index the large text column.
        String content = readClob(rs.getClob("CONTENT"));
        doc.add(new Field("CONTENT", content, Field.Store.YES, Field.Index.ANALYZED));
        return doc;
    }

    /** Reads a CLOB fully into a string; a null CLOB yields the empty string. */
    private static String readClob(java.sql.Clob clob) throws SQLException, IOException {
        if (clob == null) {
            return "";
        }
        BufferedReader in = new BufferedReader(clob.getCharacterStream());
        try {
            StringWriter out = new StringWriter();
            char[] buffer = new char[4096];
            int read;
            while ((read = in.read(buffer)) != -1) {
                out.write(buffer, 0, read);
            }
            return out.toString();
        } finally {
            in.close();
        }
    }

    /** Lucene fields reject null values, so map null column values to "". */
    private static String nullToEmpty(String value) {
        return value == null ? "" : value;
    }

    /** Discards uncommitted changes and releases the index write lock after a failed run. */
    private static void rollbackQuietly(IndexWriter writer) {
        if (writer == null) {
            return;
        }
        try {
            writer.rollback();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Closes the JDBC resources independently so one failure does not skip the others. */
    private static void closeJdbc(ResultSet rs, Statement stmt, Connection conn) {
        if (rs != null) {
            try {
                rs.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
        if (stmt != null) {
            try {
                stmt.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
        if (conn != null) {
            try {
                conn.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Keeps at most {@link #MAX_KEPT_DIRS} entries in {@code dir}, deleting the
     * ones that sort first by path. The directory names are yyyyMMddHHmmss
     * timestamps, so the first entries are the oldest.
     *
     * @param dir directory whose children are pruned
     */
    public void checkFiles(File dir) {
        File[] entries = dir.listFiles();
        if (entries == null) {
            // Not a directory, or it could not be read: nothing to prune.
            return;
        }
        Arrays.sort(entries);
        int excess = entries.length - MAX_KEPT_DIRS;
        for (int i = 0; i < excess; i++) {
            deleteDirectory(entries[i]);
        }
    }

    /**
     * Recursively deletes a directory and everything below it. Also works when
     * {@code path} is a regular file.
     *
     * @param path file or directory to delete
     * @return {@code true} if {@code path} itself was deleted
     */
    public boolean deleteDirectory(File path) {
        // listFiles() returns null for regular files and missing paths.
        File[] children = path.listFiles();
        if (children != null) {
            for (int i = 0; i < children.length; i++) {
                if (children[i].isDirectory()) {
                    deleteDirectory(children[i]);
                } else {
                    children[i].delete();
                }
            }
        }
        boolean hasdelete = path.delete();
        if (hasdelete) {
            System.out.println("删除索引目录" + path);
        }
        return hasdelete;
    }

    /**
     * Runs a single index build immediately, without a timer.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        getInstance().run();
    }
}
配置文件管理类:
package com.qqw.index;
import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;
/**
 * Process-wide access to the settings stored in a properties file on the
 * classpath.
 *
 * <p>The file name can be chosen with {@link #setConfigFileName(String)} before
 * the first call to {@link #getInstance()}; otherwise
 * {@code directory.properties} is used. The file is loaded once, when the
 * shared instance is first requested.
 *
 * @author matieli
 * @create 2011-10-23
 */
public class Config {
    /** Resource loaded when no file name has been set. */
    private static final String DEFAULT_CONFIG_FILE = "directory.properties";

    private static Config cfg = null;
    private static String configFileName = null;
    private Properties props;

    /** Creates an empty configuration; nothing is loaded until the shared instance is built. */
    public Config() {
        props = new java.util.Properties();
    }

    /**
     * Returns the shared instance, creating and loading it on first use.
     *
     * @return the singleton configuration
     */
    public synchronized static Config getInstance() {
        if (cfg == null) {
            Config loaded = new Config();
            loaded.loadConfig();
            cfg = loaded;
        }
        return cfg;
    }

    /**
     * Loads the properties from the classpath.
     *
     * @return 1 if the file was found and loaded, 0 otherwise
     */
    private int loadConfig() {
        // An unset or empty name falls back to the default file; the original
        // "!= null || length() > 0" check dereferenced null in exactly that case.
        String resource = (configFileName != null && configFileName.length() > 0)
                ? configFileName
                : DEFAULT_CONFIG_FILE;
        System.out.println("configFileName=" + resource);

        InputStream inputStream = Config.class.getClassLoader().getResourceAsStream(resource);
        if (inputStream == null) {
            System.err.println("Configuration file not found on classpath: " + resource);
            return 0;
        }
        try {
            props.load(inputStream);
            return 1;
        } catch (IOException e) {
            e.printStackTrace();
            return 0;
        } finally {
            try {
                inputStream.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing a read-only stream fails.
            }
        }
    }

    /**
     * Sets the classpath resource to load. Only effective before the first
     * {@link #getInstance()} call, because the file is loaded once.
     *
     * @param cfg resource name of the properties file
     */
    public static void setConfigFileName(String cfg) {
        configFileName = cfg;
    }

    /**
     * Looks up a setting.
     *
     * @param keyName property key
     * @return the value, or {@code null} if the key is absent or the file was not loaded
     */
    public String getProperty(String keyName) {
        return props.getProperty(keyName);
    }
}
常量配置
package com.qqw.index;
/**
* 常量配置类 *
* @author matieli
* @create 2011-10-23
*/
public class Constant {
// 隔多长时间建立一次索引
public static final String CREATE_INDEX_SLEEP_TIME = Config.getInstance()
.getProperty("create_index_sleep_time");
// 索引文件存放路径
public static final String INDEX_STORE_DIRECTORY = Config.getInstance()
.getProperty("index_store_directory");
//数据库驱动程序
public static final String DB_DRIVER_STRING = Config.getInstance()
.getProperty("db_driver_string");
//数据库连接URI
public static final String DB_URI_STRING = Config.getInstance()
.getProperty("db_uri_string");
//数据库连接username
public static final String DB_USERNAME= Config.getInstance()
.getProperty("db_username");
//数据库连接pwd
public static final String DB_PWD= Config.getInstance()
.getProperty("db_pwd");
//数据库查询语句db_query_str
public static final String DB_QUERY_STRING= Config.getInstance()
.getProperty("db_query_string");
}
数据类型处理类:
package com.qqw.index;
/**
 * Data type conversion helpers.
 *
 * @author matieli
 * @create 2011-10-23
 */
public class DataTypeUtil {
    /**
     * Converts an object to a {@code long} by parsing its {@code toString()}
     * text as a decimal number. Leading and trailing whitespace is ignored, so
     * values read from a properties file with stray spaces still parse.
     *
     * @param o source object
     * @return the parsed value, or {@code Long.MAX_VALUE} if the text is not a valid long
     * @throws IllegalArgumentException if {@code o} is {@code null}
     */
    public static long toLong(Object o) {
        if (o == null) {
            throw new IllegalArgumentException("该对象为空");
        }
        String s = o.toString();
        if (s == null) {
            return Long.MAX_VALUE;
        }
        try {
            return Long.parseLong(s.trim());
        } catch (NumberFormatException ex) {
            return Long.MAX_VALUE;
        }
    }
}
配置文件 :
#== the directory for store lucene-index ========#
index_store_directory=D\:/lucene/indexDB/
#======== two hours ========#
#create_index_sleep_time=7200000
#======== active value: 7200000 ms is two hours (two minutes would be 120000) ========#
create_index_sleep_time=7200000
#== JDBC driver class, connection URL and credentials used by the indexer ========#
db_driver_string=oracle.jdbc.driver.OracleDriver
db_uri_string=jdbc\:oracle\:thin\:@localhost\:1521\:orcl
db_username=test
db_pwd=test
#== query whose result rows are turned into index documents ========#
db_query_string=SELECT * from journalarticle
核心搜索类:
package com.qqw.search;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;
import org.wltea.analyzer.lucene.IKAnalyzer;
import org.wltea.analyzer.lucene.IKQueryParser;
import org.wltea.analyzer.lucene.IKSimilarity;
/**
* 负责搜索的类
*/
public class LuceneDBQuery {
private static LuceneDBQuery search = new LuceneDBQuery();
// 构造方法
private LuceneDBQuery() {
}
/**
* 单实例访问接口
*
* @return
*/
public static LuceneDBQuery getInstance() {
return search;
}
public List<Map<String,Object>> seacherStr(String[] indexFields,String[] searchFields,String queryString,
String searchdictory,String[] highlighterFields) {
List<Map<String,Object>> list = null;
TopDocs topDocs = null;
Query query = null;
IndexSearcher searcher = null;
try {
searcher = new IndexSearcher(FSDirectory.open(new File(
searchdictory)), true);// read-only
BooleanClause.Occur[] flags=new BooleanClause.Occur[]{BooleanClause.Occur.MUST,BooleanClause.Occur.MUST};
query = IKQueryParser.parseMultiField(searchFields, queryString,flags);// 多个
//在索引器中使用IKSimilarity相似度评估器
searcher.setSimilarity(new IKSimilarity());
// 准备高亮器
Formatter formatter = new SimpleHTMLFormatter(
"<span class=\"highlighter\">", "</span>");
Scorer fragmentScorer = new QueryScorer(query);
Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
Fragmenter fragmenter = new SimpleFragmenter(100);// 高亮范围
highlighter.setTextFragmenter(fragmenter);
if (searcher != null) {
topDocs = searcher.search(query, 100);// 100是显示队列的Size
ScoreDoc[] hits = topDocs.scoreDocs;
System.out.println("共有" + searcher.maxDoc() + "条索引,命中"
+ hits.length + "条");
list = new ArrayList<Map<String,Object>>();
for (int i = 0; i < hits.length; i++) {//长度遍历
ScoreDoc scoreDoc = topDocs.scoreDocs[i];// 读取第几条记录
int docSn = scoreDoc.doc;
// 文档内部编号
Document document = searcher.doc(docSn);
Map<String,Object> map=new HashMap<String, Object>();
// 高亮
for (int k = 0; k < indexFields.length; k++) {//遍历所有的字段
map.put(indexFields[k], document.get(indexFields[k]));
for (int j = 0; j < highlighterFields.length; j++) {//遍历要高亮的字段,要高亮的字段肯定小于等于所有的字段
// 如果当前属性值中没有出现关键字,则返回null
String hctemp = highlighter.getBestFragment(
new IKAnalyzer(), "\""+highlighterFields[j]+"\"", document.get(highlighterFields[j]));
if (hctemp == null) {
hctemp = document.get(highlighterFields[j]);
}
map.put(highlighterFields[j], hctemp);
}
}
list.add(map);
}
}
}
catch (CorruptIndexException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
catch (InvalidTokenOffsetsException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return list;
}
// 查询分页
public List<Map<String,Object>> seacherStrbyPage(String[] indexFields,String[] searchFields,String queryString,
String searchdictory, int firstResult, int maxResult,String[] highlighterFields) {
List<Map<String,Object>> list = null;
TopDocs topDocs = null;
Query query = null;
IndexSearcher searcher = null;
try {
searcher = new IndexSearcher(FSDirectory.open(new File(
searchdictory)), true);// read-only
// QueryParser qp = new QueryParser(Version.LUCENE_33, fields,
// new StandardAnalyzer(Version.LUCENE_33));// 有变化的地方 单个字段关联
//
// 使用IKQueryParser查询分析器构造Query对象
// //声明BooleanClause.Occur[]数组,它表示多个条件之间的关系
// BooleanClause.Occur[] flags=new
// BooleanClause.Occur[]{BooleanClause.Occur.MUST,BooleanClause.Occur.MUST};
query = IKQueryParser.parseMultiField(searchFields, queryString);// 多个
// //在索引器中使用IKSimilarity相似度评估器
searcher.setSimilarity(new IKSimilarity());
// query = IKQueryParser.parse(field, queryString);
// QueryParser qp = new MultiFieldQueryParser(Version.LUCENE_33,
// fields,
// new IKAnalyzer());// 有变化的地方 多个地段关联
//
// query = qp.parse(queryString);
// 准备高亮器
Formatter formatter = new SimpleHTMLFormatter(
"<span class=\"highlighter\">", "</span>");
Scorer fragmentScorer = new QueryScorer(query);
Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
Fragmenter fragmenter = new SimpleFragmenter(100);// 高亮范围
highlighter.setTextFragmenter(fragmenter);
if (searcher != null) {
topDocs = searcher.search(query, 100);// 100是显示队列的Size
ScoreDoc[] hits = topDocs.scoreDocs;
System.out.println("共有" + searcher.maxDoc() + "条索引,命中"
+ hits.length + "条");
list = new ArrayList<Map<String,Object>>();
for (int i = firstResult - 1; i < firstResult + maxResult - 1; i++) {//按照分页的长度遍历
//for (int i = 0; i < hits.length; i++) {//长度遍历
ScoreDoc scoreDoc = topDocs.scoreDocs[i];// 读取第几条记录
int docSn = scoreDoc.doc;
// 文档内部编号
Document document = searcher.doc(docSn);
Map<String,Object> map=new HashMap<String, Object>();
// 高亮
for (int k = 0; k < indexFields.length; k++) {//遍历所有的字段
map.put(indexFields[k], document.get(indexFields[k]));
for (int j = 0; j < highlighterFields.length; j++) {//遍历要高亮的字段,要高亮的字段肯定小于等于所有的字段
// 如果当前属性值中没有出现关键字,则返回null
String hctemp = highlighter.getBestFragment(
new IKAnalyzer(), "\""+highlighterFields[j]+"\"", document.get(highlighterFields[j]));
if (hctemp == null) {
hctemp = document.get(highlighterFields[j]);
}
map.put(highlighterFields[j], hctemp);
}
}
list.add(map);
}
}
}
catch (CorruptIndexException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
catch (InvalidTokenOffsetsException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return list;
}
// 取得符合搜索条件的所有记录总数,以便分页 , 与上面方法类似
public int getResultCount(String[] searchFields,String queryString, String searchdictory)
throws Exception {
TopDocs topDocs = null;
Query query = null;
IndexSearcher searcher = null;
try {
searcher = new IndexSearcher(FSDirectory.open(new File(
searchdictory)), true);// read-only
query = IKQueryParser.parseMultiField(searchFields, queryString);// 多个
// //在索引器中使用IKSimilarity相似度评估器
searcher.setSimilarity(new IKSimilarity());
if (searcher != null) {
topDocs = searcher.search(query, 100);// 100是显示队列的Size
}
} catch (CorruptIndexException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
// ScoreDoc[] hits = topDocs.scoreDocs;取得还是hits的length
return topDocs.scoreDocs.length;
}
}
索引路径读取工具类(从 zxt_index.xml 中读取索引目录):
package com.qqw.search;
import java.io.IOException;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;
public class LuceneDBQueryUtil {
    /**
     * Reads the index directory path from {@code zxt_index.xml}, which is
     * looked up through the context class loader. The path is the text of the
     * {@code <index>} child of the document's root element.
     *
     * @return the configured path, or an empty string if the file is missing,
     *         cannot be parsed, or has no {@code <index>} element
     */
    public static String getIndexPath() {
        String filePath = "zxt_index.xml";
        String indexPath = "";
        java.net.URL resource = Thread.currentThread().getContextClassLoader().getResource(filePath);
        if (resource == null) {
            // Without this check the parser would be handed a null URL.
            System.err.println("Index configuration not found on classpath: " + filePath);
            return indexPath;
        }
        SAXBuilder builder = new SAXBuilder(false);
        try {
            Document doc = builder.build(resource);
            Element index = doc.getRootElement().getChild("index");
            if (index == null) {
                System.err.println("No <index> element in " + filePath);
                return indexPath;
            }
            indexPath = index.getText();
            System.out.println(indexPath);
        } catch (JDOMException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return indexPath;
    }
}
通过ServletContextListener配置定时任务
package com.qqw.timer;
import java.util.Timer;
import javax.servlet.ServletContextEvent;
import javax.servlet.ServletContextListener;
import com.qqw.index.Constant;
import com.qqw.index.LuceneDBIndexerTask;
/**
 * Starts the periodic index build when the web application starts and stops
 * it when the application shuts down.
 */
public class MyListener implements ServletContextListener {
    private Timer timer = null;

    /**
     * Creates a daemon timer and schedules the index task to run immediately
     * and then repeatedly, with the configured delay between runs.
     */
    public void contextInitialized(ServletContextEvent event) {
        timer = new Timer(true);
        LuceneDBIndexerTask indexTask = LuceneDBIndexerTask.getInstance();
        long periodMillis = Long.valueOf(Constant.CREATE_INDEX_SLEEP_TIME).longValue();
        timer.schedule(indexTask, 0, periodMillis);
    }

    /** Cancels the timer so no further index builds are started. */
    public void contextDestroyed(ServletContextEvent event) {
        timer.cancel();
    }
}
web.xml 配置
<?xml version="1.0" encoding="UTF-8"?>
<!-- Servlet 2.4 deployment descriptor: welcome page, search servlet and the index-scheduling listener. -->
<web-app version="2.4"
xmlns="http://java.sun.com/xml/ns/j2ee"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://java.sun.com/xml/ns/j2ee
http://java.sun.com/xml/ns/j2ee/web-app_2_4.xsd">
<welcome-file-list>
<welcome-file>index.jsp</welcome-file>
</welcome-file-list>
<!-- Servlet configuration -->
<!-- NOTE(review): dataFromOracle.servlet.SearchServlet is not among the classes shown with this descriptor; confirm the package name. -->
<servlet>
<servlet-name>SearchServlet</servlet-name>
<servlet-class>dataFromOracle.servlet.SearchServlet</servlet-class>
</servlet>
<servlet-mapping>
<servlet-name>SearchServlet</servlet-name>
<url-pattern>/SearchServlet</url-pattern>
</servlet-mapping>
<!-- Listener that schedules the periodic Lucene index build at application startup -->
<listener>
<listener-class>com.qqw.timer.MyListener</listener-class>
</listener>
</web-app>
数据库表文件
-- Create table
-- Article table read by the indexer; every column below is fetched by name
-- and added to the Lucene document. CONTENT holds the article body as a CLOB.
create table JOURNALARTICLE
(
ARTICLEID NUMBER(10) not null,
TITLE VARCHAR2(255) not null,
USERNAME VARCHAR2(4000) not null,
USERID VARCHAR2(255) not null,
CREATEDATE TIMESTAMP(6) not null,
CONTENT CLOB
);
-- Create/Recreate primary, unique and foreign key constraints
-- The primary key constraint is named ARTICLEID, the same as the column it covers.
alter table JOURNALARTICLE
add constraint ARTICLEID primary key (ARTICLEID);
通过以上的代码,移植到新项目时只需要修改配置文件即可。Lucene索引的建立不需要考虑在什么时候进行,只要保证数据库连接处于正常状态即可;索引字段和搜索字段都可以通过配置的形式表现出来,分页功能和高亮功能也都包含在其中。