有些网站的搜索功能是直接使用 like '%关键词%' 的方式对数据库进行关键词查找,不过这种方式速度比较慢,而且会影响数据库服务器的性能。
其实我们可以先把数据从数据库查询出来,利用lucene建立索引。以后每次查找都从索引中查找,可以提高查询速度和减轻服务器负担。
本篇用到的技术:Lucene 3.0.2、IKAnalyzer 3.2.5
search.properties 主要用于配置搜索相关的信息,内容如下:
# SQL that selects the rows to index. The indexer reads column 1 as the id,
# column 2 as the title and column 3 as the content.
sql=select iId,title,content,credate from archeive
# Column/value pair for restricting the query: when update.value is not empty the
# indexer appends " where <update.field> > <update.value>" to the SQL above.
update.field=iId
update.value=
# Comma-separated list of index fields that a search looks in.
search.condition=title,content
# Directory in which the index is stored.
index.path=D:/project/Java/lucene/WebContent/WEB-INF/classes/Index
# Interval between index updates, in milliseconds.
period=10000
# Column/value pair for restricting the query: when update.value is not empty the
# indexer appends " where <update.field> > <update.value>" to the configured SQL.
update.field=iId
update.value=
# Comma-separated list of index fields that a search looks in.
search.condition=title,content
# Directory in which the index is stored.
index.path=D:/project/Java/lucene/WebContent/WEB-INF/classes/Index
# Interval between index updates, in milliseconds.
period=10000
com.search.util.SearchConfig主要是读取search.properties的信息。
/**
 * Accessor for the settings stored in {@code search.properties}.
 *
 * <p>Loading and saving are delegated to {@code PropertiesUtil}. Every getter falls back to a
 * default value when its key is missing.
 */
public class SearchConfig {
    /** Name of the properties file passed to {@code PropertiesUtil}. */
    private static final String SEARCH_FILE = "search.properties";
    private static final String KEY_SQL = "sql";
    private static final String KEY_CONDITION = "search.condition";
    private static final String KEY_INDEX_PATH = "index.path";
    private static final String KEY_PERIOD = "period";
    private static final String KEY_UPDATE_FIELD = "update.field";
    private static final String KEY_UPDATE_VALUE = "update.value";

    /** Properties loaded by {@link #initSearch()}. */
    private Properties searchPro;

    /** Creates the configuration and loads the properties file immediately. */
    public SearchConfig() {
        initSearch();
    }

    /** (Re)loads the settings from the properties file. */
    public void initSearch() {
        searchPro = PropertiesUtil.getProperties(SEARCH_FILE);
    }

    /** @return the configured SQL, or {@code ""} when the key is missing */
    public String getSql() {
        return searchPro.getProperty(KEY_SQL, "");
    }

    /** @return the comma-separated search field list, or {@code ""} when the key is missing */
    public String getCondition() {
        return searchPro.getProperty(KEY_CONDITION, "");
    }

    /**
     * Returns the index directory, creating it when it does not exist yet.
     *
     * @return the configured directory as a {@link File}
     */
    public File getIndexPath() {
        // Properties keeps trailing blanks of a value; they must not end up in the path.
        File dir = new File(searchPro.getProperty(KEY_INDEX_PATH, "").trim());
        if (!dir.exists()) {
            // mkdirs() rather than mkdir(): the configured path may have missing parent directories.
            dir.mkdirs();
        }
        return dir;
    }

    /**
     * Returns the configured update interval.
     *
     * @return the value of {@code period}, or {@code 0} when the key is missing
     * @throws NumberFormatException if the configured value is not a valid integer
     */
    public long getPeriod() {
        // Parse as long (the declared return type) and ignore surrounding blanks.
        return Long.parseLong(searchPro.getProperty(KEY_PERIOD, "0").trim());
    }

    /** @return the configured update column, or {@code ""} when the key is missing */
    public String getUpdateField() {
        return searchPro.getProperty(KEY_UPDATE_FIELD, "");
    }

    /** @return the configured update value, or {@code ""} when the key is missing */
    public String getUpdateValue() {
        return searchPro.getProperty(KEY_UPDATE_VALUE, "");
    }

    /** Writes the current properties back through {@code PropertiesUtil.saveProperties}. */
    public void save() {
        PropertiesUtil.saveProperties(searchPro, SEARCH_FILE);
    }
}
/** Name of the properties file passed to {@code PropertiesUtil}. */
private static final String SEARCH_FILE = "search.properties";
private static final String KEY_SQL = "sql";
private static final String KEY_CONDITION = "search.condition";
private static final String KEY_INDEX_PATH = "index.path";
private static final String KEY_PERIOD = "period";
private static final String KEY_UPDATE_FIELD = "update.field";
private static final String KEY_UPDATE_VALUE = "update.value";

/** Properties loaded by {@link #initSearch()}. */
private Properties searchPro;

/** Creates the configuration and loads the properties file immediately. */
public SearchConfig() {
    initSearch();
}

/** (Re)loads the settings from the properties file. */
public void initSearch() {
    searchPro = PropertiesUtil.getProperties(SEARCH_FILE);
}

/** @return the configured SQL, or {@code ""} when the key is missing */
public String getSql() {
    return searchPro.getProperty(KEY_SQL, "");
}

/** @return the comma-separated search field list, or {@code ""} when the key is missing */
public String getCondition() {
    return searchPro.getProperty(KEY_CONDITION, "");
}

/**
 * Returns the index directory, creating it when it does not exist yet.
 *
 * @return the configured directory as a {@link File}
 */
public File getIndexPath() {
    // Properties keeps trailing blanks of a value; they must not end up in the path.
    File dir = new File(searchPro.getProperty(KEY_INDEX_PATH, "").trim());
    if (!dir.exists()) {
        // mkdirs() rather than mkdir(): the configured path may have missing parent directories.
        dir.mkdirs();
    }
    return dir;
}

/**
 * Returns the configured update interval.
 *
 * @return the value of {@code period}, or {@code 0} when the key is missing
 * @throws NumberFormatException if the configured value is not a valid integer
 */
public long getPeriod() {
    // Parse as long (the declared return type) and ignore surrounding blanks.
    return Long.parseLong(searchPro.getProperty(KEY_PERIOD, "0").trim());
}

/** @return the configured update column, or {@code ""} when the key is missing */
public String getUpdateField() {
    return searchPro.getProperty(KEY_UPDATE_FIELD, "");
}

/** @return the configured update value, or {@code ""} when the key is missing */
public String getUpdateValue() {
    return searchPro.getProperty(KEY_UPDATE_VALUE, "");
}

/** Writes the current properties back through {@code PropertiesUtil.saveProperties}. */
public void save() {
    PropertiesUtil.saveProperties(searchPro, SEARCH_FILE);
}
}
com.search.util.LuceneUtil代码介绍,主要是生成索引和搜索。
/**
 * Builds the Lucene index from the configured SQL query and runs keyword searches against it.
 *
 * <p>The index directory, the SQL, the searched fields and the optional update column/value are
 * read once from {@link SearchConfig} in the constructor.
 */
public class LuceneUtil {
    /** Maximum number of hits requested from the searcher. */
    private static final int MAX_HITS = 10;

    private final File indexpath;
    private final String sql;
    private final String condition;

    /** Reads the configuration and prepares the SQL used by {@link #createIndex()}. */
    public LuceneUtil() {
        SearchConfig sc = new SearchConfig();
        indexpath = sc.getIndexPath();
        condition = sc.getCondition();
        String updateField = sc.getUpdateField();
        String updateValue = sc.getUpdateValue();
        String query = sc.getSql();
        if (!updateValue.equals("")) {
            // NOTE(review): the values are concatenated into the SQL as-is, so the configured
            // SQL must not already contain a where clause. createIndex() recreates the index on
            // every run, so with a non-empty update.value the index holds only the rows above
            // that value - confirm this is intended.
            query = query + " where " + updateField + " > " + updateValue;
        }
        sql = query;
    }

    /**
     * Recreates the index from the rows returned by the configured SQL.
     *
     * <p>Column 1 is indexed as {@code id}, column 2 as {@code title} and column 3 as
     * {@code content}. Failures are printed to stderr and not propagated.
     */
    public void createIndex() {
        System.out.println("==========正在生成数据库索引。");
        ResultSet rs = null;
        FSDirectory directory = null;
        IndexWriter writer = null;
        try {
            // Fetch the rows to index.
            rs = SQLHelper.getResultSet(sql);
            // Open the directory once and share it with the writer; opening it a second time
            // for the writer left an instance that was never closed.
            directory = FSDirectory.open(indexpath);
            Analyzer analyzer = new IKAnalyzer();
            writer = new IndexWriter(directory, analyzer, true,
                    IndexWriter.MaxFieldLength.LIMITED);
            while (rs.next()) {
                Document doc = new Document();
                doc.add(new Field("id", String.valueOf(rs.getInt(1)),
                        Field.Store.YES, Field.Index.ANALYZED));
                doc.add(new Field("title", nullToEmpty(rs.getString(2)),
                        Field.Store.YES, Field.Index.ANALYZED));
                doc.add(new Field("content", nullToEmpty(rs.getString(3)),
                        Field.Store.YES, Field.Index.ANALYZED));
                writer.addDocument(doc);
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Always release the writer (and its write lock), the directory and the result set,
            // also when indexing failed half-way.
            if (writer != null) {
                try {
                    writer.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            if (directory != null) {
                try {
                    directory.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            if (rs != null) {
                try {
                    // NOTE(review): only the ResultSet is visible here; whether SQLHelper also
                    // releases the statement/connection cannot be told from this class.
                    rs.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Searches the configured fields for {@code keyword}.
     *
     * @param keyword the text to search for; {@code null} or {@code ""} yields no hits
     * @return the stored documents of at most {@value #MAX_HITS} best hits; empty (never
     *         {@code null}) when nothing matches or the search fails
     */
    public List<Document> search(String keyword) {
        List<Document> list = new ArrayList<Document>();
        // Check the keyword before touching the index so nothing is opened needlessly.
        if (keyword == null || keyword.equals("")) {
            return list;
        }
        FSDirectory directory = null;
        IndexReader reader = null;
        IndexSearcher isearcher = null;
        try {
            directory = FSDirectory.open(indexpath);
            reader = IndexReader.open(directory, true);
            isearcher = new IndexSearcher(reader);
            isearcher.setSimilarity(new IKSimilarity());
            Query query = IKQueryParser.parseMultiField(condition.split(","), keyword);
            TopDocs topDocs = isearcher.search(query, MAX_HITS);
            // Iterate over the returned hits: totalHits counts ALL matches and may exceed the
            // length of scoreDocs, which is capped at MAX_HITS.
            for (ScoreDoc hit : topDocs.scoreDocs) {
                list.add(isearcher.doc(hit.doc));
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (isearcher != null) {
                try {
                    isearcher.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            // The reader was opened by this method, so it is closed here explicitly.
            if (reader != null) {
                try {
                    reader.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            if (directory != null) {
                try {
                    directory.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
        return list;
    }

    /** Maps a SQL NULL to {@code ""}; a {@code Field} cannot be created from a null value. */
    private static String nullToEmpty(String value) {
        return value == null ? "" : value;
    }
}
/** Maximum number of hits requested from the searcher. */
private static final int MAX_HITS = 10;

private final File indexpath;
private final String sql;
private final String condition;

/** Reads the configuration and prepares the SQL used by {@link #createIndex()}. */
public LuceneUtil() {
    SearchConfig sc = new SearchConfig();
    indexpath = sc.getIndexPath();
    condition = sc.getCondition();
    String updateField = sc.getUpdateField();
    String updateValue = sc.getUpdateValue();
    String query = sc.getSql();
    if (!updateValue.equals("")) {
        // NOTE(review): the values are concatenated into the SQL as-is, so the configured
        // SQL must not already contain a where clause. createIndex() recreates the index on
        // every run, so with a non-empty update.value the index holds only the rows above
        // that value - confirm this is intended.
        query = query + " where " + updateField + " > " + updateValue;
    }
    sql = query;
}

/**
 * Recreates the index from the rows returned by the configured SQL.
 *
 * <p>Column 1 is indexed as {@code id}, column 2 as {@code title} and column 3 as
 * {@code content}. Failures are printed to stderr and not propagated.
 */
public void createIndex() {
    System.out.println("==========正在生成数据库索引。");
    ResultSet rs = null;
    FSDirectory directory = null;
    IndexWriter writer = null;
    try {
        // Fetch the rows to index.
        rs = SQLHelper.getResultSet(sql);
        // Open the directory once and share it with the writer; opening it a second time
        // for the writer left an instance that was never closed.
        directory = FSDirectory.open(indexpath);
        Analyzer analyzer = new IKAnalyzer();
        writer = new IndexWriter(directory, analyzer, true,
                IndexWriter.MaxFieldLength.LIMITED);
        while (rs.next()) {
            Document doc = new Document();
            doc.add(new Field("id", String.valueOf(rs.getInt(1)),
                    Field.Store.YES, Field.Index.ANALYZED));
            doc.add(new Field("title", nullToEmpty(rs.getString(2)),
                    Field.Store.YES, Field.Index.ANALYZED));
            doc.add(new Field("content", nullToEmpty(rs.getString(3)),
                    Field.Store.YES, Field.Index.ANALYZED));
            writer.addDocument(doc);
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Always release the writer (and its write lock), the directory and the result set,
        // also when indexing failed half-way.
        if (writer != null) {
            try {
                writer.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        if (directory != null) {
            try {
                directory.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        if (rs != null) {
            try {
                // NOTE(review): only the ResultSet is visible here; whether SQLHelper also
                // releases the statement/connection cannot be told from this class.
                rs.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
}

/**
 * Searches the configured fields for {@code keyword}.
 *
 * @param keyword the text to search for; {@code null} or {@code ""} yields no hits
 * @return the stored documents of at most {@value #MAX_HITS} best hits; empty (never
 *         {@code null}) when nothing matches or the search fails
 */
public List<Document> search(String keyword) {
    List<Document> list = new ArrayList<Document>();
    // Check the keyword before touching the index so nothing is opened needlessly.
    if (keyword == null || keyword.equals("")) {
        return list;
    }
    FSDirectory directory = null;
    IndexReader reader = null;
    IndexSearcher isearcher = null;
    try {
        directory = FSDirectory.open(indexpath);
        reader = IndexReader.open(directory, true);
        isearcher = new IndexSearcher(reader);
        isearcher.setSimilarity(new IKSimilarity());
        Query query = IKQueryParser.parseMultiField(condition.split(","), keyword);
        TopDocs topDocs = isearcher.search(query, MAX_HITS);
        // Iterate over the returned hits: totalHits counts ALL matches and may exceed the
        // length of scoreDocs, which is capped at MAX_HITS.
        for (ScoreDoc hit : topDocs.scoreDocs) {
            list.add(isearcher.doc(hit.doc));
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        if (isearcher != null) {
            try {
                isearcher.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        // The reader was opened by this method, so it is closed here explicitly.
        if (reader != null) {
            try {
                reader.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        if (directory != null) {
            try {
                directory.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
    return list;
}

/** Maps a SQL NULL to {@code ""}; a {@code Field} cannot be created from a null value. */
private static String nullToEmpty(String value) {
    return value == null ? "" : value;
}
}
com.search.listener.Indexlistener启动索引更新程序
/**
 * Servlet context listener that creates the {@code IndexTask} when the web application starts.
 */
public class Indexlistener implements ServletContextListener {

    /** Creates a new {@code IndexTask} once the servlet context has been initialized. */
    public void contextInitialized(javax.servlet.ServletContextEvent arg0) {
        new IndexTask();
    }

    /** Nothing is done on shutdown; the task created at startup is not stopped here. */
    public void contextDestroyed(javax.servlet.ServletContextEvent arg0) {
        // intentionally empty
    }
}
/** Creates a new {@code IndexTask} once the servlet context has been initialized. */
public void contextInitialized(javax.servlet.ServletContextEvent arg0) {
    new IndexTask();
}

/** Nothing is done on shutdown; the task created at startup is not stopped here. */
public void contextDestroyed(javax.servlet.ServletContextEvent arg0) {
    // intentionally empty
}
}
com.search.listener.IndexTask定时更新索引
/**
 * Schedules the index update: the first run starts immediately, later runs follow at the
 * interval returned by {@code SearchConfig.getPeriod()} (milliseconds, as required by
 * {@link Timer}).
 */
public class IndexTask {
    /** Timer that drives the updates; kept so that {@link #cancel()} can stop it. */
    private final Timer timer;

    /**
     * Starts the update timer.
     *
     * <p>A non-positive period is not passed to {@code Timer.schedule(task, date, period)},
     * which rejects it with an {@code IllegalArgumentException}; in that case the index is
     * built once and not refreshed.
     */
    public IndexTask() {
        // Daemon thread: a forgotten timer must not keep the JVM (or the servlet container)
        // from shutting down.
        timer = new Timer(true);
        long period = new SearchConfig().getPeriod();
        Task task = new Task();
        if (period > 0) {
            timer.schedule(task, new Date(), period);
        } else {
            timer.schedule(task, new Date());
        }
    }

    /** Stops the timer; no further index updates are started afterwards. */
    public void cancel() {
        timer.cancel();
    }

    /** Single index update run. */
    static class Task extends TimerTask {
        public void run() {
            try {
                LuceneUtil lu = new LuceneUtil();
                lu.createIndex();
            } catch (RuntimeException e) {
                // An exception escaping run() terminates the timer thread and silently ends
                // all future updates, so it is reported and swallowed here.
                e.printStackTrace();
            }
        }
    }
}
/** Timer that drives the updates; kept so that {@link #cancel()} can stop it. */
private final Timer timer;

/**
 * Starts the update timer: the first run starts immediately, later runs follow at the
 * interval returned by {@code SearchConfig.getPeriod()}.
 *
 * <p>A non-positive period is not passed to {@code Timer.schedule(task, date, period)},
 * which rejects it with an {@code IllegalArgumentException}; in that case the index is
 * built once and not refreshed.
 */
public IndexTask() {
    // Daemon thread: a forgotten timer must not keep the JVM (or the servlet container)
    // from shutting down.
    timer = new Timer(true);
    long period = new SearchConfig().getPeriod();
    Task task = new Task();
    if (period > 0) {
        timer.schedule(task, new Date(), period);
    } else {
        timer.schedule(task, new Date());
    }
}

/** Stops the timer; no further index updates are started afterwards. */
public void cancel() {
    timer.cancel();
}

/** Single index update run. */
static class Task extends TimerTask {
    public void run() {
        try {
            LuceneUtil lu = new LuceneUtil();
            lu.createIndex();
        } catch (RuntimeException e) {
            // An exception escaping run() terminates the timer thread and silently ends
            // all future updates, so it is reported and swallowed here.
            e.printStackTrace();
        }
    }
}
}
com.search.util.RedHighlighter关键词高亮显示
/**
 * Marks the query terms inside a piece of text with a red {@code <font>} tag.
 */
public class RedHighlighter {

    /**
     * Returns the best matching fragment of {@code word} with the terms of {@code keyword}
     * wrapped in {@code <font color='red'>...</font>}.
     *
     * @param keyword the search text the query is built from
     * @param field   name of the field {@code word} was read from
     * @param word    the text to highlight; may be {@code null}
     * @return the highlighted fragment; {@code word} unchanged when it contains none of the
     *         query terms; {@code ""} when {@code word} is {@code null} or highlighting fails
     */
    public static String getBestFragment(String keyword, String field, String word) {
        if (word == null) {
            // Nothing to highlight; previously this case ended in a caught NullPointerException.
            return "";
        }
        SearchConfig sc = new SearchConfig();
        String condition = sc.getCondition();
        try {
            Query query = IKQueryParser.parseMultiField(condition.split(","), keyword);
            SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(
                    "<font color='red'>", "</font>");
            Highlighter highlighter = new Highlighter(simpleHTMLFormatter,
                    new QueryScorer(query));
            highlighter.setTextFragmenter(new SimpleFragmenter(100));
            // NOTE(review): the text is emitted without HTML encoding; if the indexed text can
            // contain markup, the caller (or an Encoder on the Highlighter) has to escape it.
            String fragment = highlighter.getBestFragment(new IKAnalyzer(), field, word);
            // The highlighter returns null when the text contains none of the query terms,
            // e.g. the title of a document that matched on its content only. Fall back to the
            // plain text so that callers never render "null".
            return fragment != null ? fragment : word;
        } catch (Exception e) {
            e.printStackTrace();
        }
        return "";
    }
}
/**
 * Returns the best matching fragment of {@code word} with the terms of {@code keyword}
 * wrapped in {@code <font color='red'>...</font>}.
 *
 * @param keyword the search text the query is built from
 * @param field   name of the field {@code word} was read from
 * @param word    the text to highlight; may be {@code null}
 * @return the highlighted fragment; {@code word} unchanged when it contains none of the
 *         query terms; {@code ""} when {@code word} is {@code null} or highlighting fails
 */
public static String getBestFragment(String keyword, String field, String word) {
    if (word == null) {
        // Nothing to highlight; previously this case ended in a caught NullPointerException.
        return "";
    }
    SearchConfig sc = new SearchConfig();
    String condition = sc.getCondition();
    try {
        Query query = IKQueryParser.parseMultiField(condition.split(","), keyword);
        SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(
                "<font color='red'>", "</font>");
        Highlighter highlighter = new Highlighter(simpleHTMLFormatter,
                new QueryScorer(query));
        highlighter.setTextFragmenter(new SimpleFragmenter(100));
        // NOTE(review): the text is emitted without HTML encoding; if the indexed text can
        // contain markup, the caller (or an Encoder on the Highlighter) has to escape it.
        String fragment = highlighter.getBestFragment(new IKAnalyzer(), field, word);
        // The highlighter returns null when the text contains none of the query terms,
        // e.g. the title of a document that matched on its content only. Fall back to the
        // plain text so that callers never render "null".
        return fragment != null ? fragment : word;
    } catch (Exception e) {
        e.printStackTrace();
    }
    return "";
}
}
index.jsp搜索页面
<%@ page language="java" contentType="text/html; charset=GBK"
    pageEncoding="GBK" %>
<%@ page import="com.search.util.LuceneUtil" %>
<%@ page import="java.util.*" %>
<%@ page import="org.apache.lucene.document.Document" %>
<%@ page import="com.search.util.RedHighlighter" %>
<%@ page import="java.net.URLEncoder" %>
<%-- Search page: reads the "w" request parameter, queries the index and lists the hits. --%>
<%!
    /** Escapes the characters that are significant in HTML text and attribute values. */
    private static String escapeHtml(String s) {
        if (s == null) {
            return "";
        }
        StringBuilder out = new StringBuilder(s.length() + 16);
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            switch (c) {
                case '&': out.append("&amp;"); break;
                case '<': out.append("&lt;"); break;
                case '>': out.append("&gt;"); break;
                case '"': out.append("&quot;"); break;
                case '\'': out.append("&#39;"); break;
                default: out.append(c);
            }
        }
        return out.toString();
    }

    /**
     * Highlighted fragment of a stored field; falls back to the escaped plain text when the
     * highlighter yields nothing (null or empty), so the page never shows "null".
     */
    private static String highlightOrPlain(String keyword, String field, String stored) {
        String fragment = RedHighlighter.getBestFragment(keyword, field, stored);
        if (fragment == null || fragment.length() == 0) {
            return escapeHtml(stored);
        }
        return fragment;
    }
%>
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
    "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=GBK">
<title> 搜索 </title>
<link rel="stylesheet" href="./style/style.css" type="text/css">
</head>
<%
    //request.setCharacterEncoding("GBK");
    String w = request.getParameter("w");
    if (w != null && !w.equals("")) {
        // NOTE(review): re-decodes the parameter as GBK, which presumes the container decoded
        // the query string as ISO-8859-1 - confirm against the container's URI encoding.
        w = new String(w.getBytes("ISO8859-1"), "GBK");
    } else {
        w = "";
    }
    List<Document> list = null;
    long time = 0;
    if (w.length() > 0) {
        // Only touch the index when there is something to search for.
        LuceneUtil lu = new LuceneUtil();
        long start = System.currentTimeMillis();
        list = lu.search(w);
        time = System.currentTimeMillis() - start;
    }
    if (list == null) {
        list = new ArrayList<Document>();
    }
    int size = list.size();
%>
<body>
<div class="seachInput" align="center">
<form method="get" action="index.jsp"><br>
<%-- The keyword comes straight from the request and is escaped before being echoed. --%>
<input type="text" class="txtSeach" id="w" name="w" value="<%= escapeHtml(w) %>"
><input type="submit"
 class="btnSearch" value="找一下"> <br>
</form>
</div>
<div id="searchInfo"><span style="float: left; margin-left: 15px;"></span> 找到相关内容 <%= size %> 篇,
用时 <%= time %> 毫秒
</div>
<div id="main">
<div id="searchResult">
<div class="forflow">
<%
    for (Document doc : list) {
        String title = highlightOrPlain(w, "title", doc.get("title"));
        String content = highlightOrPlain(w, "content", doc.get("content"));
%>
<div class="searchItem">
<a href="#" class="searchItemTitle" target="_blank"> <%= title %> </a>
<div class="searchCon">
<%= content %>
</div>
</div>
<%
    }
%>
</div>
</div>
</div>
</body>
</html>
pageEncoding="GBK" %>
<%@ page import="com.search.util.LuceneUtil" %>
<%@ page import="java.util.*" %>
<%@ page import="org.apache.lucene.document.Document" %>
<%@ page import="com.search.util.RedHighlighter" %>
<%@ page import="java.net.URLEncoder" %>
<%-- Search page: reads the "w" request parameter, queries the index and lists the hits. --%>
<%!
    /** Escapes the characters that are significant in HTML text and attribute values. */
    private static String escapeHtml(String s) {
        if (s == null) {
            return "";
        }
        StringBuilder out = new StringBuilder(s.length() + 16);
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            switch (c) {
                case '&': out.append("&amp;"); break;
                case '<': out.append("&lt;"); break;
                case '>': out.append("&gt;"); break;
                case '"': out.append("&quot;"); break;
                case '\'': out.append("&#39;"); break;
                default: out.append(c);
            }
        }
        return out.toString();
    }

    /**
     * Highlighted fragment of a stored field; falls back to the escaped plain text when the
     * highlighter yields nothing (null or empty), so the page never shows "null".
     */
    private static String highlightOrPlain(String keyword, String field, String stored) {
        String fragment = RedHighlighter.getBestFragment(keyword, field, stored);
        if (fragment == null || fragment.length() == 0) {
            return escapeHtml(stored);
        }
        return fragment;
    }
%>
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
    "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=GBK">
<title> 搜索 </title>
<link rel="stylesheet" href="./style/style.css" type="text/css">
</head>
<%
    //request.setCharacterEncoding("GBK");
    String w = request.getParameter("w");
    if (w != null && !w.equals("")) {
        // NOTE(review): re-decodes the parameter as GBK, which presumes the container decoded
        // the query string as ISO-8859-1 - confirm against the container's URI encoding.
        w = new String(w.getBytes("ISO8859-1"), "GBK");
    } else {
        w = "";
    }
    List<Document> list = null;
    long time = 0;
    if (w.length() > 0) {
        // Only touch the index when there is something to search for.
        LuceneUtil lu = new LuceneUtil();
        long start = System.currentTimeMillis();
        list = lu.search(w);
        time = System.currentTimeMillis() - start;
    }
    if (list == null) {
        list = new ArrayList<Document>();
    }
    int size = list.size();
%>
<body>
<div class="seachInput" align="center">
<form method="get" action="index.jsp"><br>
<%-- The keyword comes straight from the request and is escaped before being echoed. --%>
<input type="text" class="txtSeach" id="w" name="w" value="<%= escapeHtml(w) %>"
><input type="submit"
 class="btnSearch" value="找一下"> <br>
</form>
</div>
<div id="searchInfo"><span style="float: left; margin-left: 15px;"></span> 找到相关内容 <%= size %> 篇,
用时 <%= time %> 毫秒
</div>
<div id="main">
<div id="searchResult">
<div class="forflow">
<%
    for (Document doc : list) {
        String title = highlightOrPlain(w, "title", doc.get("title"));
        String content = highlightOrPlain(w, "content", doc.get("content"));
%>
<div class="searchItem">
<a href="#" class="searchItemTitle" target="_blank"> <%= title %> </a>
<div class="searchCon">
<%= content %>
</div>
</div>
<%
    }
%>
</div>
</div>
</div>
</body>
</html>
运行效果:
附件: 完整代码