用 Lucene 做了一个站内搜索,现在已经初具雏形,当然还有很多需要优化的地方。我使用的版本是 Lucene 3.0.1。
首先看看数据库。我用新闻数据来做模拟,下面是新闻表:
-- News item table: the sample data that the search index is built from.
CREATE TABLE `t_newsitem` (
`Id` int(11) NOT NULL auto_increment,
`editor` varchar(255) default NULL,
`newsContent` longtext,
`newsTitle` varchar(255) default NULL,
`publishTime` datetime default NULL,
-- NOTE(review): `resoure` looks like a misspelling of `resource` (declared below);
-- confirm which of the two columns the application maps before dropping either.
`resoure` varchar(255) default NULL,
`t_newsType_id` int(11) default NULL,
`resource` varchar(255) default NULL,
PRIMARY KEY (`Id`),
-- NOTE(review): both keys below index the same single column `t_newsType_id`.
KEY `FK9CB4BF1923597B2` (`t_newsType_id`),
KEY `FK9CB4BF19FFB60BE` (`t_newsType_id`),
-- NOTE(review): the three constraints below declare the same foreign key
-- (`t_newsType_id` -> `t_newstype`.`Id`) under different names; presumably
-- left over from repeated schema generation - verify before removing any.
CONSTRAINT `fk` FOREIGN KEY (`t_newsType_id`) REFERENCES `t_newstype` (`Id`),
CONSTRAINT `FK9CB4BF1923597B2` FOREIGN KEY (`t_newsType_id`) REFERENCES `t_newstype` (`Id`),
CONSTRAINT `FK9CB4BF19FFB60BE` FOREIGN KEY (`t_newsType_id`) REFERENCES `t_newstype` (`Id`)
) ENGINE=InnoDB DEFAULT CHARSET=gbk;
新闻类型表(也没啥用):
-- News type table: one row per type name. Its `Id` is the target of the
-- foreign keys declared on `t_newsitem`.`t_newsType_id`.
CREATE TABLE `t_newstype` (
`Id` int(11) NOT NULL auto_increment,
`newsTypeName` varchar(255) default NULL,
PRIMARY KEY (`Id`)
) ENGINE=InnoDB DEFAULT CHARSET=gbk;
系统采用的是 SSH(Struts2、Spring、Hibernate)框架,站内搜索也是基于它们实现的。
要使用lucene,至少要添加lucene-core-3.0.1.jar(核心),lucene-highlighter-3.0.1.jar(高亮显示),lucene-analyzers-3.0.1.jar(分词器)。因为它自带的分词器对中文支持不好,我使用了IKAnalyzer分词器,IKAnalyzer3.2.3Stable.jar。
做好上面的准备工作之后,接下来就是编写下面这两个类:
package luence;
import java.io.File;
import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.SimpleFSDirectory;
import org.htmlparser.Parser;
import org.wltea.analyzer.lucene.IKAnalyzer;
import service.NewsService;
import entity.TNewsitem;
/**
* 创建索引库
*
* @author tqc
*
*/
public class IndexCreate {
String path = "C:/index";// folder that holds the full-text news index
String path2 = "C:/index2";// folder that holds the tag index
// Analyzer handed to both IndexWriters created by this class.
Analyzer analyzer=new IKAnalyzer();
// Source of the news items to index; stays null until setService is called.
NewsService service = null;
/**
 * Returns the news service that supplies the items to index.
 *
 * @return the current service, or {@code null} if none has been set
 */
public NewsService getService() {
return service;
}
/**
 * Sets the news service that the index-building methods read from.
 *
 * @param service the service whose {@code getNews()} result will be indexed
 */
public void setService(NewsService service) {
this.service = service;
}
/**
 * Rebuilds the full-text news index in {@code path} from every news item
 * returned by {@code service.getNews()}.
 * <p>
 * Each item becomes one document with these fields:
 * <ul>
 * <li>{@code title} and {@code content} - stored and analyzed;</li>
 * <li>{@code date} - stored, indexed as a single un-analyzed term;</li>
 * <li>{@code id} - stored only.</li>
 * </ul>
 * The HTML content is reduced to plain text before indexing. A missing
 * title, content or publish time is indexed as an empty string instead of
 * aborting the whole build.
 * <p>
 * If anything fails while writing, the writer is rolled back so that the
 * write lock is released and the uncommitted changes are discarded.
 *
 * @throws Exception if the news cannot be loaded, the HTML cannot be parsed
 *                   or the index cannot be written
 */
@SuppressWarnings("unchecked")
public void createIndexForNews() throws Exception {
    // Load the data before opening the writer: a failure here must not leave
    // the index directory locked.
    List<TNewsitem> list = service.getNews();
    DateFormat format = new SimpleDateFormat("yyyy年MM月dd日 HH时mm分ss秒");
    IndexWriter indexWriter = new IndexWriter(new SimpleFSDirectory(new File(path)), analyzer, true,
            IndexWriter.MaxFieldLength.LIMITED);
    boolean finished = false;
    try {
        // Create one document per news entity.
        for (TNewsitem newsItem : list) {
            String newsTitle = newsItem.getNewsTitle() == null ? "" : newsItem.getNewsTitle();
            String newsContent = extractPlainText(newsItem.getNewsContent());
            String publishDate = newsItem.getPublishTime() == null ? ""
                    : format.format(newsItem.getPublishTime());
            String id = String.valueOf(newsItem.getId());

            Document doc = new Document();
            doc.add(new Field("title", newsTitle, Field.Store.YES, Field.Index.ANALYZED));
            doc.add(new Field("content", newsContent, Field.Store.YES, Field.Index.ANALYZED));
            doc.add(new Field("date", publishDate, Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.add(new Field("id", id, Field.Store.YES, Field.Index.NO));
            indexWriter.addDocument(doc);
        }
        // Optimize the index, then commit it by closing the writer.
        indexWriter.optimize();
        indexWriter.close();
        finished = true;
    } finally {
        if (!finished) {
            // Closes the writer without committing and releases the write lock.
            indexWriter.rollback();
        }
    }
}

/**
 * Strips the markup from an HTML snippet and returns its text.
 * <p>
 * The text of every top-level node is concatenated, not just the first one,
 * so content made of several sibling elements is indexed in full.
 *
 * @param html the HTML to convert; may be {@code null} or blank
 * @return the trimmed plain text, or an empty string for {@code null}/blank input
 * @throws Exception if the HTML parser rejects the input
 */
private String extractPlainText(String html) throws Exception {
    if (html == null || html.trim().length() == 0) {
        return "";
    }
    Parser parser = new Parser();
    parser.setInputHTML(html);
    org.htmlparser.util.NodeList nodes = parser.parse(null);
    StringBuilder text = new StringBuilder();
    for (int i = 0; i < nodes.size(); i++) {
        text.append(nodes.elementAt(i).toPlainTextString());
    }
    return text.toString().trim();
}
/**
 * Rebuilds the tag index in {@code path2}.
 * <p>
 * Every news title returned by {@code service.getNews()} is stored and
 * analyzed in a {@code tags} field, one document per title. Items without a
 * title are skipped because they cannot produce a usable entry.
 * <p>
 * If anything fails while writing, the writer is rolled back so that the
 * write lock is released and the uncommitted changes are discarded.
 *
 * @throws IOException               if the index cannot be written
 * @throws LockObtainFailedException if another writer holds the index lock
 * @throws CorruptIndexException     if the existing index is corrupt
 */
@SuppressWarnings("unchecked")
public void createIndexForTag() throws CorruptIndexException, LockObtainFailedException, IOException{
    // Load the data before opening the writer: a failure here must not leave
    // the index directory locked.
    List<TNewsitem> list = service.getNews();
    IndexWriter indexWriter = new IndexWriter(new SimpleFSDirectory(new File(path2)), analyzer, true,
            IndexWriter.MaxFieldLength.LIMITED);
    boolean finished = false;
    try {
        for (TNewsitem newsItem : list) {
            String tags = newsItem.getNewsTitle();
            if (tags == null || tags.trim().length() == 0) {
                // Field rejects null values, and a blank tag can never match a query.
                continue;
            }
            Document doc = new Document();
            doc.add(new Field("tags", tags, Field.Store.YES, Field.Index.ANALYZED));
            indexWriter.addDocument(doc);
        }
        // Optimize the index, then commit it by closing the writer.
        indexWriter.optimize();
        indexWriter.close();
        finished = true;
    } finally {
        if (!finished) {
            // Closes the writer without committing and releases the write lock.
            indexWriter.rollback();
        }
    }
}
上面这个类主要是用来创建索引。
package luence;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
import entity.SearchResultBean;
/**
 * Queries the Lucene indexes stored in {@code path} (full-text news) and
 * {@code path2} (tags).
 * <p>
 * Each call opens its own reader and searcher and closes them before
 * returning, so no file handles stay open between searches.
 *
 * @author tqc
 */
public class IndexSearch {
    /** Maximum number of hits fetched for a single query. */
    private static final int MAX_HITS = 10000;

    String path = "C:/index";// folder that holds the full-text news index
    String path2 = "C:/index2";// folder that holds the tag index
    Analyzer analyzer = new IKAnalyzer();// Chinese word-segmenting analyzer

    /**
     * Searches the {@code title} and {@code content} fields of the news index
     * and returns the hits with the matched terms highlighted.
     * <p>
     * Matches are wrapped in {@code <b><font color='red'>...</font></b>}. When
     * a field contains no match, the beginning of its stored text is returned
     * instead (up to 100 characters of content, up to 20 of title).
     * <p>
     * {@code searchParam} is interpreted with Lucene query syntax.
     *
     * @param searchParam the user's query; {@code null} or blank yields an empty result
     * @return one bean per hit, in the order the searcher returned them; never {@code null}
     * @throws Exception if the index cannot be read or the query cannot be parsed
     */
    public List<SearchResultBean> getSearchResult(String searchParam)
            throws Exception {
        List<SearchResultBean> list = new ArrayList<SearchResultBean>();
        if (searchParam == null || searchParam.trim().length() == 0) {
            return list;
        }
        IndexReader reader = IndexReader.open(new SimpleFSDirectory(new File(path)));
        try {
            IndexSearcher searcher = new IndexSearcher(reader);
            try {
                String[] fileds = { "title", "content" };// fields to search in
                QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_30, fileds, analyzer);
                Query query = parser.parse(searchParam);
                TopDocs docs = searcher.search(query, MAX_HITS);
                System.out.println("--->匹配总个数:" + docs.totalHits);

                Formatter formatter = new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>");
                Scorer scorer = new QueryScorer(query);
                Highlighter highlighter = new Highlighter(formatter, scorer);
                Fragmenter fragmenter = new SimpleFragmenter(50);
                highlighter.setTextFragmenter(fragmenter);

                for (ScoreDoc scoredoc : docs.scoreDocs) {
                    Document doc = searcher.doc(scoredoc.doc);
                    SearchResultBean srb = new SearchResultBean();
                    srb.setContent(highlightOrTruncate(highlighter, "content", doc.get("content"), 100));
                    srb.setTitle(highlightOrTruncate(highlighter, "title", doc.get("title"), 20));
                    srb.setDate(doc.get("date"));
                    srb.setId(doc.get("id"));
                    list.add(srb);
                }
            } finally {
                // A searcher built from a reader does not close that reader itself.
                searcher.close();
            }
        } finally {
            reader.close();
        }
        return list;
    }

    /**
     * Returns the best highlighted fragment of {@code text}, or its leading
     * characters when the query does not match this field.
     *
     * @param highlighter highlighter already configured for the current query
     * @param fieldName   name of the field the text was stored under
     * @param text        stored field value; may be {@code null}
     * @param maxLength   number of leading characters to return when nothing is highlighted
     * @return the highlighted fragment, the truncated text, or an empty string for {@code null} text
     * @throws Exception if the text cannot be analyzed or highlighted
     */
    private String highlightOrTruncate(Highlighter highlighter, String fieldName, String text, int maxLength)
            throws Exception {
        if (text == null) {
            return "";
        }
        String fragment = highlighter.getBestFragment(analyzer, fieldName, text);
        if (fragment != null) {
            return fragment;
        }
        return text.substring(0, Math.min(maxLength, text.length()));
    }

    /**
     * Looks up {@code key} in the {@code tags} field of the tag index.
     *
     * @param key the text to search for, interpreted with Lucene query syntax;
     *            {@code null} or blank yields an empty result
     * @return one bean per hit with the stored tag as its title; never {@code null}
     * @throws IOException           if the index cannot be read
     * @throws CorruptIndexException if the index is corrupt
     * @throws ParseException        if {@code key} is not a valid query
     */
    public List<SearchResultBean> TermQuery(String key) throws CorruptIndexException, IOException, ParseException {
        List<SearchResultBean> list = new ArrayList<SearchResultBean>();
        if (key == null || key.trim().length() == 0) {
            return list;
        }
        IndexReader reader = IndexReader.open(new SimpleFSDirectory(new File(path2)));
        try {
            IndexSearcher searcher = new IndexSearcher(reader);
            try {
                QueryParser parser = new QueryParser(Version.LUCENE_30, "tags", analyzer);
                Query query = parser.parse(key);
                System.out.println(key);
                TopDocs docs = searcher.search(query, MAX_HITS);
                System.out.println("--->匹配总个数:" + docs.totalHits);
                for (ScoreDoc scoredoc : docs.scoreDocs) {
                    Document doc = searcher.doc(scoredoc.doc);
                    SearchResultBean srb = new SearchResultBean();
                    srb.setTitle(doc.get("tags"));
                    list.add(srb);
                }
            } finally {
                // A searcher built from a reader does not close that reader itself.
                searcher.close();
            }
        } finally {
            reader.close();
        }
        return list;
    }
}
}
这个类用于查询索引库。
接下来就可以在 Action 中使用这两个类了:
/**
 * Action entry point that rebuilds both search indexes.
 * <p>
 * Builds the full-text news index first, then the tag index, and logs a
 * confirmation line to standard output when both have completed.
 *
 * @return the {@code "index"} result name
 * @throws Exception if either index build fails
 */
public String c() throws Exception {
    // Full-text news index first, then the tag index.
    indexCreate.createIndexForNews();
    indexCreate.createIndexForTag();

    final String doneMessage = "-->索引创建成功!";
    System.out.println(doneMessage);
    return "index";
}
/**
 * Action entry point for the site search.
 * <p>
 * Reads the {@code key} request parameter, runs the full-text search and
 * exposes the hits to the view as the request attribute {@code res}. When the
 * parameter is missing or blank no search is run and no attribute is set.
 *
 * @return the {@code "index"} result name in every case
 * @throws Exception if the search fails
 */
public String s() throws Exception {
    String searchParam = ServletActionContext.getRequest().getParameter(
            "key");
    // Strings must be compared by content: `== ""` tests object identity and
    // never matches a request parameter, and a missing parameter is null.
    if (searchParam == null || searchParam.trim().length() == 0) {
        return "index";
    }
    List<?> list = indexSearch.getSearchResult(searchParam);
    ServletActionContext.getRequest().setAttribute("res", list);
    return "index";
}
因为客户希望像百度那样,在文本框中输入时能够自动补全提示,所以下一步来看看客户端的实现:
<script type="text/javascript" src="js/jquery.js"></script>
<script type='text/javascript' src='js/jquery.autocomplete.js'></script>
<link type="text/css" rel="stylesheet" href="css/jquery.autocomplete.css" />
我使用的是jquery的autocomplete插件。
<script type="text/javascript">
$(function() {
    // Wraps one row of the server response in the {data, value, result}
    // shape returned to the plugin; `result` is what ends up in the text box.
    function toSuggestion(row) {
        var text = row.value;
        return {
            data: row,
            value: text,
            result: text
        };
    }

    // Text shown for one entry of the suggestion list.
    function describeItem(row, i, max) {
        var position = " 第" + i + "条记录,共" + max + "条";
        return row.value + position;
    }

    // Text the plugin matches the typed input against.
    function matchText(row, i, max) {
        return row.name + " " + row.value;
    }

    // Value used once an entry is chosen.
    function chosenText(row) {
        return row.value;
    }

    var suggestOptions = {
        minChars: 1,      // minimum number of typed characters before suggesting
        width: 360,       // width of the suggestion box
        autoFill: false,  // do not auto-fill the text box
        multiple: false,  // do not allow several auto-filled values
        dataType: "json", // response data type
        parse: function(data) {
            return $.map(data, toSuggestion);
        },
        formatItem: describeItem,
        formatMatch: matchText,
        formatResult: chosenText
    };

    $("#product").autocomplete("s!s", suggestOptions);
});
</script>
因为自动补全是通过 Ajax 请求实现的,下面看看后台是如何处理的:
/**
 * Action entry point for the autocomplete box.
 * <p>
 * Reads the {@code q} request parameter, looks it up in the tag index and
 * writes the matches to the response as a JSON array of
 * {@code {"name":"eee","value":"<tag>"}} objects. A missing or blank
 * parameter produces an empty array.
 * <p>
 * The output is strict JSON: keys and values are double-quoted and tag text
 * is escaped, so quotes, backslashes or line breaks in a tag cannot break
 * the response.
 *
 * @return {@code null}, because the response body is written directly
 * @throws Exception if the lookup fails or the response cannot be written
 */
@SuppressWarnings("unchecked")
public String s() throws Exception {
    ServletActionContext.getResponse().setContentType(
            "text/json;charset=UTF-8");
    HttpServletRequest request = ServletActionContext.getRequest();
    String key = request.getParameter("q");
    StringBuilder sg = new StringBuilder();
    sg.append("[");
    if (key != null && key.trim().length() > 0) {
        // Re-decode the parameter: reinterpret its ISO-8859-1 bytes as UTF-8.
        key = new String(key.getBytes("ISO-8859-1"), "utf-8");
        List<SearchResultBean> list = indexSearch.TermQuery(key);
        for (int i = 0; i < list.size(); i++) {
            if (i > 0) {
                sg.append(',');
            }
            sg.append("{\"name\":\"eee\",\"value\":");
            appendJsonString(sg, list.get(i).getTitle());
            sg.append('}');
        }
    }
    sg.append("]");
    ServletActionContext.getResponse().getWriter().print(sg.toString());
    return null;
}

/**
 * Appends {@code text} to {@code out} as a double-quoted JSON string literal.
 *
 * @param out  buffer to append to
 * @param text raw text; {@code null} is written as an empty string
 */
private static void appendJsonString(StringBuilder out, String text) {
    out.append('"');
    if (text != null) {
        for (int i = 0; i < text.length(); i++) {
            char ch = text.charAt(i);
            switch (ch) {
            case '"':
                out.append("\\\"");
                break;
            case '\\':
                out.append("\\\\");
                break;
            case '\n':
                out.append("\\n");
                break;
            case '\r':
                out.append("\\r");
                break;
            case '\t':
                out.append("\\t");
                break;
            default:
                // Control characters must be escaped in JSON; U+2028/U+2029 are
                // escaped too because they end a line in JavaScript source.
                if (ch < 0x20 || ch == 0x2028 || ch == 0x2029) {
                    out.append(String.format("\\u%04x", (int) ch));
                } else {
                    out.append(ch);
                }
            }
        }
    }
    out.append('"');
}
后台返回的是 JSON 格式的数据。到这里,基本功能就完成了!