以前做的都是一些应用系统,这次是第一次接触搜索引擎的开发。这个搜索引擎我用的是比较通用的 Lucene 来实现,自己也不算很懂,总之是马马虎虎弄出来了,欢迎高手拍砖。上代码:
1. 添加 Lucene 核心 jar 包(我这里用 Maven 管理项目,所以直接截图;Lucene 用的是 2.4.0,对应的 Maven 坐标为 org.apache.lucene:lucene-core:2.4.0)
2. 编写 Lucene 工具类:
import java.io.File;
import java.lang.reflect.InvocationHandler;
import java.lang.reflect.Method;
import java.lang.reflect.Proxy;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
public class Digest {
private static String indexPath = null;
/**
 * Converts each goods record into a Lucene {@link Document} and writes the documents
 * to the index directory {@code <context real path>/lucence/}.
 *
 * <p>The index is opened in "create" mode, so every call replaces whatever index was
 * previously stored in that directory. As a side effect the resolved directory is
 * remembered in the static {@code indexPath} field, which {@code main} later reads to
 * open a searcher.
 *
 * <p>Indexing is best effort: any failure while writing is printed to standard error
 * and not rethrown. The writer is always closed, even on failure.
 *
 * @param goodslist records to index (typically model objects loaded from the database);
 *                  {@code null} is treated like an empty list
 * @param context   servlet context used to resolve the web application's real path
 */
public static void goodsListToLucence(List<Goods> goodslist, ServletContext context) {
    // NOTE(review): per the Servlet API, getRealPath may return null (e.g. when the
    // application is served from a packed WAR); the path would then start with the
    // literal text "null". Confirm the deployment always runs exploded.
    indexPath = context.getRealPath("") + "/lucence/";

    /* Make sure the index directory exists before Lucene opens it. */
    File fileDir = new File(indexPath);
    if (!fileDir.exists()) {
        fileDir.mkdirs();
    }

    IndexWriter indexWriter = null;
    try {
        // "true" = create a fresh index, discarding any existing one in the directory.
        indexWriter = new IndexWriter(indexPath, new StandardAnalyzer(), true,
                IndexWriter.MaxFieldLength.UNLIMITED);
        if (goodslist != null) {
            for (Goods goods : goodslist) {
                Document doc = new Document();
                addAnalyzedField(doc, "id", goods.getId() + "");
                addAnalyzedField(doc, "code", goods.getCode());
                addAnalyzedField(doc, "goodstitle", goods.getGoodstitle());
                indexWriter.addDocument(doc);
            }
        }
        // Merge index segments after the bulk load.
        indexWriter.optimize();
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Always close the writer: a failure in addDocument/optimize must not leave
        // it (and the index lock it holds) open.
        if (indexWriter != null) {
            try {
                indexWriter.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
}

/**
 * Adds a stored (compressed) and analyzed field to {@code doc}; a {@code null} value is
 * skipped because the {@link Field} constructor rejects null values.
 */
private static void addAnalyzedField(Document doc, String name, String value) {
    if (value != null) {
        doc.add(new Field(name, value, Store.COMPRESS, Index.ANALYZED));
    }
}
/**
* 测试 写入lucence和从lucence查询含有关键词的数据,并将关键词高亮显示
* (这里搜索的是goods表中的goodstitle和code,也就是拿着关键词到这两个字段里的数据去找,其中goodstitle要求对其关键字全模糊并且关键字高亮(这里高亮我没有用那个什么highar插件,自己随便写的字符串截取的方式实现的))
* 这里List<goods>就胡乱写些数据
*
*/
public static void main(String args[]) {
/*
*测试把数据写入lucence索引库目录
*
*/
List<Goods> goodsList = new ArrayList<Goods>();
Goods goods = new Goods();
goods.setId(1);goods.setGoodstitle("sdsdsdsddsdsds");goods.setCode("1233code");
goodsList.add(goods);
goodsListToLucence(goodsList,ServletContext context);//第二个数据是lucence索引的目录路径(写入完成)
/*
*测试把数据从lucence索引库目录里根据关键字拿出来
*
*/
String[] fields={"code", "goodstitle"};//要参与关键字查询的字段
String querie="关键字";
BooleanClause.Occur[] clauses = { BooleanClause.Occur.SHOULD,BooleanClause.Occur.SHOULD};
Query query;
try {
Query query = MultiFieldQueryParser.parse(querie, fields, clauses, new StandardAnalyzer());//多字段查询
Query mquery = new WildcardQuery(new Term("goodstitle", "*"+querie+"*"));//需要进行模糊查询的字段
BooleanQuery bquery = new BooleanQuery();//构造多重符合查询方式的query
bquery.add(query, BooleanClause.Occur.SHOULD);//SHOULD或的意思
bquery.add(mquery, BooleanClause.Occur.SHOULD);
Searcher searcher = new IndexSearcher(indexPath);
Filter filter = null;
TopDocs topDocs = searcher.search(query, filter, 10);
Goods goods2 = null;
// 打印结果
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
Document document = indexSearcher.doc(scoreDoc.doc); // 根据编号取出相应的文档
goods2 = new Goods();
goods2.setId(Long.parseLong(document.get("id")));
if(document.get("goodsId") !=null && !document.get("goodsId").equals("")) {
goods2.setGoodsId(Long.parseLong(document.get("goodsId")));
}
if(document.get("code") !=null && !document.get("code").equals("")) {
goods2.setCode(document.get("code"));
}
if(document.get("goodstitle") !=null && !document.get("goodstitle").equals("")) {
goods2.setGoodstitle(document.get("goodstitle").replaceAll(querie,"<font color='red'>"+ querie+ "</font>"));//高亮显示,我不想用那网上的,太复杂
}
System.out.println(goods2);//查看从lucence索引目录中获得的数据
}
} catch (Exception e) {
e.printStackTrace();
}
}