Hibernate Search 是 JBoss 自家的项目,与 Hibernate 的集成应该比较好,
所以想在项目里使用 Hibernate Search 来实现全文搜索。
分词使用的是 IKAnalyzer
网站是 :
http://code.google.com/p/ik-analyzer/
使用的 是 hibernate 3.6.8 + spring 3.0.6 + hibernate search 3.4.1 +IKAnalyzer 3.2.8
数据库是 MySQL,连接池是 c3p0。
使用 Hibernate Search 3.4 时,已经不需要再手动配置 Hibernate 的事件监听器了。(JBoss 自家的东西支持就是好点)
下面是配置文件:
com.freewebsys ${hibernate.dialect} ${hibernate.hbm2ddl.auto} true ${hibernate.show_sql} ${hibernate.jdbc.fetch_size} ${hibernate.jdbc.batch_size} ${hibernate.cache.use_query_cache} true 1, false 0, yes 'Y', no 'N' ${hibernate.search.default.directory_provider} ${hibernate.search.default.indexBase} ${hibernate.search.analyzer}
然后就是配置 Hibernate 事务。
true PROPAGATION_REQUIRED,-Exception PROPAGATION_REQUIRED,readOnly PROPAGATION_REQUIRED,readOnly PROPAGATION_REQUIRED,readOnly *Service
transactionInterceptor
创建搜索的 bean
package com.freewebsys.demo.pojo; import javax.persistence.Column; import javax.persistence.Entity; import javax.persistence.GeneratedValue; import javax.persistence.GenerationType; import static javax.persistence.GenerationType.IDENTITY; import javax.persistence.Id; import javax.persistence.Table; import javax.persistence.UniqueConstraint; import org.hibernate.search.annotations.Analyzer; import org.hibernate.search.annotations.DocumentId; import org.hibernate.search.annotations.Field; import org.hibernate.search.annotations.Index; import org.hibernate.search.annotations.Indexed; import org.hibernate.search.annotations.Store; import org.wltea.analyzer.lucene.IKAnalyzer; @Entity @Table(name = "user_info") @Indexed public class UserInfo implements java.io.Serializable { private Long id; private String userName; private String passwd; private String city; private String content; public UserInfo() { } @Id @GeneratedValue(strategy = GenerationType.IDENTITY) @Column(name = "id", unique = true, nullable = false) @DocumentId public Long getId() { return id; } public void setId(Long id) { this.id = id; } @Column(name = "user_name", unique = false, nullable = true, length = 100) @Field(name = "user_name", index = Index.TOKENIZED, store = Store.YES) public String getUserName() { return userName; } public void setUserName(String userName) { this.userName = userName; } @Column(name = "passwd", unique = false, nullable = true, length = 100) @Field(name = "passwd", index = Index.TOKENIZED, store = Store.YES) public String getPasswd() { return passwd; } public void setPasswd(String passwd) { this.passwd = passwd; } @Column(name = "city", unique = false, nullable = true, length = 100) @Field(name = "city", index = Index.TOKENIZED, store = Store.YES) public String getCity() { return city; } public void setCity(String city) { this.city = city; } @Column(name = "content", unique = false, nullable = true, length = 4000) @Field(name = "content", index = Index.TOKENIZED, store = 
Store.YES, analyzer = @Analyzer(impl = IKAnalyzer.class)) public String getContent() { return content; } public void setContent(String content) { this.content = content; } @Override public String toString() { return "UserInfo [id=" + id + ", userName=" + userName + ", passwd=" + passwd + ", city=" + city + ", content=" + content + "]"; } }
@Indexed 标识这个要被索引。
@Field(name = "user_name", index = Index.TOKENIZED, store = Store.YES)
标识 索引字段
@Field(name = "content", index = Index.TOKENIZED, store = Store.YES, analyzer = @Analyzer(impl = IKAnalyzer.class))
具体 其他参数 参考 博客 : http://sin90lzc.iteye.com/blog/1106258
上面 content 字段的注解既标识它是索引字段,又指定它的分词器为 IKAnalyzer。
然后就是 service 的实现。这里省略了 dao 层,service 直接继承 HibernateDaoSupport。
package com.freewebsys.demo.service.impl;

import java.io.IOException;
import java.util.Collections;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.util.Version;
import org.hibernate.Query;
import org.hibernate.Session;
import org.hibernate.SessionFactory;
import org.hibernate.search.FullTextQuery;
import org.hibernate.search.FullTextSession;
import org.hibernate.search.Search;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.orm.hibernate3.support.HibernateDaoSupport;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import org.wltea.analyzer.lucene.IKAnalyzer;

import com.freewebsys.demo.pojo.UserInfo;
import com.freewebsys.demo.service.UserInfoService;

/**
 * {@link UserInfoService} implementation that talks to Hibernate directly
 * (there is no separate DAO layer) and offers a full-text search over the
 * {@code content} field with highlighted result fragments.
 */
@Service("userInfoService")
public class UserInfoServiceImpl extends HibernateDaoSupport implements UserInfoService {

    private static final Log log = LogFactory.getLog(UserInfoServiceImpl.class);

    /** Name of the indexed field that is searched and highlighted. */
    private static final String CONTENT_FIELD = "content";

    /** Size of the text fragment kept around a hit; the full text is never returned. */
    private static final int FRAGMENT_SIZE = 20;

    /** Hands the Spring-managed session factory to {@link HibernateDaoSupport}. */
    @Autowired
    public void setMySessionFactory(SessionFactory sessionFactory) {
        setSessionFactory(sessionFactory);
    }

    /** Persists the given user. */
    @Transactional
    public void save(UserInfo userInfo) {
        getHibernateTemplate().save(userInfo);
    }

    /** Deletes the given user. */
    @Transactional
    public void delete(UserInfo userInfo) {
        getHibernateTemplate().delete(userInfo);
    }

    /**
     * Looks users up by exact user name with an HQL query.
     *
     * @param userName the user name to match
     * @return the matching users
     */
    @Transactional
    @SuppressWarnings("unchecked") // HibernateTemplate.find returns a raw List
    public List<UserInfo> findUserInfo(String userName) {
        String hql = " from UserInfo userInfo where userInfo.userName = ? ";
        return getHibernateTemplate().find(hql, userName);
    }

    /**
     * Runs a full-text search on the {@code content} field and replaces the
     * content of every hit with its best highlighted fragment.
     *
     * <p>The returned entities are detached from the session before their
     * content is overwritten, so the shortened fragment can never be flushed
     * back to the database. A hit for which no fragment can be produced keeps
     * its original content.
     *
     * @param content the search text in Lucene query syntax
     * @return the matching users; empty when the text is blank or cannot be parsed
     */
    @SuppressWarnings("unchecked") // FullTextQuery.list() returns a raw List
    public List<UserInfo> findUserInfoBySearchContent(String content) {
        if (content == null || content.trim().length() == 0) {
            return Collections.emptyList();
        }

        // The field is indexed with IKAnalyzer, so the query has to be
        // tokenized by the same analyzer or multi-word Chinese input will not
        // line up with the indexed terms.
        Analyzer analyzer = new IKAnalyzer();
        org.apache.lucene.search.Query luceneQuery;
        try {
            luceneQuery = new QueryParser(Version.LUCENE_31, CONTENT_FIELD, analyzer).parse(content);
        } catch (ParseException e) {
            log.warn("Search text could not be parsed: " + content, e);
            return Collections.emptyList();
        }

        Session session = getSession();
        try {
            FullTextSession fullTextSession = Search.getFullTextSession(session);
            FullTextQuery fullTextQuery = fullTextSession.createFullTextQuery(luceneQuery, UserInfo.class);
            List<UserInfo> users = fullTextQuery.list();

            // The no-arg formatter wraps each matched term in the library's
            // default <B>...</B> tags so the page can render it emphasised.
            Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(luceneQuery));
            highlighter.setTextFragmenter(new SimpleFragmenter(FRAGMENT_SIZE));

            for (UserInfo user : users) {
                String fragment = highlight(highlighter, analyzer, user.getContent());
                if (fragment != null) {
                    // Detach first: the entity is about to carry display-only text.
                    session.evict(user);
                    user.setContent(fragment);
                }
            }
            return users;
        } finally {
            // getSession() must be paired with releaseSession(); this closes the
            // session unless it is bound to a running transaction.
            releaseSession(session);
        }
    }

    /**
     * Produces the best highlighted fragment of {@code text}, or {@code null}
     * when the text is null, contains no hit, or cannot be tokenized.
     */
    private String highlight(Highlighter highlighter, Analyzer analyzer, String text) {
        if (text == null) {
            return null;
        }
        try {
            String fragment = highlighter.getBestFragment(analyzer, CONTENT_FIELD, text);
            if (log.isDebugEnabled()) {
                log.debug("Highlighted fragment: " + fragment);
            }
            return fragment;
        } catch (IOException e) {
            log.warn("Could not highlight content", e);
        } catch (InvalidTokenOffsetsException e) {
            log.warn("Could not highlight content", e);
        }
        return null;
    }

    /**
     * Loads every user.
     *
     * @return all users in the table
     */
    @SuppressWarnings("unchecked") // HibernateTemplate.find returns a raw List
    public List<UserInfo> findAllUserInfo() {
        String hql = " from UserInfo userInfo";
        return getHibernateTemplate().find(hql);
    }
}
最关键的就是搜索 然后 显示 高亮并 将文章内容截取 。
也可以使用setFirstResult setMaxResults 对搜索进行分页。
fullTextQuery.getResultSize() 返回的是命中的总记录数(不是总页数),总页数需要再用它除以 pageSize 并向上取整得到。
fullTextQuery.setFirstResult((pageNo - 1) * pageSize); fullTextQuery.setMaxResults(pageSize);
高亮显示时,需要对每条结果的正文再做一次分词,以便在正文中定位查询的关键词:
// 高亮设置 SimpleHTMLFormatter formatter = new SimpleHTMLFormatter( "", ""); QueryScorer qs = new QueryScorer(luceneQuery); Highlighter highlighter = new Highlighter(formatter, qs); // 这个20是指定关键字字符串的context的长度,你可以自己设定,因为不可能返回整篇正文内容 highlighter.setTextFragmenter(new SimpleFragmenter(20)); for (UserInfo userInfo : useList) { Analyzer analyzer = new IKAnalyzer(); try { String contentHighLighter = highlighter.getBestFragment( analyzer, content, userInfo.getContent()); System.out.println(contentHighLighter); userInfo.setContent(contentHighLighter); } catch (Exception e) { e.printStackTrace(); } } return useList;
测试代码放在 test 目录里,运行测试之前要先初始化数据库中的数据。
还可以对 数据里面的html 代码去掉。然后再展示:
QueryScorer qs = new QueryScorer(luceneQuery); Highlighter highlighter = new Highlighter(formatter, qs); // 这个20是指定关键字字符串的context的长度,你可以自己设定,因为不可能返回整篇正文内容 highlighter.setTextFragmenter(new SimpleFragmenter(20)); String contentStr = null; for (UserInfo userInfo : useList) { Analyzer analyzer = new IKAnalyzer(); try { contentStr = userInfo.getContent(); // 去掉所有html元素, contentStr = contentStr.replaceAll("<[a-zA-Z]+[1-9]?[^><]*>", "").replaceAll("[a-zA-Z]+[1-9]?>", ""); String contentHighLighter = highlighter.getBestFragment( analyzer, content, contentStr); System.out.println(contentHighLighter); userInfo.setContent(contentHighLighter); } catch (Exception e) { e.printStackTrace(); } }
运行junit 测试:
Listlist = userInfoService .findUserInfoBySearchContent("三个月"); System.out.println(list.size()); System.out.println("Finish ########"); for (UserInfo userInfo : list) { System.out.println(userInfo); }
查询结果如下:
三个月必须到杭州进行全身心开发,三个月之后 三个月可以全身心在杭州专注于项目开发。三个月 开发的三个月内,天使湾将在杭州每周举办分享 三个月内,不同创业团队在确保独立自主的 地。在杭州三个月期间创业团队的住宿餐饮 三个月绝对以一当十! 9.天使湾聚变
到页面就可以显示 高亮并 截取字符串了。
需要注意的一点是:执行全文搜索的时候,同时也会执行一次 SQL 查询。
如执行:
Hibernate: select this_.id as id0_0_, this_.city as city0_0_, this_.passwd as passwd0_0_, this_.user_name as user4_0_0_ from user_info this_ where (this_.id in (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?))
这条 SQL 的作用是按主键把索引中命中的记录对应的实体从数据库中加载出来。如果只需要索引里已经存储(Store.YES)的字段,可以用 fullTextQuery.setProjection(...) 做投影查询,从而避免这次数据库访问。
总的来说 hibernate search 将 搜索简化了很多。
附件是 工程代码: