直接上代码:
1
public
class UserIndexService {
2
3
private
final Log logger = LogFactory.getLog(UserIndexService.
class);
4
private
final String dirPath = "d:/temp/user";
5
6 Analyzer analyzer =
new IKAnalyzer();
7 Directory directory =
null;
8 IndexWriter writer =
null;
9 IndexSearcher indexSearcher =
null;
10
11
private
void confirmDirs() {
12 File indexFile =
new File(dirPath);
13
if (!indexFile.exists()) {
14 indexFile.mkdirs();
15 }
16
if (!indexFile.exists() || !indexFile.canWrite()) {
17
if (logger.isDebugEnabled())
18 logger.error("索引文件目录创建失败或不可写入!");
19 }
20 }
21
22
public
void init() {
23 confirmDirs();
24
try {
25 File f =
new File(dirPath);
26 directory = FSDirectory.open(f);
27
28 }
catch (Exception e) {
29
if (logger.isDebugEnabled()) {
30 logger.error("解除索引文件锁定失败!" + e.getCause());
31 }
32 }
33 }
34
35
public
void createIndex(List<User> userList) {
36 init();
37
try {
38
39
//
第一个参数是存放索引目录有FSDirectory(存储到磁盘上)和RAMDirectory(存储到内存中),
40
//
第二个参数是使用的分词器, 第三个:true,建立全新的索引,false,建立增量索引,第四个是建立的索引的最大长度
41
writer =
new IndexWriter(directory, analyzer,
true,IndexWriter.MaxFieldLength.LIMITED);
42 writer.setMergeFactor(500);
43 writer.setMaxBufferedDocs(155);
44 writer.setMaxFieldLength(Integer.MAX_VALUE);
45 writeIndex(writer, userList);
46 writer.optimize();
47 writer.close();
48 }
catch (IOException e) {
49
//
TODO Auto-generated catch block
50
e.printStackTrace();
51 }
52 }
53
54
public List<User> search(String keyword) {
55
56 File indexFile =
new File(dirPath);
57
if (!indexFile.exists()) {
58
return
null;
59 }
60 Directory dir;
61
try {
62 dir = FSDirectory.open(indexFile);
63 indexSearcher =
new IndexSearcher(dir);
64 indexSearcher.setSimilarity(
new IKSimilarity());
65
//
单字段查询,单条件查询
66
//
Query query = IKQueryParser.parse("userInfo", keyword);
67
68
//
多字段,单条件查询
69
String[] fields =
new String[] { "userInfo", "parameter1" };
70 Query query = IKQueryParser.parseMultiField(fields, keyword);
71
72
//
多字体,单条件,多BooleanClause.Occur[] flags , 查询条件的组合方式(Or/And)
73
//
BooleanClause.Occur[]数组,它表示多个条件之间的关系,
74
//
BooleanClause.Occur.MUST表示 and,
75
//
BooleanClause.Occur.MUST_NOT表示not,
76
//
BooleanClause.Occur.SHOULD表示or.
77
//
String[] fields =new String[]{"userInfo","parameter1"};
78
//
BooleanClause.Occur[] flags=new
79
//
BooleanClause.Occur[]{BooleanClause.Occur.MUST,BooleanClause.Occur.SHOULD};
80
//
Query query = IKQueryParser.parseMultiField(fields,
81
//
keyword,flags);
82
83
//
//
多Field,多条件查询分析
84
//
String[] fields =new String[]{"userInfo","parameter1"};
85
//
String[] queries = new String[]{keyword,keyword};
86
//
Query query = IKQueryParser.parseMultiField(fields,queries);
87
88
//
多Field,多条件,多Occur 查询
89
//
String[] fields =new String[]{"userInfo","parameter1"};
90
//
String[] queries = new String[]{keyword,keyword};
91
//
BooleanClause.Occur[] flags=new
92
//
BooleanClause.Occur[]{BooleanClause.Occur.MUST,BooleanClause.Occur.SHOULD};
93
//
Query query =
94
//
IKQueryParser.parseMultiField(fields,queries,flags);
95
96
//
搜索相似度最高的20条记录
97
TopDocs topDocs = indexSearcher.search(query, 20);
98 ScoreDoc[] hits = topDocs.scoreDocs;
99
return hitsToQuery(hits, query);
100
101 }
catch (IOException e) {
102
//
TODO Auto-generated catch block
103
e.printStackTrace();
104 }
105
106
return
null;
107 }
108
109
private List<User> hitsToQuery(ScoreDoc[] hits, Query query) {
110 List<User> list =
new ArrayList<User>();
111
try {
112
for (
int i = 0; i < hits.length; i++) {
113 User u =
new User();
114 Document doc = indexSearcher.doc(hits[i].doc);
115 u.setUserId(Integer.parseInt(doc.get("userId")));
116 u.setUserName(doc.get("userName"));
117 u.setUserAge(Integer.parseInt(doc.get("userAge")));
118
//
高亮设置
119
SimpleHTMLFormatter simpleHtmlFormatter =
new SimpleHTMLFormatter(
120 "<font color=\"red\">", "</font>");
121 Highlighter highlighter =
new Highlighter(simpleHtmlFormatter,
122
new QueryScorer(query));
123 TokenStream tokenStream = analyzer.tokenStream("text",
124
new StringReader(doc.get("userInfo")));
125 String userInfo = highlighter.getBestFragment(tokenStream, doc
126 .get("userInfo"));
127
if (userInfo !=
null) {
128 u.setUserInfo(userInfo);
129 }
else {
130 u.setUserInfo(doc.get("userInfo"));
131 }
132
133 SimpleHTMLFormatter simpleHtmlFormatter1 =
new SimpleHTMLFormatter(
134 "<font color=\"red\">", "</font>");
135 Highlighter highlighter1 =
new Highlighter(
136 simpleHtmlFormatter1,
new QueryScorer(query));
137 TokenStream tokenStream1 = analyzer.tokenStream("text1",
138
new StringReader(doc.get("parameter1")));
139 String p1 = highlighter1.getBestFragment(tokenStream1, doc
140 .get("parameter1"));
141
if (p1 !=
null) {
142 u.setParameter1(p1);
143 }
else {
144 u.setParameter1(doc.get("parameter1"));
145 }
146
147 u.setParameter2(doc.get("parameter2"));
148 u.setParameter3(doc.get("parameter3"));
149 u.setParameter4(doc.get("parameter4"));
150 list.add(u);
151 }
152
153 indexSearcher.close();
154
return list;
155 }
catch (CorruptIndexException e) {
156
//
TODO Auto-generated catch block
157
e.printStackTrace();
158 }
catch (IOException e) {
159
//
TODO Auto-generated catch block
160
e.printStackTrace();
161 }
catch (InvalidTokenOffsetsException e) {
162
//
TODO Auto-generated catch block
163
e.printStackTrace();
164 }
165
return
null;
166 }
167
168
public
void writeIndex(IndexWriter writer, List<User> userList) {
169
170
try {
171
for (User u : userList) {
172 Document doc = getDoc(u);
173 writer.addDocument(doc);
174 }
175 }
catch (IOException e) {
176
//
TODO Auto-generated catch block
177
e.printStackTrace();
178 }
179
180 }
181
182
private Document getDoc(User user) {
183 System.out.println("用户ID 为" + user.getUserId() + " 索引被创建");
184 Document doc =
new Document();
185 addField2Doc(doc, user, "userId", Store.YES, Index.NOT_ANALYZED);
186 addField2Doc(doc, user, "userName", Store.YES, Index.NOT_ANALYZED);
//
Index.NOT_ANALYZED
187
//
不分词,但建立索引
188
addField2Doc(doc, user, "userAge", Store.YES, Index.NOT_ANALYZED);
//
Index.ANALYZED
189
//
分词并且建立索引
190
addField2Doc(doc, user, "userInfo", Store.YES, Index.ANALYZED);
191 addField2Doc(doc, user, "parameter1", Store.YES, Index.ANALYZED);
192 addField2Doc(doc, user, "parameter2", Store.YES, Index.ANALYZED);
193 addField2Doc(doc, user, "parameter3", Store.YES, Index.ANALYZED);
194 addField2Doc(doc, user, "parameter4", Store.YES, Index.ANALYZED);
195
return doc;
196 }
197
198
private
void addField2Doc(Document doc, Object bean, String name, Store s,
199 Index i) {
200 String value;
201
try {
202 value = BeanUtils.getProperty(bean, name);
203
if (value !=
null) {
204 doc.add(
new Field(name, value, s, i,
205 Field.TermVector.WITH_POSITIONS_OFFSETS));
206 }
207 }
catch (IllegalAccessException e) {
208 logger.error("get bean property error", e);
209 }
catch (InvocationTargetException e) {
210 logger.error("get bean property error", e);
211 }
catch (NoSuchMethodException e) {
212 logger.error("get bean property error", e);
213 }
214 }
下面继续:
1
/**
2
* 没有排序,有高亮,有分页
3
*
4
*
@param
pageNo
5
*
@param
pageSize
6
*
@param
keyword
7
*
@return
8
*/
9
public PageBean getPageQuery(
int pageNo,
int pageSize, String keyword) {
10 List result =
new ArrayList();
11 File indexFile =
new File(dirPath);
12
if (!indexFile.exists()) {
13
return
null;
14 }
15 Directory dir;
16
try {
17 dir = FSDirectory.open(indexFile);
18 indexSearcher =
new IndexSearcher(dir);
19 indexSearcher.setSimilarity(
new IKSimilarity());
20
21 String[] fields =
new String[] { "userInfo", "parameter1" };
22 BooleanClause.Occur[] flags =
new BooleanClause.Occur[] {
23 BooleanClause.Occur.MUST, BooleanClause.Occur.SHOULD };
24 Query query = IKQueryParser.parseMultiField(fields, keyword, flags);
25
26 TopScoreDocCollector topCollector = TopScoreDocCollector.create(
27 indexSearcher.maxDoc(),
true);
28 indexSearcher.search(query, topCollector);
29
//
查询当页的记录
30
ScoreDoc[] docs = topCollector.topDocs((pageNo - 1) * pageSize,
31 pageSize).scoreDocs;
32
33
//
String[] highlightCol = {"userInfo", "parameter1"};
34
//
高亮设置
35
SimpleHTMLFormatter simpleHtmlFormatter =
new SimpleHTMLFormatter(
36 "<font color=\"red\">", "</font>");
37 Highlighter highlighter =
new Highlighter(simpleHtmlFormatter,
38
new QueryScorer(query));
39
40
for (ScoreDoc scdoc : docs) {
41 User u =
new User();
42 Document doc = indexSearcher.doc(scdoc.doc);
43
//
44
//
for (Fieldable fa : doc.getFields()) {
45
//
System.out.println(fa.name());
46
//
String value = doc.get(fa.name());
47
//
for (String col : highlightCol) {
48
//
if(fa.name().equals(col)) {
49
//
//
设置高显内容
50
//
TokenStream tokenStream = analyzer.tokenStream("text",new
51
//
StringReader(value));
52
//
value = highlighter.getBestFragment(tokenStream, value);
53
//
}
54
//
}
55
//
56
//
}
57
58 u.setUserId(Integer.parseInt(doc.get("userId")));
59 u.setUserName(doc.get("userName"));
60 u.setUserAge(Integer.parseInt(doc.get("userAge")));
61
62 TokenStream tokenStream = analyzer.tokenStream("text",
63
new StringReader(doc.get("userInfo")));
64 String userInfo = highlighter.getBestFragment(tokenStream, doc
65 .get("userInfo"));
66
if (userInfo !=
null) {
67 u.setUserInfo(userInfo);
68 }
else {
69 u.setUserInfo(doc.get("userInfo"));
70 }
71
72 TokenStream tokenStream1 = analyzer.tokenStream("text1",
73
new StringReader(doc.get("parameter1")));
74 String p1 = highlighter.getBestFragment(tokenStream1, doc
75 .get("parameter1"));
76
if (p1 !=
null) {
77 u.setParameter1(p1);
78 }
else {
79 u.setParameter1(doc.get("parameter1"));
80 }
81
82 u.setParameter2(doc.get("parameter2"));
83 u.setParameter3(doc.get("parameter3"));
84 u.setParameter4(doc.get("parameter4"));
85 result.add(u);
86
87 }
88 PageBean pb =
new PageBean();
89 pb.setCurrentPage(pageNo);
//
当前页
90
pb.setPageSize(pageSize);
91 pb.setAllRow(topCollector.getTotalHits());
//
hit中的记录数目
92
pb.setList(result);
93
return pb;
94
95 }
catch (IOException e) {
96
//
TODO Auto-generated catch block
97
e.printStackTrace();
98 }
catch (InvalidTokenOffsetsException e) {
99
//
TODO Auto-generated catch block
100
e.printStackTrace();
101 }
102
103
return
null;
104 }
再来:
1
/**
2
* 排序,有高亮,有分页
3
*
4
*
@param
pageNo
5
*
@param
pageSize
6
*
@param
keyword
7
*
@return
8
*/
9
public PageBean getPageQuery2(
int pageNo,
int pageSize, String keyword) {
10 List result =
new ArrayList();
11 File indexFile =
new File(dirPath);
12
if (!indexFile.exists()) {
13
return
null;
14 }
15 Directory dir;
16
try {
17 dir = FSDirectory.open(indexFile);
18 indexSearcher =
new IndexSearcher(dir);
19 indexSearcher.setSimilarity(
new IKSimilarity());
20
21 String[] fields =
new String[] { "userInfo", "parameter1" };
22 BooleanClause.Occur[] flags =
new BooleanClause.Occur[] {
23 BooleanClause.Occur.MUST, BooleanClause.Occur.SHOULD };
24 Query query = IKQueryParser.parseMultiField(fields, keyword, flags);
25
26
//
多字段排序,设置在前面的会优先排序
27
SortField[] sortFields =
new SortField[2];
28 SortField sortField =
new SortField("userId", SortField.INT,
false);
//
false升序,true降序
29
SortField FIELD_SEX =
new SortField("userAge", SortField.INT,
true);
30 sortFields[0] = sortField;
31 sortFields[1] = FIELD_SEX;
32 Sort sort =
new Sort(sortFields);
33
34 TopDocs topDocs = indexSearcher.search(query,
null, 50, sort);
35
36
if (topDocs.totalHits != 0) {
37
//
for(ScoreDoc sd : topDocs.scoreDocs) {
38
//
39
//
}
40
//
高亮设置
41
SimpleHTMLFormatter simpleHtmlFormatter =
new SimpleHTMLFormatter("<font color=\"red\">", "</font>");
42 Highlighter highlighter =
new Highlighter(simpleHtmlFormatter,
new QueryScorer(query));
43
44
for (
int i = (pageNo - 1) * pageSize; i < pageSize * pageNo; i++) {
45 ScoreDoc scdoc = topDocs.scoreDocs[i];
46 User u =
new User();
47 Document doc = indexSearcher.doc(scdoc.doc);
48 u.setUserId(Integer.parseInt(doc.get("userId")));
49 u.setUserName(doc.get("userName"));
50 u.setUserAge(Integer.parseInt(doc.get("userAge")));
51 TokenStream tokenStream = analyzer.tokenStream("text",
new StringReader(doc.get("userInfo")));
52 String userInfo = highlighter.getBestFragment(tokenStream,doc.get("userInfo"));
53
if (userInfo !=
null) {
54 u.setUserInfo(userInfo);
55 }
else {
56 u.setUserInfo(doc.get("userInfo"));
57 }
58
59 TokenStream tokenStream1 = analyzer.tokenStream("text1",
new StringReader(doc.get("parameter1")));
60 String p1 = highlighter.getBestFragment(tokenStream1, doc.get("parameter1"));
61
if (p1 !=
null) {
62 u.setParameter1(p1);
63 }
else {
64 u.setParameter1(doc.get("parameter1"));
65 }
66
67 u.setParameter2(doc.get("parameter2"));
68 u.setParameter3(doc.get("parameter3"));
69 u.setParameter4(doc.get("parameter4"));
70 result.add(u);
71
72 }
73 PageBean pb =
new PageBean();
74 pb.setCurrentPage(pageNo);
//
当前页
75
pb.setPageSize(pageSize);
76 pb.setAllRow(topDocs.totalHits);
//
hit中的记录数目
77
pb.setList(result);
78
return pb;
79
80 }
81 }
catch (IOException e) {
82
//
TODO Auto-generated catch block
83
e.printStackTrace();
84 }
catch (InvalidTokenOffsetsException e) {
85
//
TODO Auto-generated catch block
86
e.printStackTrace();
87 }
88
89
return
null;
90 }
91
92
/**
93
* 删除索引
94
*
@param
userId
95
*/
96
public
void deleIndex(String userId){
97
98
try {
99 File f =
new File(dirPath);
100 directory = FSDirectory.open(f);
101 IndexReader reader = IndexReader.open(directory,
false);
102 Term term =
new Term("userId", userId);
103 reader.deleteDocuments(term);
104 reader.close();
105 }
catch (IOException e) {
106
//
TODO Auto-generated catch block
107
e.printStackTrace();
108 }
109
110
111 }
112
113 }
114
115
将高亮设置抽取成一个独立的方法:
117
118
public String toHighlighter(Query query,Document doc,String field){
119
try {
120 SimpleHTMLFormatter simpleHtmlFormatter =
new SimpleHTMLFormatter("<font color=\"red\">", "</font>");
121 Highlighter highlighter =
new Highlighter(simpleHtmlFormatter,
new QueryScorer(query));
122 TokenStream tokenStream1 = analyzer.tokenStream("text",
new StringReader(doc.get(field)));
123 String highlighterStr = highlighter.getBestFragment(tokenStream1, doc.get(field));
124
125
return highlighterStr ==
null ? doc.get(field):highlighterStr;
126 }
catch (IOException e) {
127
//
TODO Auto-generated catch block
128
e.printStackTrace();
129 }
catch (InvalidTokenOffsetsException e) {
130
//
TODO Auto-generated catch block
131
e.printStackTrace();
132 }
133
return
null;
134 }