文章来源:http://my.oschina.net/MrMichael/blog/261231
拼音检索的大致思路是这样的:
①将需要使用拼音检索的字段汇集到一个拼音分词字段里(我的拼音分词字段是用 pinyin4j + NGram 实现的);
加入两个jar包:pinyin4j-2.5.0.jar、pinyinAnalyzer.jar;
schema.xml文件设置:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
|
<!-- Aggregate field used for pinyin search: indexed, not stored, and multi-valued
     so that it can receive values from several copyField sources. -->
<field name="pinyin" type="text_pinyin" indexed="true" stored="false" multiValued="true"/>

<!-- Every field that should be searchable by pinyin is copied into "pinyin". -->
<copyField source="name" dest="pinyin"/>
<copyField source="author" dest="pinyin"/>
<copyField source="region" dest="pinyin"/>
<copyField source="theme" dest="pinyin"/>
<!-- by michael: pinyin -->
<!-- Field type used by the "pinyin" field. The index-time and query-time analyzers
     are configured with the same tokenizer and the same three filters, in the same order. -->
<fieldType name="text_pinyin" class="solr.TextField" positionIncrementGap="0">
  <analyzer type="index">
    <tokenizer class="org.apache.lucene.analysis.cn.smart.SmartChineseSentenceTokenizerFactory"/>
    <filter class="org.apache.lucene.analysis.cn.smart.SmartChineseWordTokenFilterFactory"/>
    <!-- NOTE(review): "minTermLenght" is kept exactly as written; presumably this is the
         attribute name the custom factory reads - verify against pinyinAnalyzer.jar before
         "correcting" the spelling. -->
    <filter class="com.shentong.search.analyzers.PinyinTransformTokenFilterFactory" minTermLenght="2"/>
    <filter class="com.shentong.search.analyzers.PinyinNGramTokenFilterFactory" minGram="6" maxGram="20"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="org.apache.lucene.analysis.cn.smart.SmartChineseSentenceTokenizerFactory"/>
    <filter class="org.apache.lucene.analysis.cn.smart.SmartChineseWordTokenFilterFactory"/>
    <!-- Same attribute spelling as in the index analyzer; see the note there. -->
    <filter class="com.shentong.search.analyzers.PinyinTransformTokenFilterFactory" minTermLenght="2"/>
    <filter class="com.shentong.search.analyzers.PinyinNGramTokenFilterFactory" minGram="6" maxGram="20"/>
  </analyzer>
</fieldType>
|
②使用solrj进行拼音检索:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
|
/**
 * Runs a pinyin search against the {@code pinyin} field and returns a short
 * summary of every document in the response.
 *
 * <p>Known limitation noted by the original author: the search can also return
 * documents that do not actually satisfy the query. The suggested remedy is to
 * register the Chinese characters and their pinyin as synonyms.
 *
 * @param pinyin the pinyin text to search for; it is appended to the query
 *               string as-is and is not escaped here
 * @return one map per returned document with the keys {@code "name"} and
 *         {@code "content"} (content is cut to at most 200 characters; a
 *         missing field is reported as an empty string), or {@code null} if
 *         the query failed with a {@link SolrServerException}
 *
 * @author ChenYW
 * @date 2014-4-16 01:44:57 PM
 */
public List<Map<String, String>> phoneticize(String pinyin) {
    // Longest content excerpt returned for a single document.
    final int maxContentLength = 200;
    try {
        List<Map<String, String>> list = new ArrayList<Map<String, String>>();

        SolrQuery query = new SolrQuery();
        query.set("q", "pinyin:" + pinyin);

        // Execute the query.
        QueryResponse qr = server.query(query);
        SolrDocumentList dlist = qr.getResults();
        System.out.println("总数:" + dlist.getNumFound());

        for (SolrDocument sd : dlist) {
            // getFieldValue returns null when the document has no such field;
            // fall back to "" instead of failing on toString().
            Object nameValue = sd.getFieldValue("name");
            Object contentValue = sd.getFieldValue("content");
            String name = nameValue == null ? "" : nameValue.toString();
            String content = contentValue == null ? "" : contentValue.toString();

            // Only truncate when there is something to cut: an unconditional
            // substring(0, 200) throws for content shorter than 200 characters.
            if (content.length() > maxContentLength) {
                content = content.substring(0, maxContentLength);
            }

            Map<String, String> mapRe = new HashMap<String, String>();
            mapRe.put("name", name);
            mapRe.put("content", content);
            list.add(mapRe);
        }
        return list;
    } catch (SolrServerException e) {
        e.printStackTrace();
    }
    // Kept for backward compatibility: callers see null when the query failed.
    return null;
}
|