文章来源:http://my.oschina.net/MrMichael/blog/261242
①拼写检查所用的域不同于普通域:它在建立索引时需要分词,但在检索时不需要分词,所以要单独建立一个专用的域,以供拼写检查:
在 schema.xml 文件里定义拼写检查域,并指定哪些字段的内容需要复制到该域:
<!-- Dedicated spell-check field: indexed but not stored, and multi-valued
     so that several source fields can be copied into it. -->
<field name="spell" type="text_spell" indexed="true" stored="false" multiValued="true"/>

<!-- Copy the "name" and "content" fields into the spell-check field. -->
<copyField source="name" dest="spell"/>
<copyField source="content" dest="spell"/>
<!-- Field type used by the spell-check field. Text is segmented with the IK
     tokenizer at index time, but only split on whitespace at query time. -->
<fieldType name="text_spell" class="solr.TextField" positionIncrementGap="100">
  <analyzer type="index">
    <!-- NOTE(review): useSmart="false" presumably selects IK's fine-grained
         segmentation; confirm against the IK Analyzer version in use. -->
    <tokenizer class="org.wltea.analyzer.lucene.IKTokenizerFactory" useSmart="false" conf="ik.conf"/>
    <!-- Drop stop words listed in stopwords.txt, ignoring case. -->
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  </analyzer>
</fieldType>
②在 solrconfig.xml 文件里配置拼写检查组件(searchComponent)以及使用该组件的请求处理器(requestHandler):
<!-- Spell-check search component, registered under the name "spellerror". -->
<searchComponent name="spellerror" class="solr.SpellCheckComponent">
  <!-- Field type whose analyzer is used for the text being spell-checked. -->
  <str name="queryAnalyzerFieldType">text_spell</str>

  <!-- a spellchecker built from a field of the main index -->
  <lst name="spellchecker">
    <str name="name">default</str>
    <str name="field">spell</str>
    <str name="classname">solr.DirectSolrSpellChecker</str>
    <str name="distanceMeasure">internal</str>
    <float name="accuracy">0.5</float>
    <int name="maxEdits">2</int>
    <int name="minPrefix">1</int>
    <int name="maxInspections">5</int>
    <int name="minQueryLength">2</int>
    <float name="maxQueryFrequency">0.01</float>
  </lst>

  <!-- a second spellchecker, named "file", that reads its words from spellings.txt -->
  <lst name="spellchecker">
    <str name="classname">solr.FileBasedSpellChecker</str>
    <str name="name">file</str>
    <str name="sourceLocation">spellings.txt</str>
    <str name="characterEncoding">UTF-8</str>
    <str name="spellcheckIndexDir">spellcheckerFile</str>
  </lst>
</searchComponent>
<!-- Request handler exposed at /spell; it runs the "spellerror" component
     after the handler's standard components. -->
<requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
  <lst name="defaults">
    <str name="df">spell</str>
    <str name="spellcheck.dictionary">default</str>
    <str name="spellcheck">on</str>
    <!-- <str name="spellcheck.extendedResults">true</str>
    <str name="spellcheck.count">10</str>
    <str name="spellcheck.alternativeTermCount">5</str>
    <str name="spellcheck.maxResultsForSuggest">5</str> -->
    <str name="spellcheck.collate">true</str>
    <str name="spellcheck.collateExtendedResults">true</str>
    <!-- <str name="spellcheck.maxCollationTries">10</str>
    <str name="spellcheck.maxCollations">5</str> -->
  </lst>
  <arr name="last-components">
    <str>spellerror</str>
  </arr>
</requestHandler>
③SolrJ 客户端里调用拼写检查的代码:
/**
* @method: testSpellCheck
* @Description: 拼写检查
*
* @author: ChenYW
* @date 2014-4-15 下午06:14:56
*/
public
String spellCheck(String word){
SolrQuery query =
new
SolrQuery();
query.set(
"defType"
,
"edismax"
);
//加权
query.set(
"qf"
,
"name^20.0"
);
query.set(
"spellcheck"
,
"true"
);
query.set(
"spellcheck.q"
, word);
query.set(
"qt"
,
"/spell"
);
query.set(
"spellcheck.build"
,
"true"
);
//遇到新的检查词,会自动添加到索引里面
query.set(
"spellcheck.count"
,
5
);
try
{
QueryResponse rsp = server.query(query);
SpellCheckResponse re=rsp.getSpellCheckResponse();
if
(re !=
null
) {
if
(!re.isCorrectlySpelled()){
String t = re.getFirstSuggestion(word);
//获取第一个推荐词
System.out.println(
"推荐词:"
+ t);
return
t;
}
}
}
catch
(SolrServerException e) {
e.printStackTrace();
}
return
null
;
}
|