Lucene的标准分析器

//Lucene的标准分析器


package analyzer;


import java.io.IOException;
import java.io.Reader;
import java.util.Set;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.WordlistLoader;




/**
 * Analyzer that tokenizes text and then passes the tokens through a filter
 * pipeline: standard filter, lower-case filter, stop-word filter.
 *
 * <p>NOTE(review): {@code Analyzer} is not imported by this file, so it
 * resolves to a type in package {@code analyzer}; confirm that is the intended
 * base class. The single-argument Lucene constructors used below follow the
 * call shapes of the original code; confirm them against the Lucene version on
 * the classpath.
 */
public class StandardAnalyzer extends Analyzer {

    /**
     * Default stop words, copied from {@link StopAnalyzer#ENGLISH_STOP_WORDS_SET}
     * when this class is initialized.
     */
    public static final String[] STOP_WORDS =
            toStringArray(StopAnalyzer.ENGLISH_STOP_WORDS_SET);

    /** Words that the stop filter removes from the token stream. */
    private final Set<?> stopSet;

    /** Creates an analyzer that uses the default stop words in {@link #STOP_WORDS}. */
    public StandardAnalyzer() {
        this(STOP_WORDS);
    }

    /**
     * Creates an analyzer that uses the given stop-word set as is (no copy is made).
     *
     * @param stopWords the words to remove from the token stream
     */
    public StandardAnalyzer(Set<?> stopWords) {
        stopSet = stopWords;
    }

    /**
     * Creates an analyzer whose stop-word set is built from an array of words.
     *
     * @param stopWords the words to remove from the token stream
     */
    public StandardAnalyzer(String[] stopWords) {
        stopSet = StopFilter.makeStopSet(stopWords);
    }

    /**
     * Creates an analyzer whose stop-word set is loaded from a reader.
     *
     * @param stopwords source the word list is read from
     * @throws IOException if reading the word list fails
     */
    public StandardAnalyzer(Reader stopwords) throws IOException {
        stopSet = WordlistLoader.getWordSet(stopwords);
    }

    /**
     * Builds the token stream for the given input as a pipes-and-filters chain:
     * tokenizer, then standard filter, lower-case filter and stop filter.
     *
     * @param filename not read by this method
     * @param reader   the text to tokenize
     * @return the outermost filter of the chain
     */
    public TokenStream tokenStream(String filename, Reader reader) {
        // The chain must start from a tokenizer over the reader; the original
        // code referenced the not-yet-initialized local variable here.
        TokenStream result = new StandardTokenizer(reader);
        result = new StandardFilter(result);
        result = new LowerCaseFilter(result);
        result = new StopFilter(result, stopSet);
        return result;
    }

    /** Copies the elements of a word set into a new String array. */
    private static String[] toStringArray(Set<?> words) {
        String[] copy = new String[words.size()];
        int i = 0;
        for (Object word : words) {
            // Elements may be char[] rather than String (presumably the case
            // for Lucene's CharArraySet; confirm), so convert explicitly.
            copy[i++] = (word instanceof char[])
                    ? new String((char[]) word)
                    : String.valueOf(word);
        }
        return copy;
    }
}

你可能感兴趣的:(Lucene的标准分析器)