1. 下载 lucene-2.9.1 和 ictclas4j
修改ictclas4j里的丢字的BUG
2. 代码
package com.lucene.tools;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.FSDirectory;
import org.ictclas4j.bean.SegResult;
import org.ictclas4j.segment.SegTag;
/**
 * Word-segmentation and indexing helper built on Lucene and ictclas4j.
 * <p>
 * Text is segmented with ictclas4j, each resulting word is written as its own
 * Lucene document into the index at {@link #path}, and the index can then be
 * queried by field name and keyword.
 *
 * @author Administrator
 */
public class LuceneAnalyzer {

    /** Directory in which the index is created and searched. */
    public static final String path = "G://index2";

    /**
     * Drops empty segments from a delimiter-separated string, which removes
     * leading, trailing and repeated delimiters.
     * <p>
     * The delimiter is matched literally (it is not interpreted as a regular
     * expression), and may be more than one character long.
     *
     * @param str the delimiter-separated text; {@code null} yields {@code ""}
     * @param reg the literal delimiter; when {@code null} or empty, {@code str}
     *            is returned unchanged
     * @return the non-empty segments of {@code str} joined by {@code reg}
     */
    public static String getDelReg(String str, String reg) {
        if (str == null) {
            return "";
        }
        if (reg == null || reg.length() == 0) {
            return str;
        }
        // Quote the delimiter so characters such as '.' or '|' split literally.
        String[] parts = str.split(java.util.regex.Pattern.quote(reg));
        StringBuilder joined = new StringBuilder();
        for (int i = 0; i < parts.length; i++) {
            if (parts[i].length() == 0) {
                continue;
            }
            // Insert the delimiter between segments only, so nothing has to be
            // trimmed off the end afterwards.
            if (joined.length() > 0) {
                joined.append(reg);
            }
            joined.append(parts[i]);
        }
        return joined.toString();
    }

    /**
     * Segments {@code str} with ictclas4j and indexes every word as a separate
     * document under the field {@code indexName}.
     * <p>
     * The index at {@link #path} is opened in create mode, so any existing
     * index there is replaced. Exceptions are printed, not propagated; the
     * writer is always closed so the index lock is released.
     *
     * @param indexName name of the field each word is stored under
     * @param str       text to segment; a message is printed and nothing is
     *                  indexed when it is {@code null} or empty
     */
    public void _LuceneHandle(String indexName, String str) {
        if (str == null || str.length() == 0) {
            System.out.println("分词字符串不能为空");
            return;
        }
        IndexWriter writer = null;
        try {
            int len = str.length();
            SegTag st = new SegTag(1);
            SegResult segmented = st.split(str);
            // The segmenter output is split on spaces into "word/tag" tokens.
            String[] tokens = segmented.getFinalResult().split(" ");
            writer = new IndexWriter(FSDirectory.getDirectory(path, true), new StandardAnalyzer(), true);
            for (int i = 0; i < tokens.length; i++) {
                String word = wordOf(tokens[i]);
                if (word.length() == 0) {
                    // Produced by repeated spaces or a token that starts with '/'.
                    continue;
                }
                _LuceneWinteIndex(indexName, word, len, writer);
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (writer != null) {
                try {
                    writer.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Returns the part of a segmenter token that precedes its first '/'.
     */
    private static String wordOf(String token) {
        int slash = token.indexOf('/');
        return slash < 0 ? token : token.substring(0, slash);
    }

    /**
     * Adds one document holding a single stored, tokenized field to the index.
     * Exceptions are printed, not propagated.
     *
     * @param indexName name of the field; must not be {@code null} or empty
     * @param str       value of the field; must not be {@code null} or empty
     * @param len       value passed to {@code writer.setMaxFieldLength}
     * @param writer    open writer the document is added to; it is not closed here
     */
    public void _LuceneWinteIndex(String indexName, String str, int len, IndexWriter writer) {
        if (indexName == null || indexName.length() == 0 || str == null || str.length() == 0) {
            System.out.println("索引名称和检索字符串不能为空");
            return;
        }
        try {
            Document doc = new Document();
            doc.add(new Field(indexName, str, Field.Store.YES, Field.Index.TOKENIZED));
            writer.setMaxFieldLength(len);
            writer.addDocument(doc);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Searches the index at {@link #path} and counts the matching documents.
     * Exceptions are printed, not propagated; the searcher is always closed.
     *
     * @param name    default field for the query parser
     * @param keyword query text, parsed by {@code QueryParser}
     * @return the number of hits, or {@code 0} when the search failed
     */
    public int _LuceneSearchIndex(String name, String keyword) {
        int result = 0;
        IndexSearcher searcher = null;
        try {
            searcher = new IndexSearcher(path);
            QueryParser qp = new QueryParser(name, new StandardAnalyzer());
            Query query = qp.parse(keyword);
            Hits hits = searcher.search(query);
            // Read the count before the searcher is closed in the finally block.
            result = hits.length();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (searcher != null) {
                try {
                    searcher.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
        return result;
    }
}