Lucene分词器测试

1.代码

package com.ccy.lucene;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * 
 * <p> 
 * Title: AnalyzerTest.java 
 * Package com.ccy.lucene 
 * </p>
 * <p>
 * Description: 分词器测试
 * <p>
 * @author Tom.Cai
 * @created 2015-11-2 下午10:02:09 
 * @version V1.0 
 *
 */
public class AnalyzerTest {

	String text ="Adds room a document to this room index. If the room document contains room  more than setMaxFieldLength(int) terms for a given field, the remainder are discarded.room";

	String zhText ="我是中国人";

	Analyzer analyzer = new StandardAnalyzer(); // per-word tokenizer, suitable for English
	Analyzer ckhAnalyzer = new CJKAnalyzer(); // bigram (two-character) tokenizer for CJK text
	Analyzer smartAnalyzer = new SmartChineseAnalyzer(); // Lucene's official dictionary-based Chinese analyzer, works out of the box.
	                                                     // Doc warning: this API is experimental and may change incompatibly in the next release.
	// Dictionary-based Chinese analyzer by a Chinese author; supports mixed Chinese/English/digit tokenizing.
	// http://www.oschina.net/p/ikanalyzer
	Analyzer ikAnalyzer = new IKAnalyzer(); // NOTE: does not yet support the latest Lucene 5.3.1

	/**
	 * Tokenizes {@code input} with the given analyzer and prints each term to stdout.
	 * <p>
	 * Follows the mandatory TokenStream workflow: reset() before consuming,
	 * end() after the last incrementToken(), close() via try-with-resources
	 * (so the stream is released even if incrementToken() throws).
	 *
	 * @param a     analyzer used to produce the token stream
	 * @param input text to tokenize
	 * @throws IOException if the underlying stream fails
	 */
	private void printTokens(Analyzer a, String input) throws IOException {
		try (TokenStream ts = a.tokenStream("content", input)) {
			CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
			ts.reset();
			while (ts.incrementToken()) {
				System.out.println(term.toString());
			}
			ts.end(); // must be called after the last incrementToken(), before close()
		}
	}

	/**
	 * StandardAnalyzer on English text.
	 * @throws IOException
	 */
	@Test
	public void enAnalyzer() throws IOException {
		printTokens(analyzer, text);
	}

	/**
	 * CJKAnalyzer (bigram) on Chinese text.
	 * @throws IOException
	 */
	@Test
	public void zhCJKAnalyzer() throws IOException {
		printTokens(ckhAnalyzer, zhText);
	}

	/**
	 * SmartChineseAnalyzer on Chinese text.
	 * @throws IOException
	 */
	@Test
	public void zhSMARTAnalyzer() throws IOException {
		printTokens(smartAnalyzer, zhText);
	}

	/**
	 * IKAnalyzer on Chinese text.
	 * @throws IOException
	 */
	@Test
	public void zhIKAnalyzer() throws IOException {
		printTokens(ikAnalyzer, zhText);
	}
}

2.其他

我的CSDN博客地址:  http://blog.csdn.net/caicongyang 


你可能感兴趣的:(Lucene,IKAnalyzer,analyzer,analyzer,Lucene分词器)