今天不练习 Lucene 2.0.1 了,改为练习一下最新版本的 Lucene 3.5。
package jim.Lucene35;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;

import tool.FileList;
import tool.FileText;

/**
 * Entry point of the Lucene 3.5 indexing exercise.
 */
public class Lucene35 {

    /**
     * Builds the index by constructing a {@link creatIndex}; all work happens
     * in that constructor.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        new creatIndex();
    }
}

/**
 * Indexes every file returned by {@code FileList.getFiles("testFiles")} into a
 * file-system index stored under {@link #indexPath}.
 *
 * <p>Each file becomes one {@link Document} with three stored fields:
 * {@code title} (file name, not analyzed), {@code content} (text returned by
 * {@code FileText.getText}, analyzed) and {@code path} (file path, not
 * analyzed).
 *
 * <p>Failures are reported on standard output / standard error and abort the
 * remaining steps; they are not propagated to the caller.
 */
class creatIndex {

    /** Decides how the index is stored (here: on disk). */
    Directory directory = null;
    /** Writes documents into the index. */
    IndexWriter writer = null;
    /** The document currently being indexed. */
    Document document = null;
    /** The field most recently added to {@link #document}. */
    Field field = null;
    /** Selects the Lucene version and the analyzer used by the writer. */
    IndexWriterConfig iwc = null;
    /** Location of the index directory. */
    String indexPath = "index";
    /** Title (file name) of the file currently being indexed. */
    String title = "title";
    /** Text content of the file currently being indexed. */
    String content = "content";
    /** Paths of all files found in the source folder. */
    String[] files = null;

    /**
     * Creates the index: opens the directory and the writer, lists the source
     * files, adds one document per file and finally closes the writer and the
     * directory.
     *
     * <p>Each step is skipped when an earlier one failed, so a failed setup no
     * longer leads to a {@link NullPointerException} further down.
     */
    public creatIndex() {
        if (!openDirectory()) {
            return;
        }

        boolean indexed = false;
        boolean closed = false;
        try {
            if (openWriter() && loadFileList()) {
                for (int i = 0; i < files.length; i++) {
                    indexFile(new File(files[i]));
                }
                indexed = true;
            }
        } finally {
            // Always release the writer (and with it the index write lock) and
            // the directory, even when indexing was aborted by an exception.
            closed = closeWriter();
            closeDirectory();
        }

        if (indexed && closed) {
            System.out.println("Index is Created!");
        }
    }

    /** Opens the on-disk directory at {@link #indexPath}; returns false on failure. */
    private boolean openDirectory() {
        try {
            directory = FSDirectory.open(new File(indexPath));
            return true;
        } catch (IOException e) {
            System.out.println("创建Directory时发生错误!");
            e.printStackTrace();
            return false;
        }
    }

    /** Creates the {@link IndexWriter} for {@link #directory}; returns false on failure. */
    private boolean openWriter() {
        iwc = new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35));
        try {
            writer = new IndexWriter(directory, iwc);
            return true;
        } catch (CorruptIndexException e) {
            System.out.println("创建IndexWriter时发生错误!");
            e.printStackTrace();
        } catch (LockObtainFailedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return false;
    }

    /** Loads the paths of the files to index into {@link #files}; returns false on failure. */
    private boolean loadFileList() {
        try {
            files = FileList.getFiles("testFiles");
        } catch (IOException e) {
            System.out.println("无法打开存放要搜索的文件的文件夹");
            e.printStackTrace();
            return false;
        }
        // NOTE(review): whether FileList.getFiles can return null is not
        // visible here; guard against it rather than assume.
        return files != null;
    }

    /** Builds the document for one file and adds it to the index. */
    private void indexFile(File file) {
        document = new Document();

        title = file.getName();
        field = new Field("title", title, Field.Store.YES, Index.NOT_ANALYZED);
        document.add(field);

        content = FileText.getText(file);
        field = new Field("content", content, Field.Store.YES, Index.ANALYZED);
        document.add(field);

        String path = file.getPath();
        field = new Field("path", path, Field.Store.YES, Index.NOT_ANALYZED);
        document.add(field);

        try {
            writer.addDocument(document);
            // Report success only after the document was actually accepted.
            System.out.println("File: " + title + " Indexed");
        } catch (CorruptIndexException e) {
            System.out.println("将Document写入IndexWriter时错误!");
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Closes the writer if it was opened; returns true when it closed cleanly. */
    private boolean closeWriter() {
        if (writer == null) {
            return false;
        }
        try {
            writer.close();
            return true;
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            System.out.println("IndexWriter关闭时错误!");
            e.printStackTrace();
        }
        return false;
    }

    /** Closes the directory if it was opened; failures are only logged. */
    private void closeDirectory() {
        if (directory == null) {
            return;
        }
        try {
            directory.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
创建索引的步骤:
1.创建Directory
2.创建IndexWriter
3.创建Document
4.为Document添加Field对象
5.将Document添加到IndexWriter中
今天的收获:
能用 Lucene 3.5 写一个索引程序。
今天的不足:
对IK分词器摸不着头绪.