lucene入门-索引网页

 

package bindex;
import java.io.File;
import tool.FileText;
import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
/**
 * Builds a small Lucene index under the relative directory "indexes" from
 * two HTML files stored in the relative directory "htmls".
 *
 * Every file becomes its own Lucene document with three fields:
 * "name" (file name, stored and tokenized), "conent" (file text as returned
 * by FileText.getText, stored and tokenized) and "path" (file path, stored
 * but not indexed).
 */
public class FileIndexer {

    /**
     * Indexes htmls/hao123.htm and htmls/home.htm, then prints "OK!".
     * Index-related failures are reported via printStackTrace().
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String indexPath = "indexes";
        try {
            IndexWriter indexWriter = new IndexWriter(indexPath, new StandardAnalyzer());
            try {
                // Each file gets a fresh Document. Reusing one Document for both
                // files would make the second indexed document also carry the
                // first file's fields.
                indexWriter.addDocument(buildDocument(new File("htmls/hao123.htm")));
                indexWriter.addDocument(buildDocument(new File("htmls/home.htm")));
            } finally {
                // Always close the writer, even when addDocument fails, so it
                // is not left open after an error.
                indexWriter.close();
            }

            System.out.println("OK!");
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (LockObtainFailedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates the Lucene document describing a single file.
     *
     * @param f the file to describe
     * @return a new document holding the "name", "conent" and "path" fields
     */
    private static Document buildDocument(File f) {
        Document doc = new Document();
        doc.add(new Field("name", f.getName(), Field.Store.YES, Field.Index.TOKENIZED));
        // NOTE(review): the field name "conent" looks like a misspelling of
        // "content"; it is kept unchanged because code that searches this
        // index may query this exact name - confirm before renaming.
        doc.add(new Field("conent", FileText.getText(f), Field.Store.YES, Field.Index.TOKENIZED));
        // The path is only stored for retrieval; it is not searchable.
        doc.add(new Field("path", f.getPath(), Field.Store.YES, Field.Index.NO));
        return doc;
    }

}

package tool;
import java.io.*;

/**
 * Best-effort helpers that read a text file into a String.
 *
 * Neither method throws on unreadable input; they return whatever text could
 * be read (possibly the empty string) instead.
 */
public class FileText {

    /**
     * Reads the whole file as text, decoding with the platform default
     * charset (that is what FileReader uses).
     *
     * Lines are joined with a single '\n' so that the last word of one line
     * and the first word of the next line stay separate; no separator is
     * added after the final line.
     *
     * @param f the file to read; may be null
     * @return the file's text, the part read before an I/O error occurred,
     *         or "" if f is null or the file cannot be opened
     */
    public static String getText(File f) {
        if (f == null) {
            return "";
        }
        StringBuilder sb = new StringBuilder();
        BufferedReader br = null;
        try {
            br = new BufferedReader(new FileReader(f));
            String s = br.readLine();
            while (s != null) {
                sb.append(s);
                s = br.readLine();
                if (s != null) {
                    // readLine() strips the line terminator; put one back
                    // between lines so adjacent words do not fuse together.
                    sb.append('\n');
                }
            }
        } catch (IOException ignored) {
            // Deliberate best effort: keep whatever was read so far.
        } catch (SecurityException ignored) {
            // Deliberate best effort: an inaccessible file yields "".
        } finally {
            if (br != null) {
                try {
                    br.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
        }
        return sb.toString();
    }

    /**
     * Reads the file at the given path; see {@link #getText(File)}.
     *
     * @param s the path of the file to read; may be null
     * @return the file's text, or "" if s is null or the file is unreadable
     */
    public static String getText(String s) {
        if (s == null) {
            return "";
        }
        return getText(new File(s));
    }
}

你可能感兴趣的:(apache,F#,Lucene)