package
indexer;
// package ch2.lucenedemo.process;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.logging.Level;
import java.util.logging.Logger;
import jeasy.analysis.MMAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import parameters.Param;
import pretreat.FileControl;
/**
 * Builds a Lucene index on the file system for the regular files found in a
 * root directory and in every directory below it.
 *
 * <p>The index lives at {@code Param.INDEX_STORE_PATH} and text is analyzed
 * with {@code MMAnalyzer}. The constructor opens the index directory, seeds an
 * empty index if necessary and collects all sub-directories of the root;
 * {@link #startIndex()} then indexes the root and each collected directory.
 */
public class IndexOnFS implements IIndexTool {

    private static final Logger LOGGER = Logger.getLogger(IndexOnFS.class.getName());

    // Location where the generated index files are stored.
    private String INDEX_STORE_PATH = Param.INDEX_STORE_PATH;
    // Root directory whose files are indexed; overwritten by the constructor argument.
    private String INDEX_WANTED_PATH = " e:\\ ";
    // Every sub-directory (at any depth) found below the root directory.
    private ArrayList<String> DirList = new ArrayList<String>();
    // Lucene handle on the index location; stays null if it could not be opened.
    private Directory dir = null;

    /**
     * Opens the index directory, makes sure a usable index exists and collects
     * the sub-directories of {@code path}.
     *
     * @param path root directory whose files will be indexed
     */
    public IndexOnFS(String path) {
        try {
            dir = FSDirectory.getDirectory(INDEX_STORE_PATH);
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        }
        INDEX_WANTED_PATH = path;
        makeSegments();
        searchDirectorys(path);
    }

    /**
     * Recursively records the path of every directory below {@code rootDir} in
     * the internal directory list. The directories are gathered up front so
     * that {@link #createIndexs()} can index them one at a time, each with its
     * own writer.
     *
     * @param rootDir directory to scan; nothing is recorded if it cannot be listed
     */
    public void searchDirectorys(String rootDir) {
        File[] children = new File(rootDir).listFiles();
        if (children == null) {
            return;
        }
        for (File child : children) {
            if (child.isDirectory()) {
                DirList.add(child.getPath());
                searchDirectorys(child.getPath());
            }
        }
    }

    /** Prints every collected sub-directory path to standard output, one per line. */
    public void printAllDirectorys() {
        for (String directory : DirList) {
            System.out.println(directory);
        }
    }

    /** Indexes the files of the root directory and then of each collected sub-directory. */
    public void createIndexs() {
        createIndex(INDEX_WANTED_PATH);
        for (String directory : DirList) {
            createIndex(directory);
        }
    }

    /**
     * Converts a file into the Lucene document that is added to the index.
     *
     * <p>The document has three tokenized fields: the file name (stored), the
     * file content as returned by {@code FileControl.fileToString} (not stored)
     * and the file path (stored).
     *
     * <p>NOTE(review): the field-name literals are padded with spaces; whatever
     * code searches this index has to use exactly the same literals - confirm
     * the padding is intended.
     *
     * @param file file to convert
     * @return the document describing {@code file}
     */
    public Document preIndexWrite(File file) {
        Document doc = new Document();
        // Field for the file name.
        doc.add(new Field(" filename ", file.getName(),
                Field.Store.YES, Field.Index.TOKENIZED));
        // Field for the file content; reading the file is delegated to FileControl.
        doc.add(new Field(" content ", FileControl.fileToString(file),
                Field.Store.NO, Field.Index.TOKENIZED));
        // Field for the file path.
        doc.add(new Field(" filepath ", file.getPath(),
                Field.Store.YES, Field.Index.TOKENIZED));
        return doc;
    }

    /**
     * Adds every regular file directly inside {@code inputDir} to the existing
     * index, optimizes the index and closes the writer. Sub-directories are not
     * descended into. Failures are logged and do not propagate to the caller.
     *
     * @param inputDir directory whose files are to be indexed
     */
    public void createIndex(String inputDir) {
        // Use a local writer so that a failed construction never leads to
        // closing a stale or null writer left over from an earlier call.
        IndexWriter indexWriter = null;
        try {
            // "false" appends to an existing index; makeSegments() creates it first.
            indexWriter = new IndexWriter(dir, new MMAnalyzer(), false);
            File[] files = new File(inputDir).listFiles();
            if (files != null) {
                for (File file : files) {
                    if (file.isFile()) {
                        indexWriter.addDocument(preIndexWrite(file));
                        System.out.println(file.getPath());
                    }
                }
            }
            indexWriter.optimize();
        } catch (Exception e) {
            LOGGER.log(Level.SEVERE, "Failed to index directory " + inputDir, e);
        } finally {
            closeWriter(indexWriter);
        }
    }

    /**
     * Initializes the index store when it holds no files yet: creates a new
     * index and adds the document built from {@code Param.INITFILE_PATH}, so
     * that later writers can open the index in append mode. Does nothing when
     * the store already contains files.
     */
    public void makeSegments() {
        // File.list() returns null when the path is missing or not a directory;
        // treat that like an empty store instead of failing with a NullPointerException.
        String[] entries = new File(INDEX_STORE_PATH).list();
        if (entries != null && entries.length != 0) {
            return;
        }
        IndexWriter indexWriter = null;
        try {
            indexWriter = new IndexWriter(dir, new MMAnalyzer(), true);
            // Write through the writer that was just created and make sure it is
            // closed afterwards, otherwise it keeps holding the index write lock.
            indexWriter.addDocument(preIndexWrite(new File(Param.INITFILE_PATH)));
        } catch (Exception ex) {
            LOGGER.log(Level.SEVERE, "Failed to initialize index at " + INDEX_STORE_PATH, ex);
        } finally {
            closeWriter(indexWriter);
        }
    }

    /** Closes {@code indexWriter} if it was opened, logging instead of throwing on failure. */
    private static void closeWriter(IndexWriter indexWriter) {
        if (indexWriter == null) {
            return;
        }
        try {
            indexWriter.close();
        } catch (Exception e) {
            LOGGER.log(Level.SEVERE, "Failed to close index writer", e);
        }
    }

    /**
     * Returns the list of collected sub-directory paths.
     *
     * @return the internal, live list (not a copy)
     */
    public ArrayList getDirs() {
        return this.DirList;
    }

    /** Makes sure the index exists and then indexes the root and all collected directories. */
    public void startIndex() {
        makeSegments();
        createIndexs();
    }

    /** Demo entry point: indexes a hard-coded directory. */
    public static void main(String[] args) {
        IndexOnFS processor = new IndexOnFS(" e:\\毕业论文 ");
        // processor.searchDirectorys("e:\\1");
        processor.startIndex();
    }
}
// package ch2.lucenedemo.process;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.logging.Level;
import java.util.logging.Logger;
import jeasy.analysis.MMAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import parameters.Param;
import pretreat.FileControl;
/**
 * Builds a Lucene index on the file system for the regular files found in a
 * root directory and in every directory below it.
 *
 * <p>The index lives at {@code Param.INDEX_STORE_PATH} and text is analyzed
 * with {@code MMAnalyzer}. The constructor opens the index directory, seeds an
 * empty index if necessary and collects all sub-directories of the root;
 * {@link #startIndex()} then indexes the root and each collected directory.
 */
public class IndexOnFS implements IIndexTool {

    private static final Logger LOGGER = Logger.getLogger(IndexOnFS.class.getName());

    // Location where the generated index files are stored.
    private String INDEX_STORE_PATH = Param.INDEX_STORE_PATH;
    // Root directory whose files are indexed; overwritten by the constructor argument.
    private String INDEX_WANTED_PATH = " e:\\ ";
    // Every sub-directory (at any depth) found below the root directory.
    private ArrayList<String> DirList = new ArrayList<String>();
    // Lucene handle on the index location; stays null if it could not be opened.
    private Directory dir = null;

    /**
     * Opens the index directory, makes sure a usable index exists and collects
     * the sub-directories of {@code path}.
     *
     * @param path root directory whose files will be indexed
     */
    public IndexOnFS(String path) {
        try {
            dir = FSDirectory.getDirectory(INDEX_STORE_PATH);
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        }
        INDEX_WANTED_PATH = path;
        makeSegments();
        searchDirectorys(path);
    }

    /**
     * Recursively records the path of every directory below {@code rootDir} in
     * the internal directory list. The directories are gathered up front so
     * that {@link #createIndexs()} can index them one at a time, each with its
     * own writer.
     *
     * @param rootDir directory to scan; nothing is recorded if it cannot be listed
     */
    public void searchDirectorys(String rootDir) {
        File[] children = new File(rootDir).listFiles();
        if (children == null) {
            return;
        }
        for (File child : children) {
            if (child.isDirectory()) {
                DirList.add(child.getPath());
                searchDirectorys(child.getPath());
            }
        }
    }

    /** Prints every collected sub-directory path to standard output, one per line. */
    public void printAllDirectorys() {
        for (String directory : DirList) {
            System.out.println(directory);
        }
    }

    /** Indexes the files of the root directory and then of each collected sub-directory. */
    public void createIndexs() {
        createIndex(INDEX_WANTED_PATH);
        for (String directory : DirList) {
            createIndex(directory);
        }
    }

    /**
     * Converts a file into the Lucene document that is added to the index.
     *
     * <p>The document has three tokenized fields: the file name (stored), the
     * file content as returned by {@code FileControl.fileToString} (not stored)
     * and the file path (stored).
     *
     * <p>NOTE(review): the field-name literals are padded with spaces; whatever
     * code searches this index has to use exactly the same literals - confirm
     * the padding is intended.
     *
     * @param file file to convert
     * @return the document describing {@code file}
     */
    public Document preIndexWrite(File file) {
        Document doc = new Document();
        // Field for the file name.
        doc.add(new Field(" filename ", file.getName(),
                Field.Store.YES, Field.Index.TOKENIZED));
        // Field for the file content; reading the file is delegated to FileControl.
        doc.add(new Field(" content ", FileControl.fileToString(file),
                Field.Store.NO, Field.Index.TOKENIZED));
        // Field for the file path.
        doc.add(new Field(" filepath ", file.getPath(),
                Field.Store.YES, Field.Index.TOKENIZED));
        return doc;
    }

    /**
     * Adds every regular file directly inside {@code inputDir} to the existing
     * index, optimizes the index and closes the writer. Sub-directories are not
     * descended into. Failures are logged and do not propagate to the caller.
     *
     * @param inputDir directory whose files are to be indexed
     */
    public void createIndex(String inputDir) {
        // Use a local writer so that a failed construction never leads to
        // closing a stale or null writer left over from an earlier call.
        IndexWriter indexWriter = null;
        try {
            // "false" appends to an existing index; makeSegments() creates it first.
            indexWriter = new IndexWriter(dir, new MMAnalyzer(), false);
            File[] files = new File(inputDir).listFiles();
            if (files != null) {
                for (File file : files) {
                    if (file.isFile()) {
                        indexWriter.addDocument(preIndexWrite(file));
                        System.out.println(file.getPath());
                    }
                }
            }
            indexWriter.optimize();
        } catch (Exception e) {
            LOGGER.log(Level.SEVERE, "Failed to index directory " + inputDir, e);
        } finally {
            closeWriter(indexWriter);
        }
    }

    /**
     * Initializes the index store when it holds no files yet: creates a new
     * index and adds the document built from {@code Param.INITFILE_PATH}, so
     * that later writers can open the index in append mode. Does nothing when
     * the store already contains files.
     */
    public void makeSegments() {
        // File.list() returns null when the path is missing or not a directory;
        // treat that like an empty store instead of failing with a NullPointerException.
        String[] entries = new File(INDEX_STORE_PATH).list();
        if (entries != null && entries.length != 0) {
            return;
        }
        IndexWriter indexWriter = null;
        try {
            indexWriter = new IndexWriter(dir, new MMAnalyzer(), true);
            // Write through the writer that was just created and make sure it is
            // closed afterwards, otherwise it keeps holding the index write lock.
            indexWriter.addDocument(preIndexWrite(new File(Param.INITFILE_PATH)));
        } catch (Exception ex) {
            LOGGER.log(Level.SEVERE, "Failed to initialize index at " + INDEX_STORE_PATH, ex);
        } finally {
            closeWriter(indexWriter);
        }
    }

    /** Closes {@code indexWriter} if it was opened, logging instead of throwing on failure. */
    private static void closeWriter(IndexWriter indexWriter) {
        if (indexWriter == null) {
            return;
        }
        try {
            indexWriter.close();
        } catch (Exception e) {
            LOGGER.log(Level.SEVERE, "Failed to close index writer", e);
        }
    }

    /**
     * Returns the list of collected sub-directory paths.
     *
     * @return the internal, live list (not a copy)
     */
    public ArrayList getDirs() {
        return this.DirList;
    }

    /** Makes sure the index exists and then indexes the root and all collected directories. */
    public void startIndex() {
        makeSegments();
        createIndexs();
    }

    /** Demo entry point: indexes a hard-coded directory. */
    public static void main(String[] args) {
        IndexOnFS processor = new IndexOnFS(" e:\\毕业论文 ");
        // processor.searchDirectorys("e:\\1");
        processor.startIndex();
    }
}