使用lucene构建简易的全文检索系统

1.        在eclipse中安装配置lucene

a)        下载lucene-core-2.3.2.jar

b)        File -> New -> Java Project 新建工程;然后在工程上右键 Properties -> Java Build Path -> Libraries -> Add External JARs,选择 lucene-core-2.3.2.jar

c)        为支持中文搜索,用与导入lucene-core-2.3.2.jar相同的方式导入中文分词包je-analysis-1.5.3.jar


在电脑中D盘建立lucene/docs的文件夹,其中包含要被检索的文档集合

在电脑中D盘建立lucene/index文件夹(即下文代码中使用的 D://lucene/index),用于存储索引


2.        索引相关的程序

a)        建立索引

package index;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;

import jeasy.analysis.MMAnalyzer;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.LockObtainFailedException;


/**
 * Builds a fresh Lucene index from the plain-text files of a document directory.
 *
 * <p>Each regular file located directly inside {@link #docDirPath} whose name ends
 * with {@code .txt} is added as one document with two fields: {@code path}
 * (the canonical file path, stored and not tokenized) and {@code contents}
 * (fed to the analyzer from a reader over the file). Sub-directories are not
 * visited.
 */
public class Create {

    /** Directory that holds the documents to be indexed. */
    static String docDirPath = "D://lucene/docs";
    /** Directory the index is written to. */
    static String indexDirPath = "D://lucene/index";

    /**
     * Recreates the index under {@link #indexDirPath} from the {@code .txt}
     * files found in {@link #docDirPath}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        File docDir = new File(docDirPath);
        File indexDir = new File(indexDirPath);

        // listFiles() returns null when the path is missing or not a directory.
        // Check before opening the writer: it is opened with create=true and
        // would otherwise replace an existing index before the failure shows up.
        File[] docFiles = docDir.listFiles();
        if (docFiles == null) {
            System.err.println("Cannot list documents in " + docDirPath
                    + " (directory missing or not readable)");
            return;
        }

        // MMAnalyzer (je-analysis) is used for Chinese word segmentation;
        // StandardAnalyzer is the stock alternative.
        Analyzer analyzer = new MMAnalyzer();

        IndexWriter writer = null;
        try {
            // Third argument "true": create a new index, overwriting any existing one.
            writer = new IndexWriter(indexDir, analyzer, true);

            for (int i = 0; i < docFiles.length; i++) {
                File docFile = docFiles[i];
                if (docFile.isFile() && docFile.getName().endsWith(".txt")) {
                    addDocument(writer, docFile);
                }
            }

            writer.optimize();
        } catch (IOException e) {
            // CorruptIndexException and LockObtainFailedException are IOExceptions
            // and were handled identically, so one handler covers them all.
            e.printStackTrace();
        } finally {
            // Always close the writer so the index write lock is released,
            // even when indexing failed part-way.
            if (writer != null) {
                try {
                    writer.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Adds one text file to the index as a boosted document.
     *
     * @param writer  open index writer that receives the document
     * @param docFile file whose path and contents are indexed
     * @throws IOException if the file cannot be read or the index cannot be written
     */
    private static void addDocument(IndexWriter writer, File docFile) throws IOException {
        // NOTE: FileReader decodes with the platform default charset.
        FileReader contents = new FileReader(docFile);
        try {
            Document document = new Document();
            document.add(new Field("path", docFile.getCanonicalPath(),
                    Field.Store.YES, Field.Index.UN_TOKENIZED));
            document.add(new Field("contents", contents));
            // Marks this document as more important than others: the boost
            // applies to all of its fields. A single field can be boosted the
            // same way via the field's own boost.
            document.setBoost((float) 1.50);
            writer.addDocument(document);
        } finally {
            // The reader has been consumed by addDocument (or indexing failed);
            // close it either way so the file handle is not leaked.
            contents.close();
        }
    }

}

b)        删除索引中包含某个term的document

package index;

import java.io.IOException;

import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.StaleReaderException;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.LockObtainFailedException;

/**
 * Deletes from the index every document that contains a given term.
 *
 * <p>Updating a document in the index follows the same route: delete the
 * document first, then add it again.
 */
public class Delete_index_term {

    /** Directory of the index to modify. */
    static String indexDirPath = "D://lucene/index";

    /**
     * Marks all documents whose {@code contents} field contains the term
     * {@code "liu"} as deleted in the index under {@link #indexDirPath}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        IndexReader reader = null;
        try {
            // Term-based deletion is done through an IndexReader.
            reader = IndexReader.open(indexDirPath);
            // Mark every document containing "liu" in its contents as deleted.
            reader.deleteDocuments(new Term("contents", "liu"));
        } catch (IOException e) {
            // CorruptIndexException, StaleReaderException and
            // LockObtainFailedException are IOExceptions and were handled
            // identically, so one handler covers them all.
            e.printStackTrace();
        } finally {
            // Close in all cases: this releases the index lock the reader
            // acquired for deleting, also when deleteDocuments failed.
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

}


3.        查询相关的程序

a)        Term查询、phrase查询

package search;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.FSDirectory;

/**
 * Runs a single-term query (with a phrase query shown as a commented
 * alternative) against the index and prints the path of every matching document.
 */
public class Term_Phrase_Search {

    /**
     * Searches the {@code contents} field of the index in
     * {@code D://lucene/index} for the term {@code "liu"} and prints the stored
     * {@code path} of each hit.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        File indexDir = new File("D://lucene/index");
        if (!indexDir.exists()) {
            System.out.println("The Lucene index is not exist");
            return;
        }

        FSDirectory directory = null;
        IndexSearcher searcher = null;
        try {
            directory = FSDirectory.getDirectory(indexDir, false);
            searcher = new IndexSearcher(directory);

            /*// Phrase query alternative: match the exact phrase "北京邮电"
            PhraseQuery query = new PhraseQuery();
            query.add(new Term("contents", "北京"));
            query.add(new Term("contents", "邮电"));*/

            // Term query: documents whose contents contain the term in queryStr.
            String queryStr = "liu";
            Term term = new Term("contents", queryStr);
            TermQuery query = new TermQuery(term);

            Hits hits = searcher.search(query);
            if (hits.length() == 0) {
                System.out.println("no result matches");
                return;
            }
            for (int i = 0; i < hits.length(); i++) {
                Document document = hits.doc(i);
                System.out.println("File: " + document.get("path"));
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Release the searcher and the directory on every exit path,
            // including the early "no result" return above.
            try {
                try {
                    if (searcher != null) {
                        searcher.close();
                    }
                } finally {
                    if (directory != null) {
                        directory.close();
                    }
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

b)        Boolean查询

package search;

import java.io.File;
import java.io.IOException;

import jeasy.analysis.MMAnalyzer;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Parses free-text queries with {@code QueryParser} using AND as the default
 * operator and prints how many documents match each query.
 */
public class Boolean_search {

    /**
     * Runs each query string in {@code searchWords} against the
     * {@code contents} field of the index in {@code D://lucene/index} and
     * prints the number of hits.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Directory dir = null;
        IndexSearcher indexSearcher = null;
        try {
            File indexDir = new File("D://lucene/index");
            if (!indexDir.exists()) {
                System.out.println("The Lucene index is not exist");
                return;
            }
            dir = FSDirectory.getDirectory(indexDir, false);
            indexSearcher = new IndexSearcher(dir);

            // MMAnalyzer (je-analysis) segments Chinese text;
            // StandardAnalyzer is the stock alternative.
            Analyzer analyzer = new MMAnalyzer();
            QueryParser qp = new QueryParser("contents", analyzer);
            // QueryParser combines terms with OR by default; switch to AND so
            // that a document must contain every term of the query.
            qp.setDefaultOperator(QueryParser.AND_OPERATOR);

            String[] searchWords = { "北京 liu" };

            for (int i = 0; i < searchWords.length; i++) {
                Query query = qp.parse(searchWords[i]);
                Hits results = indexSearcher.search(query);
                System.out.println(results.length()
                        + " search results for query " + searchWords[i]);
            }
        } catch (IOException e) {
            // CorruptIndexException is an IOException and was handled
            // identically, so one handler covers both.
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        } finally {
            // Release the searcher and the directory on every exit path.
            try {
                try {
                    if (indexSearcher != null) {
                        indexSearcher.close();
                    }
                } finally {
                    if (dir != null) {
                        dir.close();
                    }
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

}




你可能感兴趣的:(String,File,Lucene,search,query,全文检索)