Lucene 对 txt 文件建立索引并进行查询的案例

package case1;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;

/**
 * Builds a Lucene index over the regular files found directly inside a
 * source directory.
 *
 * Each indexed document carries three fields: "path" (the file path),
 * "modified" (the last-modified time encoded by DateField) and "contents"
 * (the file text, supplied to Lucene through a Reader).
 */
public class IndexFiles {

	/**
	 * Creates a fresh index in C:\luceneIndex from the files in
	 * D:\chenzk\lucene and prints one line per indexed file plus a total.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		File indexDir = new File("C:\\luceneIndex");
		// Every file in this directory is indexed; the index is stored in indexDir.
		File files = new File("D:\\chenzk\\lucene");

		// File.list() returns null when the path is not a readable directory.
		// Check before opening the writer: opening with create=true replaces
		// whatever index already exists in indexDir.
		String[] Fnamelist = files.list();
		if (Fnamelist == null) {
			System.err.println("Cannot list directory: " + files.getPath());
			return;
		}

		IndexWriter writer = null;
		try {
			writer = new IndexWriter(indexDir, new StandardAnalyzer(), true);
			int added = 0;
			for (int i = 0; i < Fnamelist.length; i++) {
				File file = new File(files, Fnamelist[i]);
				// Opening a FileInputStream on a sub-directory throws and would
				// abort the whole run, so only regular files are indexed.
				if (!file.isFile()) {
					continue;
				}

				Document doc = new Document();
				doc.add(Field.Text("path", file.getPath()));
				doc.add(Field.Keyword("modified", DateField.timeToString(file.lastModified())));

				// The file text is what queries are matched against.
				// NOTE(review): InputStreamReader uses the platform default charset here.
				Reader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
				try {
					doc.add(Field.Text("contents", reader));
					// The reader must stay open until the document has been added,
					// because Lucene reads from it while indexing the document.
					writer.addDocument(doc);
				} finally {
					reader.close();
				}

				System.out.println("Added:" + doc.get("path"));
				added++;
			}
			writer.optimize();
			System.out.println(" Has Added Total " + added);
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			// Close the writer on every path so the index is not left open.
			if (writer != null) {
				try {
					writer.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
	}

}

  以上是对文件目录建立索引的过程

 

package case1;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;

/**
 * Runs a fixed keyword query against the on-disk index in C:\luceneIndex
 * and prints the stored fields of every hit.
 */
public class SearchFile {

	/**
	 * Parses the query "java" against the "contents" field, searches the
	 * index and prints each hit's "path", "modified" and "contents" values,
	 * followed by the total number of hits.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		Analyzer anlzr = new StandardAnalyzer();
		Searcher serch = null;

		try {
			Query q = QueryParser.parse("java", "contents", anlzr);
			System.out.println(" Searching for :" + q.toString("contents"));

			// Directory that holds the index.
			serch = new IndexSearcher("C:\\luceneIndex");
			Hits hts = serch.search(q);
			int total = hts.length();
			for (int i = 0; i < total; i++) {
				Document doc = hts.doc(i);
				String path = doc.get("path");
				System.out.println("Find " + i + ":" + path);
				System.out.println("Find :" + doc.get("modified"));
				// NOTE(review): if "contents" was indexed from a Reader it is
				// presumably not stored, so this likely prints null - confirm.
				System.out.println("Find: " + doc.get("contents"));
			}
			System.out.println("Find Total: " + total);
		} catch (ParseException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			// Release the index files whether or not the search succeeded.
			if (serch != null) {
				try {
					serch.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
	}

}

 

以上是对索引中关键字查询的过程

 

下面是将索引放在内存中的案例:

package case2;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.DateField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

/**
 * Demonstrates indexing and searching entirely in memory: a handful of
 * short strings are indexed into a RAMDirectory and then queried.
 */
public class IndexTest {

	/**
	 * Indexes the sample strings, runs each sample query and prints the
	 * parsed query, the hit count and up to ten scored hits.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		try {
			// The index lives in memory rather than on disk.
			Directory ramIndex = new RAMDirectory();
			Analyzer tokenizer = new SimpleAnalyzer();
			populate(ramIndex, tokenizer);

			Searcher lookup = new IndexSearcher(ramIndex);
			QueryParser queryParser = new QueryParser("contents", tokenizer);
			queryParser.setPhraseSlop(0);

			String[] queryTexts = { "a " };
			for (int n = 0; n < queryTexts.length; n++) {
				Query parsed = queryParser.parse(queryTexts[n]);
				System.out.println("Query :" + parsed.toString("contents"));
				report(lookup.search(parsed));
			}
			lookup.close();
		} catch (IOException e) {
			e.printStackTrace();
		} catch (ParseException e) {
			e.printStackTrace();
		}
	}

	/** Writes one document per sample string into the given directory. */
	private static void populate(Directory target, Analyzer tokenizer) throws IOException {
		String[] samples = {
				"a b c d e",
				"a b c d e a b c d e",
				"a b c d e f g h i j",
				"a c e",
				"a c e a c e",
				"a c e a b c" };
		IndexWriter indexWriter = new IndexWriter(target, tokenizer, true);
		for (int n = 0; n < samples.length; n++) {
			Document entry = new Document();
			entry.add(Field.Text("contents", samples[n]));
			indexWriter.addDocument(entry);
		}
		indexWriter.close();
	}

	/** Prints the hit count followed by at most the first ten hits with their scores. */
	private static void report(Hits found) throws IOException {
		System.out.println(found.length() + " total results");
		int shown = Math.min(found.length(), 10);
		for (int rank = 0; rank < shown; rank++) {
			Document hit = found.doc(rank);
			System.out.println(rank + "  " + found.score(rank) + "  " + " "
					+ hit.get("contents"));
		}
	}

}

 

 

 

你可能感兴趣的:(apache,C++,c,C#,Lucene)