package case1; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; import java.io.Reader; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.DateField; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; public class IndexFiles { /** * @param args */ public static void main(String[] args) { // TODO 自动生成方法存根 File indexDir = new File("C:\\luceneIndex"); try { IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true); File files=new File("D:\\chenzk\\lucene");//要对这个文件目录下的所有文件建立索引,保存在indexDir中 String [] Fnamelist=files.list(); for(int i=0;i<Fnamelist.length;i++) { File file=new File(files,Fnamelist[i]); Document doc=new Document(); Field fld=Field.Text("path",file.getPath()); doc.add(fld); fld=Field.Keyword("modified",DateField.timeToString(file.lastModified())); doc.add(fld); //下面很重要要将查询的东西保存索引文件中去 文件内容 FileInputStream in = new FileInputStream(file); Reader reader = new BufferedReader(new InputStreamReader(in)); fld = Field.Text("contents", reader); doc.add(fld); writer.addDocument(doc); System.out.println("Added:"+doc.get("path")); //索引中有3个字段 文件路径,最后修订时间,文件内容 } writer.optimize(); writer.close(); System.out.println(" Has Added Total "+Fnamelist.length); } catch (IOException e) { // TODO 自动生成 catch 块 e.printStackTrace(); } } }
以上是对文件目录建立索引的过程。
package case1; import java.io.IOException; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.Hits; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.Searcher; public class SearchFile { /** * @param args */ public static void main(String[] args) { // TODO 自动生成方法存根 Analyzer anlzr=new StandardAnalyzer(); try { Query q=QueryParser.parse("java","contents",anlzr); System.out.println(" Searching for :"+q.toString("contents")) ; Searcher serch=new IndexSearcher("C:\\luceneIndex");//索引所在的目录 Hits hts=serch.search(q); for(int i=0;i<hts.length();i++) { Document doc=hts.doc(i); String path=doc.get("path"); System.out.println("Find "+i+":" +path); System.out.println("Find :"+doc.get("modified")); System.out.println("Find: " + doc.get("contents")); } System.out.println("Find Total: " + hts.length()); } catch (ParseException e) { // TODO 自动生成 catch 块 e.printStackTrace(); } catch (IOException e) { // TODO 自动生成 catch 块 e.printStackTrace(); } } }
以上是对索引中关键字查询的过程
下面是将索引放在内存中的案例:
package case2; import java.io.IOException; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.SimpleAnalyzer; import org.apache.lucene.document.DateField; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.Hits; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.Searcher; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; public class IndexTest { /** * @param args */ public static void main(String[] args) { // TODO 自动生成方法存根 try { Directory directory = new RAMDirectory();// 将索引放在内存中 Analyzer analyzer = new SimpleAnalyzer(); IndexWriter writer = new IndexWriter(directory, analyzer, true); String[] docs = { "a b c d e", "a b c d e a b c d e", "a b c d e f g h i j", "a c e", "a c e a c e", "a c e a b c" }; for (int j = 0; j < docs.length; j++) { Document d = new Document(); d.add(Field.Text("contents", docs[j])); writer.addDocument(d); } writer.close(); // 以上是生成索引 Searcher searcher = new IndexSearcher(directory); String[] queries = { "a ", }; Hits hits = null; QueryParser parser = new QueryParser("contents", analyzer); parser.setPhraseSlop(0); for (int j = 0; j < queries.length; j++) { Query query = parser.parse(queries[j]); System.out.println("Query :" + query.toString("contents")); hits = searcher.search(query); // 以上是初始化查询,分析编号1,2 System.out.println(hits.length() + " total results"); for (int i = 0; i < hits.length() && i < 10; i++) { Document d = hits.doc(i); System.out.println(i + " " + hits.score(i) + " " + " " + d.get("contents")); } } searcher.close(); } catch (IOException e) { // TODO 自动生成 catch 块 e.printStackTrace(); } catch (ParseException e) { // TODO 自动生成 catch 块 e.printStackTrace(); } } }