Indexer.java
package com.shosu.chapter1; /** * Copyright Manning Publications Co. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific lan */ import org.apache.lucene.index.IndexWriter; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.Directory; import org.apache.lucene.util.Version; import java.io.File; import java.io.FileFilter; import java.io.IOException; import java.io.FileReader; // From chapter 1 /** * This code was originally written for Erik's Lucene intro java.net article */ public class Indexer { public static void main(String[] args) throws Exception { // 如果在运行程序是 args[] 没有连个参数则报错-->com.shosu.test.IllegalArgumentExceptionTest.java /*if (args.length != 2) { throw new IllegalArgumentException("Usage: java " + Indexer.class.getName() + " <index dir> <data dir>"); }*/ String indexDir = null;//args[0]; // 1 String dataDir = null;//args[1]; // 2 // add indexDir = "d:\\lucene\\index"; dataDir = "d:\\lucene\\resource\\data"; // 开始时间 long start = System.currentTimeMillis(); Indexer indexer = new Indexer(indexDir); int numIndexed; // 被索引文档数 try { numIndexed = indexer.index(dataDir, new TextFilesFilter()); } finally { indexer.close(); } // 结束时间 long end = System.currentTimeMillis(); System.out.println("Indexing " + numIndexed + " files took " + (end - start) + " milliseconds"); } // main end private IndexWriter writer; // 构造函数初始化 public Indexer(String indexDir) throws IOException { // 打开对应索引文件夹 Directory dir = FSDirectory.open(new File(indexDir)); // 写 writer = new IndexWriter(dir, // 3 new StandardAnalyzer( // 3 分词器 Version.LUCENE_30),// 3 true, // 3 IndexWriter.MaxFieldLength.UNLIMITED); // 3 } public void close() throws IOException { writer.close(); // 4 } /* * 参数一: 资源路径 * 参数二: 文件 */ public int index(String dataDir, FileFilter filter) throws Exception { // 得到路径下的所有文件 File[] files = new File(dataDir).listFiles(); for (File f : files) { // isDirectory 如果 f 是目录则返回 true // isHidden 如果 f 是隐藏文件则返回 true // exists 如果 f 存在则返回 true // canRead 如果 f 可读则返回 true // accept 如果 f 是 .txt 文件返回 true if (!f.isDirectory() && !f.isHidden() && f.exists() && f.canRead() && (filter == null || filter.accept(f))) { indexFile(f); } } // forech end // 被索引文档数 return writer.numDocs(); // 5 } private static class TextFilesFilter implements FileFilter { public boolean accept(File path) { // 将文件名转换成小写,然后判断是否为 .txt 文件 return path.getName().toLowerCase() // 6 .endsWith(".txt"); // 6 } } // 将 File 转换成 Document protected Document getDocument(File f) throws Exception { Document doc = new Document(); // 创建域 如: contents 、 filename 、 fullpath doc.add(new Field("contents", new FileReader(f))); // 7 doc.add(new Field("filename", f.getName(), // 8 Field.Store.YES, Field.Index.NOT_ANALYZED));// 8 储存 不分词 doc.add(new Field("fullpath", f.getCanonicalPath(), // 9 Field.Store.YES, Field.Index.NOT_ANALYZED));// 9 return doc; } /* * 将文件加入索引 */ private void indexFile(File f) throws Exception { // File.getCanonicalPath-->http://blog.csdn.net/lskyne/article/details/9037233 System.out.println("Indexing " + f.getCanonicalPath()); Document doc = getDocument(f); // 将文档对象加入索引 writer.addDocument(doc); // 10 } } /* * #1 Create index in this directory #2 Index *.txt files from this directory #3 * Create Lucene IndexWriter #4 Close IndexWriter #5 Return number of documents * indexed #6 Index .txt files only, using FileFilter #7 Index file content #8 * Index file name #9 Index file full path #10 Add document to Lucene index */
Searcher.java
package com.shosu.chapter1; /** * Copyright Manning Publications Co. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific lan */ import org.apache.lucene.document.Document; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.Directory; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.util.Version; import java.io.File; import java.io.IOException; // From chapter 1 /** * This code was originally written for Erik's Lucene intro java.net article */ public class Searcher { public static void main(String[] args) throws IllegalArgumentException, IOException, ParseException { /*if (args.length != 2) { throw new IllegalArgumentException("Usage: java " + Searcher.class.getName() + " <index dir> <query>"); }*/ String indexDir = null;//args[0]; // 1 String q = null;//args[1]; // 2 // add indexDir = "d:\\lucene\\index"; q = "patent"; search(indexDir, q); } // 搜索 public static void search(String indexDir, String q) throws IOException, ParseException { // 打开索引文件 Directory dir = FSDirectory.open(new File(indexDir)); // 3 IndexSearcher is = new IndexSearcher(dir); // 3 // 解析查询字符--->配置Query QueryParser parser = new QueryParser(Version.LUCENE_30, // 4 "contents", // 4 查询域 new StandardAnalyzer( // 4 分词器 Version.LUCENE_30)); // 4 版本号 Query query = parser.parse(q); // 4 // 查询开始时间 long start = System.currentTimeMillis(); TopDocs hits = is.search(query, 10); // 5 // 查询结束时间 long end = System.currentTimeMillis(); // 记录搜索状态 System.err.println("Found " + hits.totalHits + // 6 " document(s) (in " + (end - start) + // 6 " milliseconds) that matched query '" + // 6 q + "':"); // 6 // 搜索结果 for (ScoreDoc scoreDoc : hits.scoreDocs) { Document doc = is.doc(scoreDoc.doc); // 7 System.out.println(doc.get("fullpath")); // 8 } is.close(); // 9 } } /* * #1 Parse provided index directory #2 Parse provided query string #3 Open * index #4 Parse query #5 Search index #6 Write search stats #7 Retrieve * matching document #8 Display filename #9 Close IndexSearcher */