Lucene截止目前,最新版本为v7.4.0。它是一个开放源代码的全文检索引擎工具包,但它不是一个完整的全文检索引擎,而是一个全文检索引擎的架构,提供了完整的查询引擎和索引引擎,部分文本分析引擎(英文与德文两种西方语言)。
目录:
$ uname -a
Linux ubuntu 4.15.0-32-generic #35-Ubuntu SMP Fri Aug 10 17:58:07 UTC 2018 x86_64 x86_64 x86_64 GNU/Linux
$ java -version
java version “1.8.0_181”
Java(TM) SE Runtime Environment (build 1.8.0_181-b13)
Java HotSpot(TM) 64-Bit Server VM (build 25.181-b13, mixed mode)
补充:对应的安装包名称为jdk-8u181-linux-x64.tar.gz,Ubuntu 18.04.1 LTS,MyEclipse CI 2018.8.0
References:
[1] https://www.genuitec.com/products/myeclipse/download/
[2] http://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html
[3] http://www.apache.org/dyn/closer.lua/lucene/java/7.4.0
下载Lucene:http://www.apache.org/dyn/closer.lua/lucene/java/7.4.0
$ sudo apt install git
$ git config --global user.name "qingdujun"
$ git config --global user.email "[email protected]"
$ ssh-keygen -t rsa -C "[email protected]"
Generating public/private rsa key pair.
Enter file in which to save the key (/home/qingdujun/.ssh/id_rsa):
Created directory ‘/home/qingdujun/.ssh’.
Enter passphrase (empty for no passphrase):
Enter same passphrase again:
Your identification has been saved in /home/qingdujun/.ssh/id_rsa.
Your public key has been saved in /home/qingdujun/.ssh/id_rsa.pub.
$ vim /home/qingdujun/.ssh/id_rsa.pub
拷贝里面的内容至github.com->settings->SSH and GPG keys->New SSH key
References:
[1] https://www.liaoxuefeng.com/wiki/0013739516305929606dd18361248578c67b8067c8c017b000
[2] https://git-scm.com/book/zh/v1/Git-%E5%9F%BA%E7%A1%80-%E6%89%93%E6%A0%87%E7%AD%BE
[3] http://www.ruanyifeng.com/blog/2014/06/git_remote.html
git clone https://github.com/qingdujun/LuceneRes/releases/tag/v0.1
~/Documents/LuceneRes/Index$ ls
_0.cfe _0.cfs _0.si segments_2 write.lock
Console
Indexing /home/qingdujun/Documents/LuceneRes/Data/LuceneConstants.java
Indexing /home/qingdujun/Documents/LuceneRes/Data/Searcher.java
Indexing /home/qingdujun/Documents/LuceneRes/Data/LuceneTester.java
Indexing /home/qingdujun/Documents/LuceneRes/Data/TextFileFilter.java
Indexing /home/qingdujun/Documents/LuceneRes/Data/Indexer.java
5 File indexed, time taken: 128 ms
3 documents found. Time: 40 ms
File: /home/qingdujun/Documents/LuceneRes/Data/Indexer.java
File: /home/qingdujun/Documents/LuceneRes/Data/Searcher.java
File: /home/qingdujun/Documents/LuceneRes/Data/LuceneTester.java
package com.cfs.lucene;
import java.io.File;
import java.io.FileFilter;
public class TextFileFilter implements FileFilter{
public static final String TEXT_TYPE = ".java";
@Override
public boolean accept(File pathname) {
return pathname.getName().toLowerCase().endsWith(TEXT_TYPE);
}
}
package com.cfs.lucene;
public class LuceneConstants {
public static final String CONTENTS = "contents";
public static final String FILE_NAME = "filename";
public static final String FILE_PATH = "filepath";
public static final int MAX_SEARCH = 10;
}
package com.cfs.lucene;
import java.io.File;
import java.io.FileFilter;
import java.io.FileReader;
import java.io.IOException;
import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
public class Indexer {
private IndexWriter indexWriter = null;
/**
* Construct index
* @param directoryPath The directory where the index is located
* @throws IOException
*/
public Indexer(String directoryPath) throws IOException{
Directory directory = FSDirectory.open(Paths.get(directoryPath));
Analyzer analyzer = new StandardAnalyzer();
IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
indexWriter = new IndexWriter (directory, indexWriterConfig);
}
/**
* Close write index
* @throws IOException
*/
public void close() throws IOException{
indexWriter.close();
}
/**
* Convert a file to an index object(document)
* @param file
* @return
* @throws IOException
*/
private Document getDocument(File file) throws IOException{
Document document = new Document();
//TextField, A field that is indexed and tokenized
Field contentsField = new TextField(LuceneConstants.CONTENTS, new FileReader(file));
document.add(contentsField);
//StringField, A field that is indexed but not tokenized
Field fileNameField = new StringField(LuceneConstants.FILE_NAME, file.getName(),Store.YES);
document.add(fileNameField);
Field filePathField = new StringField(LuceneConstants.FILE_PATH, file.getCanonicalPath(), Store.YES);
document.add(filePathField);
return document;
}
/**
* Index file
* @param file
* @throws IOException
*/
private void indexFile(File file) throws IOException{
System.out.println("Indexing "+file.getCanonicalPath());
Document document = getDocument(file);
indexWriter.addDocument(document);
}
/**
* Index all files
* @param dataDirPath Indexed data storage directory
* @param fileFilter
* @return
* @throws IOException
*/
public int createIndex(String dataDirPath, FileFilter fileFilter) throws IOException{
File[] files = new File(dataDirPath).listFiles();
for (File file : files) {
if (!file.isDirectory() && !file.isHidden() && file.exists()
&& file.canRead() && fileFilter.accept(file)) {
indexFile(file);
}
}
return indexWriter.numDocs();
}
/**
* Read the entire contents of the file and return the bytes
* @param file
* @return
* @throws IOException
*/
// private byte[] read2Bytes(File file) throws IOException {
// Long fileLength = file.length();
// byte[] contents = new byte[fileLength.intValue()];
// FileInputStream fileInputStream = new FileInputStream(file);
// fileInputStream.read(contents);
// fileInputStream.close();
// return contents;
// }
//
package com.cfs.lucene;
import java.io.IOException;
import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
public class Searcher {
IndexSearcher indexSearcher = null;
QueryParser queryParser = null;
Query query = null;
Directory directory = null;
DirectoryReader directoryReader = null;
public Searcher(String directoryPath) throws IOException{
//Store the index in memory
//Directory directory = new RAMDirectory();
//Store an index on disk
directory = FSDirectory.open(Paths.get(directoryPath));
directoryReader = DirectoryReader.open(directory);
indexSearcher = new IndexSearcher(directoryReader);
queryParser = new QueryParser(LuceneConstants.CONTENTS, new StandardAnalyzer());
}
public TopDocs search(String searchQuery) throws ParseException, IOException {
query = queryParser.parse(searchQuery);
return indexSearcher.search(query, LuceneConstants.MAX_SEARCH);
}
public Document getDocument(ScoreDoc scoreDoc) throws IOException {
return indexSearcher.doc(scoreDoc.doc);
}
public void close() throws IOException {
directoryReader.close();
directory.close();
}
}
package com.cfs.lucene;
import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
public class LuceneTester {
/*
*basePath Need to be modified to your root dorectory
*/
public static final String basePath = "/home/qingdujun/Documents/LuceneRes/";
public static final String indexDir = basePath+"Index";
public static final String dataDir = basePath+"Data";
private Indexer indexer = null;
private Searcher searcher = null;
/**
* Test the function of create index
* @throws IOException
*/
private void createIndex() throws IOException {
indexer = new Indexer(indexDir);
int numIndexed = 0;
long startTime = System.currentTimeMillis();
numIndexed = indexer.createIndex(dataDir, new TextFileFilter());
long endTime = System.currentTimeMillis();
indexer.close();
System.out.println(numIndexed+" File indexed, time taken: "+(endTime-startTime)+" ms");
}
/**
* Test the function of search result
* @param searchQuery What you want to search for
* @throws IOException
* @throws ParseException
*/
private void search(String searchQuery) throws IOException, ParseException {
searcher = new Searcher(indexDir);
long startTime = System.currentTimeMillis();
TopDocs hits = searcher.search(searchQuery);
long endTime = System.currentTimeMillis();
System.out.println(hits.totalHits+" documents found. Time: "+(endTime-startTime)+" ms");
for (ScoreDoc scoreDoc : hits.scoreDocs) {
Document document = searcher.getDocument(scoreDoc);
System.out.println("File: "+document.get(LuceneConstants.FILE_PATH));
}
searcher.close();
}
public static void main(String[] args) {
LuceneTester luceneTester = new LuceneTester();
try {
luceneTester.createIndex();
luceneTester.search("throws IOException");
} catch (Exception e) {
e.printStackTrace();
}
}
}