依赖
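<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-core</artifactId>
    <version>4.7.2</version>
</dependency>
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-queryparser</artifactId>
    <version>4.7.2</version>
</dependency>
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-analyzers-common</artifactId>
    <version>4.7.2</version>
</dependency>
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-highlighter</artifactId>
    <version>4.7.2</version>
</dependency>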
建立索引
本次增加了Float、Int类型的域
package top.yuyufeng.learn.lucene.demo2;
/**
* @author yuyufeng
* @date 2017/11/21
*/
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
import java.io.File;
import java.io.IOException;
/**
 * Writes five sample documents into the on-disk demo index. Every document
 * carries a string ID, an analyzed text body and three numeric fields
 * (long, int and float).
 *
 * @author yuyufeng
 * @date 2017/11/21
 */
public class LuceneIndexDemo {

    /**
     * Indexes five sample documents, one {@link #index} call per document.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // NOTE(review): the ID is the current time in milliseconds, so two
        // documents indexed within the same millisecond would share an ID.
        String content = "10月11日杭州云栖大会上,马云表达了对新建成的阿里巴巴全球研究院—阿里巴巴达摩院的愿景,希望达摩院二十年内成为世界第一大经济体,服务世界二十亿人,创造一亿个工作岗位。";
        Long createTime = System.currentTimeMillis();
        String id = createTime + "";
        int readCount = 10;
        float score = 9.5f;
        index(content, createTime, id, readCount, score);

        content = "中国互联网界,阿里巴巴被认为是技术实力最弱的公司。我确实不懂技术,承认不懂技术不丢人,不懂装懂才丢人。";
        createTime = System.currentTimeMillis();
        id = createTime + "";
        readCount = 3;
        score = 9.7f;
        index(content, createTime, id, readCount, score);

        content = "阿里巴巴未来二十年的目标是打造世界第五大经济体,不是我们狂妄,而是世界需要这么一个经济体,也一定会有这么一个经济体。";
        createTime = System.currentTimeMillis();
        id = createTime + "";
        readCount = 69;
        score = 5.6f;
        index(content, createTime, id, readCount, score);

        content = "达摩院一定也必须要超越英特尔,必须超越微软,必须超越IBM,因为我们生于二十一世纪,我们是有机会后发优势的。";
        createTime = System.currentTimeMillis();
        id = createTime + "";
        readCount = 38;
        score = 4.7f;
        index(content, createTime, id, readCount, score);

        content = "阿里巴巴有很多争议,似乎无处不在,我还真想不出有什么东西是我们不做的。互联网是一种思想,是一种技术革命,不应该有界限。跨界乐趣无穷。我觉得阿里巴巴的跨界还不错";
        createTime = System.currentTimeMillis();
        id = createTime + "";
        readCount = 73;
        score = 1.7f;
        index(content, createTime, id, readCount, score);
    }

    /**
     * Appends one document to the index under {@code D://test/lucene_index_blog},
     * creating the index if it does not exist yet. I/O failures are printed to
     * stderr and not rethrown.
     *
     * @param content    body text, analyzed and stored in the "content" field
     * @param createTime creation timestamp, stored in the "createTime" long field
     * @param id         document identifier, stored verbatim in the "ID" field
     * @param readCount  stored in the "readCount" int field
     * @param score      stored in the "score" float field
     */
    private static void index(String content, Long createTime, String id, int readCount, float score) {
        // The analyzer actually used here is StandardAnalyzer (not IKAnalyzer).
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47);
        Directory directory = null;
        try {
            // Index directory.
            directory = new SimpleFSDirectory(new File("D://test/lucene_index_blog"));

            // Open the existing index for appending, or create it on first use.
            IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_47, analyzer);
            iwConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
            IndexWriter iwriter = new IndexWriter(directory, iwConfig);
            try {
                Document doc = new Document();
                doc.add(new StringField("ID", id, Field.Store.YES));
                doc.add(new TextField("content", content, Field.Store.YES));
                doc.add(new LongField("createTime", createTime, Field.Store.YES));
                doc.add(new IntField("readCount", readCount, Field.Store.YES));
                doc.add(new FloatField("score", score, Field.Store.YES));
                iwriter.addDocument(doc);
            } finally {
                // Always close the writer, even when addDocument fails; otherwise
                // the writer and its write lock on the directory are leaked.
                iwriter.close();
            }
            // Only reached when the document was added and the writer closed cleanly.
            System.out.println("建立索引成功:" + id);
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (LockObtainFailedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (directory != null) {
                try {
                    directory.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
搜索排序
package top.yuyufeng.learn.lucene.demo2;
/**
* @author yuyufeng
* @date 2017/11/21
*/
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
import java.io.File;
import java.io.IOException;
/**
 * Prints up to five documents of the demo index, ordered by the numeric
 * "score" field with the highest value first.
 *
 * created by yuyufeng on 2017/11/13.
 */
public class LuceneSearchDemo {

    /**
     * Runs a match-all query sorted by "score" and prints the returned documents.
     * I/O failures are printed to stderr and not rethrown.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Directory directory = null;
        IndexReader ireader = null;
        try {
            // Index directory.
            directory = new SimpleFSDirectory(new File("D://test/lucene_index_blog"));

            // Create the searcher.
            ireader = DirectoryReader.open(directory);
            IndexSearcher isearcher = new IndexSearcher(ireader);

            // Match every document.
            Query query = new MatchAllDocsQuery();
            System.out.println("Query = " + query);

            // The key part for sorting: order by the float field "score";
            // the third argument (reverse = true) yields descending order.
            SortField sortField = new SortField("score", SortField.Type.FLOAT, true);
            Sort sort = new Sort(sortField);
            TopDocs topDocs = isearcher.search(query, 5, sort);
            System.out.println("命中:" + topDocs.totalHits);

            // totalHits counts every match, while scoreDocs holds at most the 5
            // requested hits, so iterate over the array itself to stay in bounds.
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document targetDoc = isearcher.doc(scoreDoc.doc);
                System.out.println("内容:" + targetDoc.toString());
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (ireader != null) {
                try {
                    ireader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (directory != null) {
                try {
                    directory.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
范围搜索
查询score范围在1~5之间的文档,对于上面的代码中的Query进行改造
// Match only documents whose numeric "score" field lies between 1 and 5; the two trailing flags make both bounds inclusive.
Query query = NumericRangeQuery.newFloatRange("score",1f,5f,true,true);
对搜索结果的分页
package top.yuyufeng.learn.lucene.demo2;
/**
* @author yuyufeng
* @date 2017/11/21
*/
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.Version;
import java.io.File;
import java.io.IOException;
/**
 * Prints one page of a match-all search over the demo index.
 *
 * created by yuyufeng on 2017/11/13.
 */
public class LuceneSearchDemo {

    /**
     * Prints the second page of results, three documents per page.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        page(2, 3);
    }

    /**
     * Prints the documents on the requested page of a match-all query.
     * I/O failures are printed to stderr and not rethrown.
     *
     * @param page 1-based page number
     * @param size number of documents per page
     * @throws IllegalArgumentException if {@code page} or {@code size} is below 1,
     *                                  or if {@code page * size} does not fit in an int
     */
    private static void page(int page, int size) {
        if (page < 1 || size < 1) {
            throw new IllegalArgumentException(
                    "page and size must be >= 1, got page=" + page + ", size=" + size);
        }
        // Compute the window in long arithmetic so a huge page number cannot overflow.
        long lastWanted = (long) page * size;
        if (lastWanted > Integer.MAX_VALUE) {
            throw new IllegalArgumentException(
                    "page * size is too large: page=" + page + ", size=" + size);
        }
        int to = (int) lastWanted;
        int from = to - size;

        Directory directory = null;
        IndexReader ireader = null;
        try {
            // Index directory.
            directory = new SimpleFSDirectory(new File("D://test/lucene_index_blog"));

            // Create the searcher.
            ireader = DirectoryReader.open(directory);
            IndexSearcher isearcher = new IndexSearcher(ireader);

            // Match every document, fetching just enough hits to cover the page.
            Query query = new MatchAllDocsQuery();
            TopDocs topDocs = isearcher.search(query, to);
            System.out.println("命中:" + topDocs.totalHits);

            // scoreDocs may be shorter than the requested window (or than
            // totalHits), so clamp the upper bound to the array length.
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;
            int end = Math.min(to, scoreDocs.length);
            for (int i = from; i < end; i++) {
                Document targetDoc = isearcher.doc(scoreDocs[i].doc);
                System.out.println("内容:" + targetDoc.toString());
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (ireader != null) {
                try {
                    ireader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (directory != null) {
                try {
                    directory.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
多条件查询
package top.yuyufeng.learn.lucene.demo2;
/**
* @author yuyufeng
* @date 2017/11/21
*/
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
import java.io.File;
import java.io.IOException;
/**
 * Searches the demo index with two conditions that must both hold: a keyword
 * match on "content" and a numeric range on "score".
 *
 * created by yuyufeng on 2017/11/13.
 */
public class LuceneSearchDemo {

    /**
     * Builds the combined query, runs it and prints the returned documents.
     * I/O and query-parsing failures are printed to stderr and not rethrown.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47);
        Directory directory = null;
        IndexReader ireader = null;
        try {
            // Index directory.
            directory = new SimpleFSDirectory(new File("D://test/lucene_index_blog"));

            // Create the searcher.
            ireader = DirectoryReader.open(directory);
            IndexSearcher isearcher = new IndexSearcher(ireader);

            BooleanQuery booleanQuery = new BooleanQuery();
            String keyword = "达摩院";

            // Condition 1: the keyword parsed against the "content" field.
            QueryParser qp = new QueryParser(Version.LUCENE_47, "content", analyzer);
            Query query = qp.parse(keyword);
            booleanQuery.add(query, BooleanClause.Occur.MUST);

            // Condition 2: "score" between 1 and 5, both bounds inclusive.
            query = NumericRangeQuery.newFloatRange("score", 1f, 5f, true, true);
            booleanQuery.add(query, BooleanClause.Occur.MUST);

            TopDocs topDocs = isearcher.search(booleanQuery, 100);
            System.out.println("命中:" + topDocs.totalHits);

            // totalHits counts every match, while scoreDocs holds at most the 100
            // requested hits, so iterate over the array itself to stay in bounds.
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document targetDoc = isearcher.doc(scoreDoc.doc);
                System.out.println("内容:" + targetDoc.toString());
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        } finally {
            if (ireader != null) {
                try {
                    ireader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (directory != null) {
                try {
                    directory.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
BooleanClause.Occur
| 组合方式| 结果 |
| ------------- |-------------|
|MUST和MUST | 取得两个查询子句的交集。|
|MUST和MUST_NOT| 表示查询结果中不能包含MUST_NOT所对应的查询子句的检索结果。|
|SHOULD与MUST_NOT|连用时,功能同MUST和MUST_NOT。|
|SHOULD与MUST|结果为MUST子句的检索结果,但是SHOULD可影响排序|
|SHOULD与SHOULD|表示“或”关系,最终检索结果为所有检索子句的并集|
|MUST_NOT和MUST_NOT|无意义,检索无结果|