索引
。这种先建立索引,再对索引进行搜索的过程就叫全文检索
Lucene
可以实现全文检索,它是为软件开发人员提供的简单易用的工具包,以方便地在目标系统中实现全文检索的功能。
索引过程:对要搜索的原始内容进行索引构建一个索引库
确定原始内容(搜索的内容) → 采集文档 → 创建文档 → 分析文档 → 索引文档
注意:
每个Document可以有多个Field,不同的Document可以有不同的Field,同一个Document可以有相同的Field(域值和域名都相同);每个文档都有一个唯一的编号,就是文档的id。注意:
创建索引是对语汇单元索引,通过词语找文档,这种索引结构叫做倒排索引结构,也叫反向索引结构,包括索引和文档两部分,索引即词汇表,它的规模较小,而文档集合较大。搜索过程:从索引库中搜索内容。搜索界面 → 创建查询 → 执行搜索 → 从索引库搜索 → 渲染搜索结果
luke.bat
,选择自己的索引库地址,显示如下:方法 | 说明 |
---|---|
indexSearcher.search(query, n) | 根据Query搜索,返回评分最高的n条记录 |
indexSearcher.search(query, filter, n) | 根据Query搜索,添加过滤策略,返回评分最高的n条记录 |
indexSearcher.search(query, n, sort) | 根据Query搜索,添加排序策略,返回评分最高的n条记录 |
indexSearcher.search(query,filter ,n, sort) | 根据Query搜索,添加过滤策略,添加排序策略,返回评分最高的n条记录 |
java.lang.AbstractMethodError
,可以百度一下package com.zcs;
/*
* 查询索引
*/
import java.io.File;
import java.nio.file.Paths;
import java.util.concurrent.ExecutionException;
import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * Lucene indexing/search demo: builds an index from the files under
 * E:\Lucene\source and runs a simple TermQuery against it.
 */
public class FirstLucene {

    /**
     * Builds the index: one Lucene Document per file, with fileName,
     * filePath, fileSize and fileContent fields.
     *
     * @throws Exception on any I/O or Lucene failure
     */
    @Test
    public void testIndex() throws Exception {
        // 1. Index storage location on disk.
        Directory directory = FSDirectory.open(Paths.get("E:\\Lucene\\tmp\\index"));
        // 2. Analyzer used to tokenize document content (IK handles Chinese).
        Analyzer analyzer = new IKAnalyzer();
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
        // try-with-resources guarantees the writer is closed even if indexing throws.
        try (IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig)) {
            File sourceDir = new File("E:\\Lucene\\source");
            File[] listFiles = sourceDir.listFiles();
            if (listFiles == null) {
                // listFiles() returns null when the path is missing or not a directory;
                // fail loudly instead of throwing a bare NullPointerException below.
                throw new IllegalStateException("Source directory not found: " + sourceDir);
            }
            for (File file : listFiles) {
                Document document = new Document();
                // File name: tokenized and stored (searchable and retrievable).
                Field fileNameField = new TextField("fileName", file.getName(), Store.YES);
                // File path: stored only, not searchable.
                Field filePathField = new StoredField("filePath", file.getPath());
                long fileSize = FileUtils.sizeOf(file);
                // LongPoint enables numeric range queries on the size.
                Field fileSizeField = new LongPoint("fileSize", fileSize);
                // FIX: specify an explicit charset. The no-charset overload is
                // deprecated and silently depends on the platform default encoding,
                // which corrupts non-ASCII content on mismatched systems.
                String fileContent = FileUtils.readFileToString(file, "UTF-8");
                Field fileContentField = new TextField("fileContent", fileContent, Store.YES);
                document.add(fileNameField);
                document.add(filePathField);
                document.add(fileSizeField);
                // StoredField so the raw size can be retrieved with a hit...
                document.add(new StoredField("fileSize", fileSize));
                // ...and NumericDocValuesField so results can be sorted by size.
                document.add(new NumericDocValuesField("fileSize", fileSize));
                document.add(fileContentField);
                indexWriter.addDocument(document);
            }
        }
    }

    /**
     * Searches the index for documents whose fileName contains the term "txt"
     * and prints the stored fields of the top 2 hits.
     *
     * @throws Exception on any I/O or Lucene failure
     */
    @Test
    public void testSearch() throws Exception {
        // 1. Open the same index directory used by testIndex().
        Directory directory = FSDirectory.open(Paths.get("E:\\Lucene\\tmp\\index"));
        IndexReader indexReader = DirectoryReader.open(directory);
        try {
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            // TermQuery matches the exact (already-analyzed) term in the given field.
            Query query = new TermQuery(new Term("fileName", "txt"));
            // Return at most the 2 highest-scoring hits.
            TopDocs topDocs = indexSearcher.search(query, 2);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document document = indexSearcher.doc(scoreDoc.doc);
                System.out.println(document.get("fileName"));
                System.out.println(document.get("filePath"));
                System.out.println(document.get("fileSize"));
                System.out.println(document.get("fileContent"));
                System.out.println("----------------------");
            }
        } finally {
            // FIX: close the reader even when searching throws.
            indexReader.close();
        }
    }
}
使用query的子类查询
使用queryparse查询
package com.zcs;
import java.io.IOException;
import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * CRUD and query examples against the Lucene index at E:\Lucene\tmp\index.
 */
public class LuceneManager {

    /**
     * Creates an IndexWriter over the index directory using the IK analyzer.
     * The caller is responsible for closing the returned writer.
     *
     * @return a fresh IndexWriter
     * @throws IOException if the index directory cannot be opened
     */
    public IndexWriter getIndexWriter() throws IOException {
        Directory directory = FSDirectory.open(Paths.get("E:\\Lucene\\tmp\\index"));
        Analyzer analyzer = new IKAnalyzer(); // IK handles Chinese tokenization
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
        return new IndexWriter(directory, indexWriterConfig);
    }

    /** Deletes every document in the index. */
    @Test
    public void testAllDelete() throws IOException {
        IndexWriter indexWriter = getIndexWriter();
        try {
            indexWriter.deleteAll();
        } finally {
            // FIX: close even if deleteAll() throws.
            indexWriter.close();
        }
    }

    /** Deletes only the documents whose fileName field contains "spring". */
    @Test
    public void testDelete() throws IOException {
        IndexWriter indexWriter = getIndexWriter();
        try {
            Query query = new TermQuery(new Term("fileName", "spring"));
            indexWriter.deleteDocuments(query);
        } finally {
            indexWriter.close();
        }
    }

    /** Replaces the documents matching fileName:"springboot" with a new document. */
    @Test
    public void testUpdate() throws IOException {
        IndexWriter indexWriter = getIndexWriter();
        try {
            Document document = new Document();
            document.add(new TextField("fileA", "测试修改文件名", Store.YES));
            document.add(new TextField("fileB", "测试修改文件名", Store.YES));
            // FIX: the index field is "fileName" (camelCase). The original used
            // "filename", which never matches any document, silently turning the
            // update into a plain insert (updateDocument = delete-by-term + add).
            indexWriter.updateDocument(new Term("fileName", "springboot"), document);
        } finally {
            indexWriter.close();
        }
    }

    /**
     * Opens an IndexSearcher over the index directory. Close the underlying
     * reader via searcher.getIndexReader().close() when done.
     *
     * @return a searcher bound to a fresh IndexReader
     * @throws IOException if the index cannot be opened
     */
    public IndexSearcher getIndexSearcher() throws IOException {
        Directory directory = FSDirectory.open(Paths.get("E:\\Lucene\\tmp\\index"));
        IndexReader indexReader = DirectoryReader.open(directory);
        return new IndexSearcher(indexReader);
    }

    /**
     * Runs the query (top 12 hits) and prints the stored fields of each hit.
     *
     * @param indexSearcher searcher to execute against
     * @param query         the query to run
     * @throws IOException on index read failure
     */
    public void printResult(IndexSearcher indexSearcher, Query query) throws IOException {
        TopDocs topDocs = indexSearcher.search(query, 12);
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            Document document = indexSearcher.doc(scoreDoc.doc);
            System.out.println(document.get("fileName"));
            System.out.println(document.get("filePath"));
            System.out.println(document.get("fileSize"));
            System.out.println(document.get("fileContent"));
            System.out.println("----------------------");
        }
    }

    /** Matches every document in the index. */
    @Test
    public void testMatchAllDocsQuery() throws IOException {
        IndexSearcher indexSearcher = getIndexSearcher();
        try {
            printResult(indexSearcher, new MatchAllDocsQuery());
        } finally {
            // FIX: release the reader even if the search throws.
            indexSearcher.getIndexReader().close();
        }
    }

    /** Numeric range query over the LongPoint "fileSize" field (100..300 bytes). */
    @Test
    public void testNumericRangeQuery() throws IOException {
        IndexSearcher indexSearcher = getIndexSearcher();
        try {
            Query query = LongPoint.newRangeQuery("fileSize", 100L, 300L);
            printResult(indexSearcher, query);
        } finally {
            indexSearcher.getIndexReader().close();
        }
    }

    /** Boolean AND of two term queries (fileName:spring AND fileContent:java). */
    @Test
    public void testBooleanQuery() throws IOException {
        IndexSearcher indexSearcher = getIndexSearcher();
        try {
            Query query1 = new TermQuery(new Term("fileName", "spring"));
            Query query2 = new TermQuery(new Term("fileContent", "java"));
            BooleanQuery.Builder builder = new BooleanQuery.Builder();
            builder.add(query1, BooleanClause.Occur.MUST);
            builder.add(query2, BooleanClause.Occur.MUST);
            printResult(indexSearcher, builder.build());
        } finally {
            indexSearcher.getIndexReader().close();
        }
    }

    /** Parses a query string ("fileSize:[200 TO 300]") with "fileName" as default field. */
    @Test
    public void testQueryParser() throws IOException, ParseException {
        IndexSearcher indexSearcher = getIndexSearcher();
        try {
            QueryParser queryParser = new QueryParser("fileName", new IKAnalyzer());
            // "*:*" would match all documents.
            Query query = queryParser.parse("fileSize:[200 TO 300]");
            printResult(indexSearcher, query);
        } finally {
            indexSearcher.getIndexReader().close();
        }
    }

    /** Parses a query string with multiple default fields (fileName, fileContent). */
    @Test
    public void testMultiFieldQueryParser() throws IOException, ParseException {
        IndexSearcher indexSearcher = getIndexSearcher();
        try {
            String[] fields = {"fileName", "fileContent"};
            MultiFieldQueryParser queryParser = new MultiFieldQueryParser(fields, new IKAnalyzer());
            Query query = queryParser.parse("fileName: spring AND fileContent: java");
            printResult(indexSearcher, query);
        } finally {
            indexSearcher.getIndexReader().close();
        }
    }
}
Lucene是一个开放源代码的全文检索引擎工具包,即它不是一个完整的全文检索引擎,而是一个全文检索引擎的架构,提供了完整的查询引擎和索引引擎,部分文本分析引擎(英文与德文两种西方语言)。Lucene的目的是为软件开发人员提供一个简单易用的工具包,以方便的在目标系统中实现全文检索的功能,或者是以此为基础建立起完整的全文检索引擎.
Solr是一个高性能,采用Java5开发,基于Lucene的全文搜索服务器。同时对其进行了扩展,提供了比Lucene更为丰富的查询语言,同时实现了可配置、可扩展并对查询性能进行了优化,并且提供了一个完善的功能管理界面,是一款非常优秀的全文搜索引擎。它对外提供类似于Web-service的API接口。用户可以通过http请求,向搜索引擎服务器提交一定格式的XML文件,生成索引;也可以通过Http Solr Get操作提出查找请求,并得到XML格式的返回结果;
Solr和Lucene的本质区别有以下三点:搜索服务器,企业级和管理。Lucene本质上是搜索库,不是独立的应用程序,而Solr是。Lucene专注于搜索底层的建设,而Solr专注于企业应用。Lucene不负责支撑搜索服务所必须的管理,而Solr负责。所以说,一句话概括Solr: Solr是Lucene面向企业搜索应用的扩展
solr-7.7.2/server/solr-webapp/webapp/
目录下的所有文件复制到刚才创建的solr目录。tomcat下的webapps目录下创建solr目录
的WEB-INF/lib/下
solr-7.7.2\server\lib\ext的jar复制过去,如果有冲突的,跳过solr-7.7.2\server\lib下的jar
复制过去。solrhome
目录,并将solr-7.7.2/server/solr/*
下的内容复制过去。tomcat下的webapps目录下创建solr目录
的WEB-INF/web.xml
文件内容,指定刚才的solrhome的位置,(如果web.xml里面没有修改路径的这部分代码,就添加进去)并注释security-constraint权限内容。
<env-entry>
    <env-entry-name>solr/home</env-entry-name>
    <env-entry-value>D:\GIS\apache-tomcat-7.0.86-windows-x64\solrhome</env-entry-value>
    <env-entry-type>java.lang.String</env-entry-type>
</env-entry>
tomcat/bin/start.bat
就启动tomcat服务了,然后再浏览器中输入http://localhost:8080/solr
,就进入solr的管理页面。tomcat/conf/logging.properties,
将java.util.logging.ConsoleHandler.encoding = UTF-8
修改为java.util.logging.ConsoleHandler.encoding = GBK
solrhome/${collection}/conf/managed-schema
文件 中添加如下配置:
<fieldType name="text_ik" class="solr.TextField">
    <analyzer type="index">
        <tokenizer class="org.apache.lucene.analysis.ik.IKTokenizerFactory" useSmart="false"/>
    </analyzer>
    <analyzer type="query">
        <tokenizer class="org.apache.lucene.analysis.ik.IKTokenizerFactory" useSmart="false"/>
    </analyzer>
</fieldType>
<field name="title_ik" type="text_ik" indexed="true" stored="false"/>
<field name="content_ik" type="text_ik" indexed="true" stored="false"/>
webapps\solr\WEB-INF\lib
添加刚才下载的jar包webapps\solr\WEB-INF
创建文件夹classes,在其下面添加ik-analyzer 的配置文件如下:tomcat\webapps\solr\WEB-INF\lib
添加依赖的jar包
solr-7.7.2\dist
)solr-7.7.2\dist
)solr-7.7.2\example\example-DIH\solr\solr\conf\solr-data-config.xml
移动到solr\solr\solrhome\new_core\conf下并编辑
<dataConfig>
    <dataSource type="JdbcDataSource"
                driver="com.mysql.jdbc.Driver"
                url="jdbc:mysql://172.21.3.210:3306/solr"
                user="MHadoop"
                password="12345678" />
    <document>
        <entity name="goods"
                query="select id,name,price,introduce from goods">
            <field column="id" name="id" />
            <field column="name" name="name" />
            <field column="price" name="price" />
            <field column="introduce" name="introduce" />
        </entity>
    </document>
</dataConfig>
<requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler">
    <lst name="defaults">
        <str name="config">solr-data-config.xml</str>
    </lst>
</requestHandler>
<field name="name" type="text_ik" indexed="true" stored="true" multiValued="false" />
<field name="price" type="pdouble" indexed="false" stored="true" multiValued="false" />
<field name="introduce" type="text_ik" indexed="true" stored="true" multiValued="false" />
package com.zcs.entity;
/**
 * Plain data holder for a goods record stored in Solr:
 * id, name, price and an introduction text.
 */
public class Goods {

    private int id;
    private String name;
    private double price;
    private String introduce;

    /** No-arg constructor (required for frameworks / manual population). */
    public Goods() {
        super();
    }

    /**
     * Fully-initializing constructor.
     *
     * @param id        unique record id
     * @param name      goods name
     * @param price     unit price
     * @param introduce introduction / description text
     */
    public Goods(int id, String name, double price, String introduce) {
        super();
        this.id = id;
        this.name = name;
        this.price = price;
        this.introduce = introduce;
    }

    // --- accessors ---

    public int getId() {
        return id;
    }

    public void setId(int id) {
        this.id = id;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public double getPrice() {
        return price;
    }

    public void setPrice(double price) {
        this.price = price;
    }

    public String getIntroduce() {
        return introduce;
    }

    public void setIntroduce(String introduce) {
        this.introduce = introduce;
    }

    /** Debug-friendly rendering of all fields. */
    @Override
    public String toString() {
        return String.format("Goods [id=%d, name=%s, price=%s, introduce=%s]",
                id, name, price, introduce);
    }
}
package com.zcs.solrDao;
import static org.hamcrest.CoreMatchers.nullValue;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import javax.sound.sampled.LineListener;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
import com.zcs.entity.Goods;
/**
 * DAO for Goods records in the Solr core "new_core": insert/update,
 * delete-by-id, and keyword search with optional highlighting.
 */
public class GoodsSolrDao {

    /** Base URL of the Solr core used by every operation. */
    private static final String SOLR_URL = "http://localhost:8080/solr/new_core";

    /**
     * Adds the goods record, or updates it if a document with the same id exists.
     *
     * @param goods record to write; its fields map 1:1 to Solr fields
     */
    public void insertOrUpdate(Goods goods) {
        // Client with a 5s connection timeout.
        HttpSolrClient client = new HttpSolrClient.Builder(SOLR_URL)
                .withConnectionTimeout(5000).build();
        try {
            SolrInputDocument document = new SolrInputDocument();
            document.addField("id", goods.getId());
            document.addField("name", goods.getName());
            document.addField("price", goods.getPrice());
            document.addField("introduce", goods.getIntroduce());
            client.add(document);
            client.commit();
        } catch (Exception e) {
            // best-effort demo DAO: log and continue
            e.printStackTrace();
        } finally {
            // FIX: the original leaked the client; close it like delete() does.
            closeQuietly(client);
        }
    }

    /**
     * Deletes the document with the given id and commits.
     *
     * @param id record id to delete
     */
    public void delete(int id) {
        HttpSolrClient client = new HttpSolrClient.Builder(SOLR_URL)
                .withConnectionTimeout(5000).build();
        try {
            client.deleteById(String.valueOf(id));
            client.commit();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeQuietly(client);
        }
    }

    /**
     * Searches the "name" field for the keyword and returns matching records.
     *
     * @param keyword     query string (default field: name)
     * @param isHighLight when true, returns records whose name carries the
     *                    highlight markup instead of the raw stored value
     * @return matching goods; empty list when the query fails
     */
    public List<Goods> search(String keyword, boolean isHighLight) {
        HttpSolrClient client = new HttpSolrClient.Builder(SOLR_URL)
                .withConnectionTimeout(5000).build();

        SolrQuery solrQuery = new SolrQuery();
        solrQuery.setQuery(keyword);
        // Optional filter:      solrQuery.set("fq", "price:[0 TO *]");
        // Optional pagination:  solrQuery.setStart(0); solrQuery.setRows(10);
        solrQuery.set("df", "name");                         // default search field
        solrQuery.set("fl", "id, name, price, introduce");   // projected fields
        solrQuery.setHighlight(true);
        solrQuery.addHighlightField("name");
        solrQuery.setHighlightSimplePre("");                 // markup before the match
        solrQuery.setHighlightSimplePost("");                // markup after the match

        List<Goods> goods = new ArrayList<Goods>();
        List<Goods> highListGoods = new ArrayList<Goods>();
        try {
            QueryResponse response = client.query(solrQuery);

            SolrDocumentList docs = response.getResults();
            // Outer map key: document id; inner map key: field name;
            // value: list of highlighted snippets (first entry for single-valued fields).
            Map<String, Map<String, List<String>>> highlighting = response.getHighlighting();

            System.out.println("totalNum: " + docs.getNumFound());

            for (SolrDocument document : docs) {
                Goods _good = new Goods();
                Goods _highLightGood = new Goods();
                _good.setId(Integer.valueOf((String) document.get("id")));
                _good.setName((String) document.get("name"));
                _good.setPrice((double) document.get("price"));
                _good.setIntroduce((String) document.get("introduce"));

                // FIX: guard every step of the highlight lookup. The original did
                // map.get("name").get(0), which NPEs when the field produced no
                // highlight and throws IndexOutOfBounds on an empty snippet list.
                String name = null;
                if (highlighting != null) {
                    Map<String, List<String>> map = highlighting.get((String) document.get("id"));
                    if (map != null) {
                        List<String> snippets = map.get("name");
                        if (snippets != null && !snippets.isEmpty()) {
                            name = snippets.get(0);
                        }
                    }
                }
                _highLightGood.setId(_good.getId());
                // Fall back to the stored name when no highlight was produced.
                _highLightGood.setName(name == null ? _good.getName() : name);
                _highLightGood.setPrice(_good.getPrice());
                _highLightGood.setIntroduce(_good.getIntroduce());

                goods.add(_good);
                highListGoods.add(_highLightGood);
            }
        } catch (Exception e) {
            // FIX: the original continued with a null response after a failed
            // query and NPE'd; now we just return what was collected (empty).
            e.printStackTrace();
        } finally {
            closeQuietly(client);
        }
        return isHighLight ? highListGoods : goods;
    }

    /** Closes the client, swallowing the IOException a close may throw. */
    private static void closeQuietly(HttpSolrClient client) {
        try {
            client.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
package com.zcs.TestSolrDao;
import java.util.List;
import org.junit.Test;
import com.zcs.entity.Goods;
import com.zcs.solrDao.GoodsSolrDao;
/**
 * Manual integration tests for {@link GoodsSolrDao} against a running
 * local Solr instance.
 */
public class TestGoodsSolrDao {

    /** Writes one sample record (id 6) into the Solr core. */
    @Test
    public void TestInsertOrUpdate() {
        GoodsSolrDao dao = new GoodsSolrDao();
        Goods sample = new Goods(6, "meizu", 2333, "公司以设计研发优雅、简单易用的智能设备和系统为依托,致力于打造开放共赢的移动互联网生态,为用户创造融合设计和技术的新价值,打磨「人、科技、自然和谐互动」的未");
        dao.insertOrUpdate(sample);
    }

    /** Removes the sample record again by its id. */
    @Test
    public void Testdelete() {
        GoodsSolrDao dao = new GoodsSolrDao();
        dao.delete(6);
    }

    /** Runs a keyword search with highlighting disabled and prints the hits. */
    @Test
    public void testSearch() {
        GoodsSolrDao dao = new GoodsSolrDao();
        List<Goods> hits = dao.search("小米", false);
        System.out.println(hits);
    }
}