<dependencies>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>4.10.3</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>4.10.3</version>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.4</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.10</version>
</dependency>
<dependency>
<groupId>com.janeluo</groupId>
<artifactId>ikanalyzer</artifactId>
<version>2012_u6</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.1</version>
<configuration>
<target>1.8</target>
<source>1.8</source>
</configuration>
</plugin>
</plugins>
</build>
IKAnalyzer.cfg.xml
<properties>
<comment>IK Analyzer 扩展配置</comment>
<entry key="ext_dict">ext.dic;</entry>
<entry key="ext_stopwords">stopword.dic;</entry>
</properties>
ext.dic 和 stopword.dic 注意路径
package com.itheima;
import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;
import org.apache.lucene.search.*;
import java.io.File;
/**
 * Lucene 4.10.3 demo: builds an index from local files using the IK analyzer,
 * then queries and deletes documents from it.
 *
 * All methods operate on the index directory {@code E:\LUCENE}.
 * Resources (directory, writer, reader) are managed with try-with-resources
 * so they are closed even when an exception is thrown mid-operation.
 */
public class Lucence {

    /** Path of the Lucene index directory shared by all test methods. */
    private static final String INDEX_PATH = "E:\\LUCENE";

    /**
     * Creates an index from every file under {@code E:\findAllLUCENE},
     * storing each file's name, content, size and path as one Document.
     *
     * @throws Exception on I/O or indexing failure
     */
    @Test // create index
    public void CreateIndex() throws Exception {
        // Use the IK analyzer (Chinese-aware) instead of StandardAnalyzer.
        Analyzer analyzer = new IKAnalyzer();
        // Writer config: Lucene version + analyzer used at index time.
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_3, analyzer);
        try (FSDirectory directory = FSDirectory.open(new File(INDEX_PATH));
             IndexWriter indexWriter = new IndexWriter(directory, config)) {
            File sourceDir = new File("E:\\findAllLUCENE");
            File[] files = sourceDir.listFiles();
            if (files == null) {
                // listFiles() returns null when the path does not exist or is not a directory.
                throw new IllegalStateException("Cannot list files under: " + sourceDir.getPath());
            }
            for (File file : files) {
                // Read the content once and reuse it (the original read each file twice).
                String content = FileUtils.readFileToString(file);
                long size = FileUtils.sizeOf(file);
                System.out.println("文件名:" + file.getName());
                System.out.println("文件内容:" + content);
                System.out.println("文件大小:" + size);
                System.out.println("文件路径" + file.getPath());
                // TextField: tokenized text; LongField: numeric value;
                // Field.Store.YES: keep the raw value retrievable from the index.
                Document document = new Document();
                document.add(new TextField("fileName", file.getName(), Field.Store.YES));
                document.add(new TextField("fileContent", content, Field.Store.YES));
                document.add(new LongField("fileSize", size, Field.Store.YES));
                document.add(new TextField("filePath", file.getPath(), Field.Store.YES));
                indexWriter.addDocument(document);
            }
            indexWriter.commit();
        }
    }

    /**
     * Term/range query example: finds documents whose {@code fileSize}
     * lies in [1, 50] bytes and prints each hit.
     *
     * @throws Exception on I/O or search failure
     */
    @Test // direct query
    public void queryIndex01() throws Exception {
        try (DirectoryReader reader = DirectoryReader.open(FSDirectory.open(new File(INDEX_PATH)))) {
            IndexSearcher searcher = new IndexSearcher(reader);
            // Alternatives kept for reference:
            // Query query = new MatchAllDocsQuery();                       // match everything
            // Query query = new TermQuery(new Term("fileName","传智播客")); // exact term
            // Numeric range: field, lower, upper, includeLower, includeUpper.
            Query query = NumericRangeQuery.newLongRange("fileSize", 1L, 50L, true, true);
            // search(condition, maxHits)
            TopDocs docs = searcher.search(query, 10);
            printResults(searcher, docs);
        }
    }

    /**
     * Analyzed query example: the search string itself is tokenized by the
     * IK analyzer via QueryParser before matching against {@code fileName}.
     *
     * @throws Exception on I/O, parse or search failure
     */
    @Test // analyzed query
    public void queryIndex() throws Exception {
        try (DirectoryReader reader = DirectoryReader.open(FSDirectory.open(new File(INDEX_PATH)))) {
            IndexSearcher searcher = new IndexSearcher(reader);
            // Single-field parse; the query string is run through the analyzer.
            String searchStr = "传智播客的发射点发生";
            QueryParser parser = new QueryParser("fileName", new IKAnalyzer());
            Query query = parser.parse(searchStr);
            // Multi-field variant kept for reference:
            /*String[] fileds = {"fileName", "fileContent"};
            MultiFieldQueryParser parser = new MultiFieldQueryParser(fileds, new IKAnalyzer());
            Query query = parser.parse(searchStr);*/
            TopDocs docs = searcher.search(query, 10);
            printResults(searcher, docs);
        }
    }

    /**
     * Deletes every document whose {@code fileName} field contains the
     * term "传智播客". Uses the same index directory as the other methods
     * (the original pointed at D:\LUCENE by mistake).
     *
     * @throws Exception on I/O failure
     */
    @Test // delete from index
    public void deleteIndex() throws Exception {
        Analyzer analyzer = new IKAnalyzer();
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_3, analyzer);
        try (FSDirectory directory = FSDirectory.open(new File(INDEX_PATH));
             IndexWriter writer = new IndexWriter(directory, config)) {
            // Delete documents matching the term; writer.deleteAll() would wipe the index.
            writer.deleteDocuments(new Term("fileName", "传智播客"));
            // writer.deleteAll();
            writer.commit();
        }
    }

    /**
     * Prints every hit in {@code docs} (id, name, content, size, path)
     * followed by the total hit count. Shared by both query methods.
     */
    private void printResults(IndexSearcher searcher, TopDocs docs) throws Exception {
        ScoreDoc[] scoreDocs = docs.scoreDocs;
        for (ScoreDoc scoreDoc : scoreDocs) {
            // scoreDoc.doc is the internal Lucene document id.
            System.out.println("文档id====" + scoreDoc.doc);
            Document doc = searcher.doc(scoreDoc.doc);
            System.out.println("名称" + doc.getField("fileName"));
            System.out.println("内容" + doc.getField("fileContent"));
            System.out.println("大小" + doc.getField("fileSize"));
            System.out.println("路径" + doc.getField("filePath"));
        }
        // totalHits counts all matching documents, not just the returned page.
        System.out.println("查询总记录数=" + docs.totalHits);
    }
}