Lucene Getting Started Example

1. Create a Maven project and add the required dependencies

    <dependencies>
        <!-- Lucene core -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>4.10.3</version>
        </dependency>
        <!-- Lucene query parser -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>4.10.3</version>
        </dependency>
        <!-- commons-io for reading the files to index -->
        <dependency>
            <groupId>commons-io</groupId>
            <artifactId>commons-io</artifactId>
            <version>2.4</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.10</version>
        </dependency>
        <!-- IK Analyzer Chinese word segmenter -->
        <dependency>
            <groupId>com.janeluo</groupId>
            <artifactId>ikanalyzer</artifactId>
            <version>2012_u6</version>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <!-- compile with Java 1.8 -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.1</version>
                <configuration>
                    <target>1.8</target>
                    <source>1.8</source>
                </configuration>
            </plugin>
        </plugins>
    </build>

2. Configuration file for the IKAnalyzer Chinese word segmenter

IKAnalyzer.cfg.xml


  
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
<properties>
	<comment>IK Analyzer extension configuration</comment>
	<!-- user extension dictionary -->
	<entry key="ext_dict">ext.dic;</entry>
	<!-- extension stop-word dictionary -->
	<entry key="ext_stopwords">stopword.dic;</entry>
</properties>

Note the location of ext.dic and stopword.dic: IKAnalyzer loads them from the classpath (e.g. src/main/resources), alongside IKAnalyzer.cfg.xml.
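Both dictionary files are plain UTF-8 text files with one term per line; the entries below are only illustrative samples, not part of the original example.

ext.dic

传智播客

stopword.dic

的
了
a
an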

3. Create the test class

package com.itheima;

import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;
import org.apache.lucene.search.*;
import java.io.File;

public class Lucence {

    @Test // create the index
    public void CreateIndex() throws Exception {
        //1. Point FSDirectory.open at the index directory
        FSDirectory directory = FSDirectory.open(new File("E:\\LUCENE"));
        //2. Declare the analyzer; StandardAnalyzer takes the Lucene version as its argument
//        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_10_3);
        //replace the standard analyzer with the IK Chinese analyzer
        Analyzer analyzer = new IKAnalyzer();
        //3. IndexWriterConfig: param 1 is the Lucene version, param 2 is the analyzer
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_3, analyzer);
        //4. IndexWriter: param 1 is the index directory, param 2 is the config
        IndexWriter indexWriter = new IndexWriter(directory, config);
        //5. Point a File at the local directory whose files will be indexed
        File fileD = new File("E:\\findAllLUCENE");
        //6. listFiles() returns all files under that directory
        File[] files = fileD.listFiles();
        //7. Loop over the files, print their info, and save each Document through the writer
        for (File file : files) {
            //file name: file.getName()
            System.out.println("File name: " + file.getName());
            //file content: FileUtils.readFileToString(file)
            System.out.println("File content: " + FileUtils.readFileToString(file));
            //file size: FileUtils.sizeOf(file)
            System.out.println("File size: " + FileUtils.sizeOf(file));
            //file path: file.getPath()
            System.out.println("File path: " + file.getPath());
            //8. Build the Document
            Document document = new Document();
            /**
             * TextField       stores tokenized text
             * LongField       stores a numeric value
             * Field.Store.YES the value is stored and can be returned with search results
             */
            document.add(new TextField("fileName", file.getName(), Field.Store.YES));
            document.add(new TextField("fileContent", FileUtils.readFileToString(file), Field.Store.YES));
            document.add(new LongField("fileSize", FileUtils.sizeOf(file), Field.Store.YES));
            document.add(new TextField("filePath", file.getPath(), Field.Store.YES));
            //9. Write the document into the index
            indexWriter.addDocument(document);
        }
        //10. Commit
        indexWriter.commit();
        //11. Close the writer
        indexWriter.close();
    }

    @Test  // direct query
    public void queryIndex01() throws Exception {
        //1. Open the index with DirectoryReader.open
        DirectoryReader reader = DirectoryReader.open(FSDirectory.open(new File("E:\\LUCENE")));
        //2. Create the IndexSearcher
        IndexSearcher searcher = new IndexSearcher(reader);
        //3. MatchAllDocsQuery matches every document
//        Query query = new MatchAllDocsQuery();

        //3. TermQuery: param 1 is the field, param 2 is the term text
        //Query query = new TermQuery(new Term("fileName","传智播客"));

        //3. Numeric range query: param 1 field, param 2 lower bound (bytes), param 3 upper bound (bytes), param 4 include lower, param 5 include upper
        Query query = NumericRangeQuery.newLongRange("fileSize", 1L, 50L, true, true);
        //4. searcher.search runs the query: param 1 is the query, param 2 is the maximum number of hits to return
        TopDocs docs = searcher.search(query, 10);
        //5. topDocs.scoreDocs holds the matching documents
        ScoreDoc[] scoreDocs = docs.scoreDocs;
        for (ScoreDoc scoreDoc : scoreDocs) {
            //6. scoreDoc.doc is the internal document id
            System.out.println("doc id====" + scoreDoc.doc);
            //7. searcher.doc(id) fetches the stored Document
            Document doc = searcher.doc(scoreDoc.doc);

            //print the stored fields
            System.out.println("Name: " + doc.getField("fileName"));
            System.out.println("Content: " + doc.getField("fileContent"));
            System.out.println("Size: " + doc.getField("fileSize"));
            System.out.println("Path: " + doc.getField("filePath"));
        }

        //8. topDocs.totalHits is the total number of matching documents
        System.out.println("Total hits = " + docs.totalHits);
    }
    @Test  // query with analyzed search text
    public void queryIndex() throws Exception {
        //1. Open the index with DirectoryReader.open
        DirectoryReader reader = DirectoryReader.open(FSDirectory.open(new File("E:\\LUCENE")));
        //2. Create the IndexSearcher
        IndexSearcher searcher = new IndexSearcher(reader);

        //3. QueryParser runs the search string through the analyzer as well; single-field query
        String searchStr = "传智播客的发射点发生";

        QueryParser parser = new QueryParser("fileName", new IKAnalyzer());
        Query query = parser.parse(searchStr);
        //multi-field query
        /*String[] fields = {"fileName", "fileContent"};
        MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, new IKAnalyzer());
        Query query = parser.parse(searchStr);*/
        //4. searcher.search runs the query: param 1 is the query, param 2 is the maximum number of hits to return
        TopDocs docs = searcher.search(query, 10);
        //5. topDocs.scoreDocs holds the matching documents
        ScoreDoc[] scoreDocs = docs.scoreDocs;
        for (ScoreDoc scoreDoc : scoreDocs) {
            //6. scoreDoc.doc is the internal document id
            System.out.println("doc id====" + scoreDoc.doc);
            //7. searcher.doc(id) fetches the stored Document
            Document doc = searcher.doc(scoreDoc.doc);

            //print the stored fields
            System.out.println("Name: " + doc.getField("fileName"));
            System.out.println("Content: " + doc.getField("fileContent"));
            System.out.println("Size: " + doc.getField("fileSize"));
            System.out.println("Path: " + doc.getField("filePath"));
        }

        //8. topDocs.totalHits is the total number of matching documents
        System.out.println("Total hits = " + docs.totalHits);
    }

    @Test  // delete from the index
    public void deleteIndex() throws Exception {

        //index directory (the same directory created above)
        FSDirectory directory = FSDirectory.open(new File("E:\\LUCENE"));
        //analyzer: IKAnalyzer (StandardAnalyzer shown commented out)
        //StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_10_3);
        Analyzer analyzer = new IKAnalyzer();
        //writer config: param 1 is the Lucene version, param 2 is the analyzer
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_3, analyzer);

        //IndexWriter: param 1 is the directory, param 2 is the config
        IndexWriter writer = new IndexWriter(directory, config);

        //delete the documents whose fileName field matches the term
        writer.deleteDocuments(new Term("fileName", "传智播客"));

        //delete everything in the index
//        writer.deleteAll();

        //commit
        writer.commit();
        //close
        writer.close();
    }
}
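To check how the IK analyzer (and the extension/stop-word dictionaries configured above) actually splits a given string, a small tokenization test is handy. The following is a minimal sketch using the same dependencies as above; the class name AnalyzerCheck and the sample string are illustrative and not part of the original example:

package com.itheima;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;

public class AnalyzerCheck {

    @Test // print the terms IKAnalyzer produces for a sample string
    public void showTokens() throws Exception {
        Analyzer analyzer = new IKAnalyzer();
        // the field name only matters for per-field analyzers; "fileName" is reused here
        TokenStream ts = analyzer.tokenStream("fileName", "传智播客的发射点发生");
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            System.out.println(term.toString());
        }
        ts.end();
        ts.close();
    }
}

If a term listed in ext.dic comes out as one token and the stop words disappear, the configuration file was found on the classpath.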
