ES(elasticsearch)搜索引擎 结合java 使用

ES(elasticsearch)搜索引擎 结合java 使用

官方文档地址:
https://www.elastic.co/guide/en/elasticsearch/client/java-rest/7.x/java-rest-high.html

重点概念:
文件检索快是因为使用了倒排索引

倒排索引:
例如  文件a 中有 单词 java php ,文件b中有 java php python ,文件c 中有java , python  
那么 单词对应的文件为:
 java :a ,b,c       
 php: a,b   
 python : b,c    
  查找php只需要查找a,b文件即可
分词器:
例如 this is java ,将该段分为this  ,is,java ,并且记录出现的次数,每次搜索将会算出一个分数,也就是匹配值,越高越匹配。

一,新建maven 项目,引入依赖

        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>

        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.16</version>
        </dependency>
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.12.1</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.74</version>
        </dependency>

说明:关键包是spring-boot-starter-data-elasticsearch 其他是辅助包,jsoup 是为了后续从网页上抓取数据使用

二:新建配置类 ElasticsearchConfig

/**
 * Spring configuration that registers the Elasticsearch high-level REST client as a bean.
 */
@Configuration
public class ElasticsearchConfig {

    /**
     * Builds the client for a single node reached over plain HTTP on port 9200.
     * The host string is a placeholder and has to be replaced with the real address.
     *
     * @return a new {@link RestHighLevelClient}
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        HttpHost node = new HttpHost("your ip", 9200, "http");
        return new RestHighLevelClient(RestClient.builder(node));
    }
}

三:避免不必要的麻烦,将elasticsearch的客户端依赖版本设置成与安装的es版本一致
操作方法:
点击pom 文件的 parent 标签 中的artifactId
ES(elasticsearch)搜索引擎 结合java 使用_第1张图片

继续进依赖
ES(elasticsearch)搜索引擎 结合java 使用_第2张图片
可以看到版本
ES(elasticsearch)搜索引擎 结合java 使用_第3张图片
复制出去到pom.xml ,刷新依赖
ES(elasticsearch)搜索引擎 结合java 使用_第4张图片

四:操作api

工具类EsClient :

@Component
public class EsClient {
    @Resource
    private RestHighLevelClient restHighLevelClient;
    private final String table = "jd_table";

    /**
     * Creates the index named by {@code table} using a request with no extra settings.
     *
     * @param value not used by this method; kept so existing callers (which pass {@code null})
     *              keep compiling
     * @param <V>   type of the unused argument; it was previously referenced without being
     *              declared, which made the method fail to compile
     * @return the index name reported by the create-index response
     * @throws IOException propagated from the client call
     */
    public <V> String createIndex(V value) throws IOException {
        CreateIndexRequest request = new CreateIndexRequest(table);
        CreateIndexResponse response = restHighLevelClient.indices().create(request, RequestOptions.DEFAULT);
        return response.index();
    }

    /**
     * Indexes one document into {@code table}.
     *
     * @param value object serialized with {@code JSON.toJSONString} and used as the document source
     * @return the id carried by the index response
     * @throws IOException propagated from the client call
     */
    public Object save(Object value) throws IOException {
        String json = JSON.toJSONString(value);
        IndexRequest request = new IndexRequest(table).source(json, XContentType.JSON);
        IndexResponse response = restHighLevelClient.index(request, RequestOptions.DEFAULT);
        return response.getId();
    }

    /**
     * Asks the cluster whether the index named by {@code table} exists.
     *
     * @return the result of the indices-exists call
     * @throws IOException propagated from the client call
     */
    public boolean existsIndex() throws IOException {
        return restHighLevelClient.indices().exists(new GetIndexRequest(table), RequestOptions.DEFAULT);
    }

    /**
     * Fetches a document from {@code table} by id.
     *
     * @param id document id
     * @return whatever {@code getSource()} yields on the get response, which may be {@code null}
     * @throws IOException propagated from the client call
     */
    public Object getDocById(String id) throws IOException {
        // To skip loading _source, a FetchSourceContext(false) could be set on the request.
        GetResponse response = restHighLevelClient.get(new GetRequest(table, id), RequestOptions.DEFAULT);
        return response.getSource();
    }

    /**
     * Sends an update for the document with the given id in {@code table}.
     *
     * @param obj object serialized with {@code JSON.toJSONString} and passed as the update doc
     * @param id  document id
     * @return the id carried by the update response, which may be {@code null}
     * @throws IOException propagated from the client call
     */
    public String updateDocById(Object obj, String id) throws IOException {
        String json = JSON.toJSONString(obj);
        UpdateRequest request = new UpdateRequest(table, id);
        request.doc(json, XContentType.JSON);
        UpdateResponse response = restHighLevelClient.update(request, RequestOptions.DEFAULT);
        return response.getId();
    }

    /**
     * Sends a delete for the document with the given id in {@code table}.
     *
     * @param id document id
     * @return the id carried by the delete response, which may be {@code null}
     * @throws IOException propagated from the client call
     */
    public String deletDocById(String id) throws IOException {
        DeleteResponse response =
                restHighLevelClient.delete(new DeleteRequest(table, id), RequestOptions.DEFAULT);
        return response.getId();
    }

    /**
     * Indexes all given objects into {@code table} with one bulk request (timeout "30s").
     *
     * <p>The parameter is {@code List<?>} so that typed lists such as {@code List<JdProduct>} can
     * be passed; {@code List<Object>} callers are unaffected.
     *
     * <p>NOTE(review): document ids are the list positions ("0", "1", ...), so repeated calls
     * reuse the same ids — confirm that replacing earlier documents is intended.
     *
     * @param values objects to index; each is serialized with {@code JSON.toJSONString}
     * @return {@code true} when the bulk response reports no failures, or when there was
     *         nothing to index
     * @throws IOException propagated from the client call
     */
    public boolean saveBulk(List<?> values) throws IOException {
        // Nothing to index: skip the round trip (a bulk request without actions is rejected
        // by request validation).
        if (values == null || values.isEmpty()) {
            return true;
        }
        BulkRequest bulkRequest = new BulkRequest(table);
        bulkRequest.timeout("30s");
        int position = 0;
        for (Object value : values) {
            IndexRequest item = new IndexRequest(table)
                    .id(String.valueOf(position))
                    .source(JSON.toJSONString(value), XContentType.JSON);
            bulkRequest.add(item);
            position++;
        }
        BulkResponse response = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
        return !response.hasFailures();
    }

    /**
     * Runs a term query against {@code table} and maps every hit's source to {@code claz}.
     *
     * @param keyword value to look up
     * @param fild    name of the field the term query targets
     * @param claz    target type; each hit's source map is converted through JSON
     * @param <T>     element type of the result
     * @return the converted hits in response order
     * @throws IOException propagated from the client call
     */
    public <T> List<T> searchTerm(String keyword, String fild,Class<T> claz) throws IOException {
        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder()
                .timeout(new TimeValue(60, TimeUnit.SECONDS))
                .query(QueryBuilders.termQuery(fild, keyword));
        SearchRequest request = new SearchRequest(table).source(sourceBuilder);

        SearchResponse response = restHighLevelClient.search(request, RequestOptions.DEFAULT);
        SearchHit[] hits = response.getHits().getHits();
        List<T> results = new ArrayList<>(hits.length);
        for (int i = 0; i < hits.length; i++) {
            // Round-trip through JSON to turn the raw source map into the requested type.
            String json = JSONObject.toJSONString(hits[i].getSourceAsMap());
            T item = JSONObject.parseObject(json, claz);
            results.add(item);
        }
        return results;
    }

    /**
     * Runs a paged match query against {@code table} and maps every hit's source to {@code claz}.
     *
     * @param keyword text to match
     * @param fild    name of the field the match query targets
     * @param form    offset of the first hit to return (passed to {@code from})
     * @param size    maximum number of hits to return
     * @param claz    target type; each hit's source map is converted through JSON
     * @param <T>     element type of the result
     * @return the converted hits in response order
     * @throws IOException propagated from the client call
     */
    public <T> List<T> searchMatch(String keyword, String fild,Integer form,Integer size, Class<T> claz) throws IOException {
        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder()
                .size(size)
                .from(form)
                .timeout(new TimeValue(60, TimeUnit.SECONDS))
                .query(QueryBuilders.matchQuery(fild, keyword));
        SearchRequest request = new SearchRequest(table).source(sourceBuilder);

        SearchResponse response = restHighLevelClient.search(request, RequestOptions.DEFAULT);
        SearchHit[] hits = response.getHits().getHits();
        List<T> results = new ArrayList<>(hits.length);
        for (int i = 0; i < hits.length; i++) {
            // Round-trip through JSON to turn the raw source map into the requested type.
            String json = JSONObject.toJSONString(hits[i].getSourceAsMap());
            T item = JSONObject.parseObject(json, claz);
            results.add(item);
        }
        return results;
    }


    /**
     * Runs a paged match query with highlighting on {@code fild}. For hits that carry a highlight
     * for that field, the field's value in the source map is replaced by the concatenated
     * highlight fragments before the map is converted to {@code claz}.
     *
     * <p>NOTE(review): the pre/post tags are empty strings, so the fragments carry no visible
     * markers — confirm which tags were intended.
     *
     * @param keyword text to match
     * @param fild    name of the field that is both queried and highlighted
     * @param form    offset of the first hit to return (passed to {@code from})
     * @param size    maximum number of hits to return
     * @param claz    target type; each hit's source map is converted through JSON
     * @param <T>     element type of the result
     * @return the converted hits in response order
     * @throws IOException propagated from the client call
     */
    public <T> List<T> searchMatchHeightLight(String keyword, String fild,Integer form,Integer size, Class<T> claz) throws IOException {
        SearchRequest request = new SearchRequest(table);
        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        // Paging.
        searchSourceBuilder.size(size);
        searchSourceBuilder.from(form);

        MatchQueryBuilder matchQueryBuilder = QueryBuilders.matchQuery(fild, keyword);
        searchSourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));
        searchSourceBuilder.query(matchQueryBuilder);

        // Highlighting.
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        highlightBuilder.field(fild);
        highlightBuilder.preTags("");
        highlightBuilder.postTags("");
        searchSourceBuilder.highlighter(highlightBuilder);

        request.source(searchSourceBuilder);

        SearchResponse search = restHighLevelClient.search(request, RequestOptions.DEFAULT);
        List<T> lists = new ArrayList<>();
        for (SearchHit hit : search.getHits().getHits()) {
            Map<String, Object> sourceAsMap = hit.getSourceAsMap();
            HighlightField highlight = hit.getHighlightFields().get(fild);
            if (highlight != null) {
                StringBuilder highlighted = new StringBuilder();
                for (Text fragment : highlight.fragments()) {
                    highlighted.append(fragment);
                }
                // Store a plain String rather than the StringBuilder so the JSON round-trip
                // below does not depend on how the serializer treats StringBuilder values.
                sourceAsMap.put(fild, highlighted.toString());
            }
            T result = JSONObject.parseObject(JSONObject.toJSONString(sourceAsMap), claz);
            lists.add(result);
        }
        return lists;
    }

    /**
     * Runs a match-all query against {@code table} and maps every returned hit's source to
     * {@code claz}. No paging parameters are set on the request.
     *
     * @param claz target type; each hit's source map is converted through JSON
     * @param <T>  element type of the result
     * @return the converted hits in response order
     * @throws IOException propagated from the client call
     */
    public <T> List<T> searchMatchAll(Class<T> claz) throws IOException {
        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder()
                .timeout(new TimeValue(60, TimeUnit.SECONDS))
                .query(QueryBuilders.matchAllQuery());
        SearchRequest request = new SearchRequest(table).source(sourceBuilder);

        SearchResponse response = restHighLevelClient.search(request, RequestOptions.DEFAULT);
        SearchHit[] hits = response.getHits().getHits();
        List<T> results = new ArrayList<>(hits.length);
        for (int i = 0; i < hits.length; i++) {
            // Round-trip through JSON to turn the raw source map into the requested type.
            String json = JSONObject.toJSONString(hits[i].getSourceAsMap());
            T item = JSONObject.parseObject(json, claz);
            results.add(item);
        }
        return results;
    }

单元测试:

/**
 * Spring Boot tests that call {@link EsClient} and print the results.
 * They use the autowired client directly, so presumably a reachable Elasticsearch node is
 * required — verify against the project's configuration.
 */
@SpringBootTest
class EsApplicationTests {

    @Autowired
    private EsClient esClient;

    /**
     * Creates the index, then prints whether it exists.
     */
    @Test
    void createIndex() throws IOException {
        esClient.createIndex(null);
        System.out.println(esClient.existsIndex());
    }

    /**
     * Saves a single document and prints the returned id.
     */
    @Test
    void testSave() throws IOException {
        Object save = esClient.save(new JdProduct("this is title","this is mg ",2.0d));
        System.out.println(save);
    }

    /**
     * Looks up a document by id and prints its source.
     */
    @Test
    void testGetDocById() throws IOException {
        Object obj = esClient.getDocById("cDSDYHYBbeqLPYSa4sFf");
        System.out.println(obj);
    }

    /**
     * Bulk-inserts a handful of sample products.
     */
    @Test
    void testbiluk() throws IOException {
        // Declared as List<Object> so the list is accepted by saveBulk's parameter type.
        List<Object> objects = new ArrayList<>();
        objects.add(new JdProduct("this is title","this is mg ",2.0d));
        objects.add(new JdProduct("this is title1","this is mg1 ",2.0d));
        objects.add(new JdProduct("this is title2","this is mg2 ",2.0d));
        objects.add(new JdProduct("this is title3","this is mg3 ",2.0d));
        objects.add(new JdProduct("this is title4","this is mg4 ",2.0d));
        objects.add(new JdProduct("this is title5","this is mg5 ",2.0d));
        objects.add(new JdProduct("this is title6","this is mg6 ",2.0d));
        boolean b = esClient.saveBulk(objects);
        System.out.println(b);
    }

    /**
     * Term (exact) query on the title field.
     */
    @Test
    void testTreamSearch() throws IOException {
        List<JdProduct> search = esClient.searchTerm("title", "title", JdProduct.class);
        System.out.println(search);
    }

    /**
     * Match query on the title field, first page of 10.
     */
    @Test
    void testMatchSearch() throws IOException {
        List<JdProduct> search = esClient.searchMatch("title", "title",0,10, JdProduct.class);
        System.out.println(search);
    }

    /**
     * Match-all query.
     */
    @Test
    void testMatchSearchAll() throws IOException {
        List<JdProduct> search = esClient.searchMatchAll(JdProduct.class);
        System.out.println(search);
    }

    /**
     * Scrapes product entries from a JD search page and bulk-inserts them.
     * List items without a usable price are skipped instead of aborting the whole run.
     */
    @Test
    void insertJdFromHtml() throws IOException {
        String urlStr="https://search.jd.com/Search?keyword=java";
        Document document = Jsoup.parse(new URL(urlStr), 30000);
        Element goodsList = document.getElementById("J_goodsList");
        if (goodsList == null) {
            // Fail with a clear message rather than a bare NullPointerException.
            throw new IllegalStateException("Element J_goodsList not found in page " + urlStr);
        }

        // Declared as List<Object> so the list is accepted by saveBulk's parameter type.
        List<Object> products = new ArrayList<>();
        for (Element item : goodsList.getElementsByTag("li")) {
            String priceStr = item.getElementsByClass("p-price").eq(0).text();
            // The first character is dropped before parsing (presumably a currency symbol),
            // so at least two characters are needed.
            if (priceStr.length() < 2) {
                continue;
            }
            double price;
            try {
                price = Double.parseDouble(priceStr.substring(1));
            } catch (NumberFormatException ignored) {
                // Price text is not a plain number; skip this entry.
                continue;
            }
            String title = item.getElementsByClass("p-name").eq(0).text();
            String img = item.getElementsByTag("img").eq(0).attr("data-lazy-img");
            products.add(new JdProduct(title, img, price));
        }

        boolean insertFlag = esClient.saveBulk(products);
        System.out.println(insertFlag);
    }

}

你可能感兴趣的:(es,elasticsearch)