ES存取

 

从商城爬取数据写入ES

/**
     * Scrapes products for the given keyword and bulk-indexes them into the
     * "jlr" Elasticsearch index, one JSON document per product.
     *
     * @param keyWords search keyword handed to {@code EsReptile.reptile}
     * @return {@code true} if the bulk request ran and reported no failures;
     *         {@code false} if any item failed or there was nothing to index
     * @throws IOException if scraping or the Elasticsearch call fails
     */
    public boolean parse(String keyWords) throws IOException {
        // Scrape the product list for the keyword.
        List<Product> products = EsReptile.reptile(keyWords);

        // A bulk request with no actions is rejected by the client's request
        // validation, so report "nothing written" instead of sending it.
        if (products == null || products.isEmpty()) {
            return false;
        }

        BulkRequest bulkRequest = new BulkRequest();
        bulkRequest.timeout("10m"); // generous timeout; tune to the workload

        // Queue one index action per scraped product.
        for (Product product : products) {
            // Serialize once and reuse it for both the debug echo and the request.
            String json = JSON.toJSONString(product);
            System.out.println(json);
            bulkRequest.add(new IndexRequest("jlr").source(json, XContentType.JSON));
        }

        // Execute the batch; success means no item in the batch failed.
        BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
        return !bulk.hasFailures();
    }

 解析网站方法

/**
 * Fetches the JD search result page for a keyword and extracts one
 * {@code Product} (image URL, price text, name text) per {@code <li>} found
 * inside the element with id {@code J_goodsList}.
 *
 * @param param search keyword; it is URL-encoded before being placed in the query string
 * @return the scraped products, or an empty list when the result container
 *         is not present in the returned page
 * @throws IOException if the page cannot be fetched or parsed
 */
public static List<Product> reptile(String param) throws IOException {
    // Encode the keyword so spaces, '&' and non-ASCII text cannot corrupt the URL.
    // UnsupportedEncodingException is an IOException, so it is covered by the throws clause.
    String keyword = java.net.URLEncoder.encode(param, "UTF-8");
    // NOTE(review): "wq=Java" and the pvid value are hard-coded regardless of the
    // keyword; confirm whether the site expects them to follow the search term.
    String url = "https://search.jd.com/Search?keyword=" + keyword + "&enc=utf-8&wq=Java&pvid=954267ac4155414e9be65105f35536e2";

    // Fetch and parse the page into a Jsoup Document (30 s timeout).
    Document parse = Jsoup.parse(new URL(url), 30000);

    List<Product> products = new ArrayList<>();

    // The result list lives in the div with id "J_goodsList". It is absent when
    // the site serves a different page, so return no products rather than
    // failing with a NullPointerException.
    Element content = parse.getElementById("J_goodsList");
    if (content == null) {
        return products;
    }

    // Each <li> under the container is treated as one product entry.
    Elements elements = content.getElementsByTag("li");
    for (Element element : elements) {
        String img = element.getElementsByTag("img").eq(0).attr("src");
        String price = element.getElementsByClass("p-price").eq(0).text();
        String name = element.getElementsByClass("p-name").eq(0).text();

        Product product = new Product();
        product.setImg(img);
        product.setPrice(price);
        product.setName(name);
        products.add(product);
    }

    return products;
}

 

 

es查询做分页高亮

/**
     * 解析的数据查询分页返回
     * @param keyWords
     * @throws IOException
     */
    public List> queryByPage(String keyWords, int pageNo, int pageSize) throws IOException {
        if(pageNo <= 1) {
            pageNo = 1;
        }
        //搜索请求
        SearchRequest searchRequest = new SearchRequest("jlr");
        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();

        //高亮
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        highlightBuilder.field("name");//指定哪个字段要高亮
        highlightBuilder.preTags("");//标签前缀什么样式自己定义
        highlightBuilder.postTags("");//后缀
        sourceBuilder.highlighter(highlightBuilder);

        //分页
        sourceBuilder.from(pageNo);
        sourceBuilder.size(pageSize);
        //精准匹配
        TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("name", keyWords);
        sourceBuilder.query(termQueryBuilder);
        sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));//超时时间
        searchRequest.source(sourceBuilder);
        //执行查询
        SearchResponse searchResult = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

        //查询结果放入集合
        List> resultList = new ArrayList<>();
        SearchHit[] hits = searchResult.getHits().getHits();
        for (SearchHit searchHit: hits) {
            //获取到高亮字段
            Map highlightFields = searchHit.getHighlightFields();
            HighlightField name = highlightFields.get("name");

            Map sourceAsMap = searchHit.getSourceAsMap();
            //将原来的字段替换成高亮字段
            if(name != null) {
                Text[] fragments = name.fragments();

                String newName = "";
                for (Text text: fragments) {
                    newName += text;
                }
                sourceAsMap.put("name", newName);//覆盖原来的
            }
            resultList.add(sourceAsMap);
        }
        return resultList;
    }
}

资源来自:B站-狂神说Java(https://www.bilibili.com/video/BV17a4y1x7zq?p=19)

你可能感兴趣的:(ES,java,es)