SpringBoot+ElasticSearch+JSOUP+Vue实现JD搜索高亮

1.pom文件



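<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.2.5.RELEASE</version>
        <relativePath/> <!-- lookup parent from repository -->
    </parent>
    <groupId>com.zxj</groupId>
    <artifactId>es-jd</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>es-jd</name>
    <description>Demo project for Spring Boot</description>

    <properties>
        <java.version>1.8</java.version>
        <!-- 自定义ES版本,需与本地安装的ES版本保持一致 -->
        <elasticsearch.version>7.6.1</elasticsearch.version>
    </properties>

    <dependencies>

        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.10.2</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.60</version>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-thymeleaf</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>

        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-devtools</artifactId>
            <scope>runtime</scope>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-configuration-processor</artifactId>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
            <exclusions>
                <exclusion>
                    <groupId>org.junit.vintage</groupId>
                    <artifactId>junit-vintage-engine</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
            </plugin>
        </plugins>
    </build>
</project>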


 静态资源素材:链接:https://pan.baidu.com/s/1VOErtTAtikR9MZCSO4062w 
提取码:two0

SpringBoot+vue+JSOUP+Vue实现JD搜索高亮_第1张图片

application.properties:

server.port=9090
#关闭thymeleaf缓存
spring.thymeleaf.cache=false

IndexController.java

package com.zxj.controller;

import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.GetMapping;

/**
 * MVC controller that serves the application's landing page.
 *
 * @author zxj
 * @date 2020/5/28 13:05
 */
@Controller
public class IndexController {

    /** Logical view name handed back to the view resolver for the landing page. */
    private static final String INDEX_VIEW = "index";

    /**
     * Handles GET requests for the root path and for {@code /index}.
     *
     * @return the view name {@code "index"}
     */
    @GetMapping({"/", "/index"})
    public String index() {
        return INDEX_VIEW;
    }
}

 ElasticSearchConfig.java

package com.zxj.config;

import org.apache.http.HttpHost;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

/**
 * Spring configuration that registers the Elasticsearch high-level REST client as a bean.
 *
 * @author zxj
 * @date 2020/5/28 11:10
 */
@Configuration
public class ElasticSearchConfig {

    /**
     * Builds a {@link RestHighLevelClient} that talks to a single node at
     * {@code http://localhost:9200}.
     *
     * @return the client instance managed by the Spring container
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        // The only node this demo connects to.
        HttpHost localNode = new HttpHost("localhost", 9200, "http");
        return new RestHighLevelClient(RestClient.builder(localNode));
    }
}

实体类Content.java 

package com.zxj.bean;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
 * Plain data holder for one product entry.
 *
 * <p>Lombok generates the accessors, {@code equals}/{@code hashCode}/{@code toString},
 * a no-argument constructor and an all-arguments constructor. The all-arguments
 * constructor takes the fields in declaration order: title, img, price.
 *
 * @author zxj
 * @date 2020/5/28 13:35
 */
@NoArgsConstructor
@AllArgsConstructor
@Data
public class Content {

    /** Product title text. */
    private String title;

    /** Product image address. */
    private String img;

    /** Product price, kept as the raw text. */
    private String price;
}

Html页面解析工具类HtmlParseUtil.java 

package com.zxj.utils;

import com.zxj.bean.Content;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;

/**
 * @author zxj
 * @date 2020/5/28 13:27
 */
public class HtmlParseUtil {
    public static void main(String[] args) throws IOException {
        parseJD("java").forEach(System.out :: println);
    }
    public static List parseJD(String keyword) throws IOException {
        String url = "https://search.jd.com/Search?keyword=" + keyword;
        Document document = Jsoup.parse(new URL(url), 30000);
        Element element = document.getElementById("J_goodsList");
        Elements elements = element.getElementsByTag("li");

        List list = new ArrayList<>();

        for (Element el : elements) {
            //注意,此处图片有的网站是懒加载的,src属性获取不到想要的图片
            String img = el.getElementsByTag("img").eq(0).attr("src");
            String price = el.getElementsByClass("p-price").eq(0).text();
            String title = el.getElementsByClass("p-name").eq(0).text();

            list.add(new Content(title, img, price));
        }
        return list;
    }
}
package com.zxj.service;

import com.alibaba.fastjson.JSON;
import com.sun.org.apache.xpath.internal.operations.Bool;
import com.zxj.bean.Content;
import com.zxj.utils.HtmlParseUtil;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.elasticsearch.search.fetch.subphase.highlight.Highlighter;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.stereotype.Service;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;

/**
 * @author zxj
 * @date 2020/5/28 13:45
 */
@Service
public class ContentService {

    @Autowired
    private RestHighLevelClient restHighLevelClient;

    //1.解析数据放入es中
    public Boolean parseContent(String keyword) throws IOException {
        List contents = HtmlParseUtil.parseJD(keyword);
        BulkRequest bulkRequest = new BulkRequest();
        bulkRequest.timeout("2m");

        for (int i = 0; i < contents.size(); i++) {
            bulkRequest.add(new IndexRequest("jd_goods").source(JSON.toJSONString(contents.get(i)), XContentType.JSON));
        }
        BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
        return !bulk.hasFailures();
    }

    //2.搜索
    public List> search(String keyword, Integer pageNo, Integer pageSize) throws IOException {
        if(pageNo <= 1){
            pageNo = 1;
        }
        SearchRequest searchRequest = new SearchRequest("jd_goods");
        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();

        //分页
        sourceBuilder.from(pageNo);
        sourceBuilder.size(pageSize);

        sourceBuilder.query(QueryBuilders.termQuery("title", keyword));
        sourceBuilder.timeout(new TimeValue(10, TimeUnit.SECONDS));

        //高亮
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        highlightBuilder.field("title");
        highlightBuilder.requireFieldMatch(false);//多个高亮显示
        highlightBuilder.preTags("");
        highlightBuilder.postTags("");

        sourceBuilder.highlighter(highlightBuilder);

        searchRequest.source(sourceBuilder);
        SearchResponse response = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

        ArrayList> list = new ArrayList<>();
        for (SearchHit hit : response.getHits().getHits()) {

            Map highlightFields = hit.getHighlightFields();
            HighlightField title = highlightFields.get("title");

            //原来不高亮的查询结果
            Map sourceAsMap = hit.getSourceAsMap();

            //解析出高亮字段,将原来不高亮的字段替换掉
            if(null != title){
                Text[] fragments = title.fragments();
                String newTitle = "";
                for (Text fragment : fragments) {
                    newTitle += fragment;
                }
                //替换
                sourceAsMap.put("title", newTitle);
            }


            list.add(sourceAsMap);
        }

        return list;

    }
}
package com.zxj.controller;

import com.zxj.service.ContentService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RestController;

import java.io.IOException;
import java.util.List;
import java.util.Map;

/**
 * @author zxj
 * @date 2020/5/28 13:45
 */
@RestController
public class ContentController {

    @Autowired
    private ContentService contentService;

    @GetMapping("/parse/{keyword}")
    public Boolean parse(@PathVariable("keyword") String keyword) throws IOException {
        return contentService.parseContent(keyword);
    }

    @GetMapping("/search/{keyword}/{pageNo}/{pageSize}")
    public List> search(@PathVariable("keyword") String keyword,
                                            @PathVariable("pageNo") Integer pageNo,
                                            @PathVariable("pageSize") Integer pageSize) throws IOException {
        return contentService.search(keyword, pageNo, pageSize);
    }
}




    
    狂神说Java-ES仿京东实战
    



{{result.price}}

店铺: 狂神说Java

月成交999笔 评价 3

 

启动项目后,依次访问 http://localhost:9090/parse/hadoop 和 http://localhost:9090/parse/java ,从京东搜索页面抓取对应关键字的商品数据,并存入ES中。

然后访问 http://localhost:9090/ ,输入关键字进行搜索,即可看到带高亮的查询结果。

SpringBoot+vue+JSOUP+Vue实现JD搜索高亮_第2张图片

 

 

 

 

 

你可能感兴趣的:(SpringBoot+vue+JSOUP+Vue实现JD搜索高亮)