1.pom文件
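<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.2.5.RELEASE</version>
    </parent>
    <groupId>com.zxj</groupId>
    <artifactId>es-jd</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>es-jd</name>
    <description>Demo project for Spring Boot</description>

    <properties>
        <java.version>1.8</java.version>
        <elasticsearch.version>7.6.1</elasticsearch.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.10.2</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.60</version>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-thymeleaf</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-devtools</artifactId>
            <scope>runtime</scope>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-configuration-processor</artifactId>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
            <exclusions>
                <exclusion>
                    <groupId>org.junit.vintage</groupId>
                    <artifactId>junit-vintage-engine</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
            </plugin>
        </plugins>
    </build>
</project>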
静态资源素材:链接:https://pan.baidu.com/s/1VOErtTAtikR9MZCSO4062w
提取码:two0
application.properties:
server.port=9090
#关闭thymeleaf缓存
spring.thymeleaf.cache=false
IndexController.java
package com.zxj.controller;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.GetMapping;
/**
 * MVC controller that serves the application's landing page.
 *
 * @author zxj
 * @date 2020/5/28 13:05
 */
@Controller
public class IndexController {

    /** Logical view name returned for the home page. */
    private static final String INDEX_VIEW = "index";

    /**
     * Handles GET requests for both {@code /} and {@code /index}.
     *
     * @return the view name {@code "index"}
     */
    @GetMapping({"/", "/index"})
    public String index() {
        return INDEX_VIEW;
    }
}
ElasticSearchConfig.java
package com.zxj.config;
import org.apache.http.HttpHost;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
/**
 * Spring configuration that registers the Elasticsearch high-level REST client as a bean.
 *
 * @author zxj
 * @date 2020/5/28 11:10
 */
@Configuration
public class ElasticSearchConfig {

    /**
     * Creates a client bound to a single node at {@code http://localhost:9200}.
     *
     * @return a newly built {@link RestHighLevelClient}
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        HttpHost node = new HttpHost("localhost", 9200, "http");
        return new RestHighLevelClient(RestClient.builder(node));
    }
}
实体类Content.java
package com.zxj.bean;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * One scraped product entry: a title, an image reference and a price, all kept as plain strings.
 *
 * The Lombok annotations below generate the accessors, equals/hashCode/toString, a no-args
 * constructor and an all-args constructor. The all-args constructor takes its parameters in
 * field declaration order (title, img, price), so reordering the fields changes its signature.
 *
 * @author zxj
 * @date 2020/5/28 13:35
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
public class Content {
// Product title; HtmlParseUtil.parseJD fills it with the text of the "p-name" element.
private String title;
// Image reference; HtmlParseUtil.parseJD fills it with the "src" attribute of the first <img>.
private String img;
// Price text; HtmlParseUtil.parseJD fills it with the text of the "p-price" element.
private String price;
}
Html页面解析工具类HtmlParseUtil.java
package com.zxj.utils;
import com.zxj.bean.Content;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
/**
* @author zxj
* @date 2020/5/28 13:27
*/
public class HtmlParseUtil {
public static void main(String[] args) throws IOException {
parseJD("java").forEach(System.out :: println);
}
public static List parseJD(String keyword) throws IOException {
String url = "https://search.jd.com/Search?keyword=" + keyword;
Document document = Jsoup.parse(new URL(url), 30000);
Element element = document.getElementById("J_goodsList");
Elements elements = element.getElementsByTag("li");
List list = new ArrayList<>();
for (Element el : elements) {
//注意,此处图片有的网站是懒加载的,src属性获取不到想要的图片
String img = el.getElementsByTag("img").eq(0).attr("src");
String price = el.getElementsByClass("p-price").eq(0).text();
String title = el.getElementsByClass("p-name").eq(0).text();
list.add(new Content(title, img, price));
}
return list;
}
}
package com.zxj.service;
import com.alibaba.fastjson.JSON;
import com.sun.org.apache.xpath.internal.operations.Bool;
import com.zxj.bean.Content;
import com.zxj.utils.HtmlParseUtil;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.elasticsearch.search.fetch.subphase.highlight.Highlighter;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
/**
* @author zxj
* @date 2020/5/28 13:45
*/
@Service
public class ContentService {
@Autowired
private RestHighLevelClient restHighLevelClient;
/**
 * 1. Scrapes the products for {@code keyword} and bulk-indexes them into the
 * {@code jd_goods} index, one JSON document per product.
 *
 * @param keyword the search term passed to {@code HtmlParseUtil.parseJD}
 * @return {@code true} when the bulk request completed without failures; {@code false} when
 *         any item failed or when the scrape produced nothing to index
 * @throws IOException if scraping the page or talking to Elasticsearch fails
 */
public Boolean parseContent(String keyword) throws IOException {
    List<?> contents = HtmlParseUtil.parseJD(keyword);
    if (contents == null || contents.isEmpty()) {
        // A bulk request without actions is rejected by the client ("no requests added"),
        // so report that nothing was stored instead of sending it.
        return false;
    }

    BulkRequest bulkRequest = new BulkRequest();
    bulkRequest.timeout("2m");
    for (Object content : contents) {
        bulkRequest.add(new IndexRequest("jd_goods").source(JSON.toJSONString(content), XContentType.JSON));
    }

    BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
    return !bulk.hasFailures();
}
//2.搜索
public List
package com.zxj.controller;
import com.zxj.service.ContentService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RestController;
import java.io.IOException;
import java.util.List;
import java.util.Map;
/**
* @author zxj
* @date 2020/5/28 13:45
*/
@RestController
public class ContentController {
@Autowired
private ContentService contentService;
@GetMapping("/parse/{keyword}")
public Boolean parse(@PathVariable("keyword") String keyword) throws IOException {
return contentService.parseContent(keyword);
}
@GetMapping("/search/{keyword}/{pageNo}/{pageSize}")
public List> search(@PathVariable("keyword") String keyword,
@PathVariable("pageNo") Integer pageNo,
@PathVariable("pageSize") Integer pageSize) throws IOException {
return contentService.search(keyword, pageNo, pageSize);
}
}
狂神说Java-ES仿京东实战
启动项目后,分别访问 http://localhost:9090/parse/hadoop 和 http://localhost:9090/parse/java ,从京东搜索页面抓取对应关键字的商品数据并存入 ES 中。
然后访问 http://localhost:9090/ ,输入关键字进行搜索,即可看到带高亮显示的查询结果。