server.port=9090
# thymeleaf
spring.thymeleaf.cache=false
/src/main/java/com/kuang/kuangshenesjd/config/ElasticSearchClientConfig.java
```java
package com.kuang.kuangshenesjd.config;
import org.apache.http.HttpHost;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
// Kuangshen's routine for bringing a third-party client into Spring:
//   1. find the object you need
//   2. hand it over to the Spring container
//   3. with Spring Boot, read the source first
//      (the xxxAutoConfiguration / xxxProperties classes)

/**
 * Spring configuration that exposes the Elasticsearch high-level REST client as a bean.
 */
@Configuration
public class ElasticSearchClientConfig {

    /**
     * Creates the client used to talk to Elasticsearch.
     *
     * @return a client built for the node at 127.0.0.1:9200 using the "http" scheme
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        HttpHost node = new HttpHost("127.0.0.1", 9200, "http");
        return new RestHighLevelClient(RestClient.builder(node));
    }
}
```

项目结构

```text
├── README.md
├── kuangshen-es-jd.iml
├── mvnw
├── mvnw.cmd
├── pom.xml
├── src
│   ├── main
│   │   ├── java
│   │   │   └── com
│   │   │       └── kuang
│   │   │           └── kuangshenesjd
│   │   │               ├── KuangshenEsJdApplication.java
│   │   │               ├── config
│   │   │               │   └── ElasticSearchClientConfig.java
│   │   │               ├── controller
│   │   │               │   ├── ContentController.java
│   │   │               │   └── IndexController.java
│   │   │               ├── pojo
│   │   │               │   └── Content.java
│   │   │               ├── service
│   │   │               │   └── ContentService.java
│   │   │               └── utils
│   │   │                   └── HtmlParseUtil.java
│   │   └── resources
│   │       ├── application.properties
│   │       ├── static
│   │       │   ├── css
│   │       │   │   └── style.css
│   │       │   ├── images
│   │       │   │   └── jdlogo.png
│   │       │   └── js
│   │       │       └── jquery.min.js
│   │       └── templates
│   │           └── index.html
│   └── test
│       └── java
│           └── com
│               └── kuang
│                   └── kuangshenesjd
│                       └── KuangshenEsJdApplicationTests.java
└── target
    ├── classes
    │   ├── application.properties
    │   ├── com
    │   │   └── kuang
    │   │       └── kuangshenesjd
    │   │           ├── KuangshenEsJdApplication.class
    │   │           ├── config
    │   │           │   └── ElasticSearchClientConfig.class
    │   │           ├── controller
    │   │           │   ├── ContentController.class
    │   │           │   └── IndexController.class
    │   │           ├── pojo
    │   │           │   └── Content.class
    │   │           ├── service
    │   │           │   └── ContentService.class
    │   │           └── utils
    │   │               └── HtmlParseUtil.class
    │   ├── static
    │   │   ├── css
    │   │   │   └── style.css
    │   │   ├── images
    │   │   │   └── jdlogo.png
    │   │   └── js
    │   │       └── jquery.min.js
    │   └── templates
    │       └── index.html
    ├── generated-sources
    │   └── annotations
    ├── generated-test-sources
    │   └── test-annotations
    └── test-classes
        └── com
            └── kuang
                └── kuangshenesjd
                    └── KuangshenEsJdApplicationTests.class

45 directories, 31 files
```
# 爬取数据
数据问题?数据库中获取,消息队列中获取,都可以称为数据源。也可用爬虫解决。(当前只需要少量数据进行测试,所以项目中需先进行数据爬取解析)
## 爬取数据:
```java
package com.kuang.kuangshenesjd.utils;
import com.kuang.kuangshenesjd.pojo.Content;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
@Component
public class HtmlParseUtil {
public static void main(String[] args) throws IOException {
new HtmlParseUtil().parseJD("java").forEach(System.out::println);
}
public ArrayList parseJD(String keywords) throws IOException {
// 获取请求,前提需要联网,ajax不能获取到!
String url = "https://search.jd.com/Search?keyword=" + keywords + "&enc=utf-8";
// 解析网页.(Jsoup返回的Document就是页面对象)
Document document = Jsoup.parse(new URL(url), 30000);
// 所有你在js中可以使用的方法
Element element = document.getElementById("J_goodsList");
// 获取所有的li元素
Elements elements = element.getElementsByTag("li");
// 获取元素中的内容,el代表每一个li标签
ArrayList goodsList = new ArrayList<>();
for (Element el : elements) {
// 关于这种图片特别多的网站,都是延迟加载的
String image = el.getElementsByTag("img").eq(0).attr("src");
String price = el.getElementsByClass("p-price").eq(0).text();
String title = el.getElementsByClass("p-name").eq(0).text();
Content content = new Content();
content.setTitle(title);
content.setImg(image);
content.setPrice(price);
goodsList.add(content);
System.out.println("=========================");
System.out.println(image);
System.out.println(price);
System.out.println(title);
}
return goodsList;
}
}
package com.kuang.kuangshenesjd.service;
import com.alibaba.fastjson.JSON;
import com.kuang.kuangshenesjd.pojo.Content;
import com.kuang.kuangshenesjd.utils.HtmlParseUtil;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
// 业务编写
/**
 * Business logic: loads scraped JD products into Elasticsearch and searches them.
 */
@Service
public class ContentService {

    /** Index that stores the scraped products. */
    private static final String INDEX_NAME = "jd_goods";

    @Autowired
    private RestHighLevelClient restHighLevelClient;

    /**
     * Scrapes the products for {@code keywords} and bulk-indexes them into {@code jd_goods}.
     *
     * @param keywords search keyword passed to {@link HtmlParseUtil#parseJD(String)}
     * @return {@code true} when every item was indexed without failure; {@code false} when
     *         nothing was scraped or the bulk response reports at least one failure
     * @throws IOException if scraping or the Elasticsearch call fails
     */
    public Boolean parseContent(String keywords) throws IOException {
        List<Content> contents = new HtmlParseUtil().parseJD(keywords);
        if (contents.isEmpty()) {
            // A bulk request without any action is rejected by Elasticsearch,
            // and nothing would have been indexed anyway.
            return false;
        }

        BulkRequest bulkRequest = new BulkRequest();
        bulkRequest.timeout("2m");
        for (Content content : contents) {
            bulkRequest.add(
                    new IndexRequest(INDEX_NAME)
                            .source(JSON.toJSONString(content), XContentType.JSON));
        }

        BulkResponse bulkResponse = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
        // hasFailures() is true when something went wrong, so success is its negation.
        return !bulkResponse.hasFailures();
    }

    /**
     * Searches {@code jd_goods} for documents whose {@code title} matches {@code keyword}.
     *
     * @param keyword  term to look up in the {@code title} field
     * @param pageNo   1-based page number; values below 1 are treated as 1
     * @param pageSize number of hits per page
     * @return the source map of every hit on the requested page
     * @throws IOException if the Elasticsearch call fails
     */
    public List<Map<String, Object>> searchPage(String keyword, int pageNo, int pageSize) throws IOException {
        SearchResponse response = executeSearch(buildTitleSearch(keyword, pageNo, pageSize));

        List<Map<String, Object>> list = new ArrayList<>();
        for (SearchHit hit : response.getHits().getHits()) {
            list.add(hit.getSourceAsMap());
        }
        return list;
    }

    /**
     * Same search as {@link #searchPage(String, int, int)}, but requests highlighting on
     * {@code title} and, for hits that carry a highlight, replaces the stored title with
     * the concatenated highlight fragments.
     *
     * @param keyword  term to look up in the {@code title} field
     * @param pageNo   1-based page number; values below 1 are treated as 1
     * @param pageSize number of hits per page
     * @return the source map of every hit on the requested page, with {@code title}
     *         overwritten by the highlighted text where available
     * @throws IOException if the Elasticsearch call fails
     */
    public List<Map<String, Object>> searchPageHighlighter(String keyword, int pageNo, int pageSize) throws IOException {
        SearchSourceBuilder sourceBuilder = buildTitleSearch(keyword, pageNo, pageSize);

        HighlightBuilder highlightBuilder = new HighlightBuilder();
        highlightBuilder.field("title");
        highlightBuilder.requireFieldMatch(false);
        // NOTE(review): both tags are empty, so the fragments carry no visible markup.
        // Presumably HTML tags were intended here - confirm against the page template.
        highlightBuilder.preTags("");
        highlightBuilder.postTags("");
        sourceBuilder.highlighter(highlightBuilder);

        SearchResponse response = executeSearch(sourceBuilder);

        List<Map<String, Object>> list = new ArrayList<>();
        for (SearchHit hit : response.getHits().getHits()) {
            Map<String, Object> source = hit.getSourceAsMap();
            HighlightField title = hit.getHighlightFields().get("title");
            if (title != null) {
                // Swap the plain title for the highlighted one.
                StringBuilder highlighted = new StringBuilder();
                for (Text fragment : title.fragments()) {
                    highlighted.append(fragment);
                }
                source.put("title", highlighted.toString());
            }
            list.add(source);
        }
        return list;
    }

    /** Builds the paged term query on {@code title} shared by both search methods. */
    private SearchSourceBuilder buildTitleSearch(String keyword, int pageNo, int pageSize) {
        int page = Math.max(pageNo, 1);

        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        // "from" is a hit offset, not a page number: page 1 starts at offset 0.
        // Computed in long so a huge page number cannot overflow into a bogus offset.
        long offset = (long) (page - 1) * pageSize;
        sourceBuilder.from((int) Math.min(offset, Integer.MAX_VALUE));
        sourceBuilder.size(pageSize);

        // Exact term match on the title field.
        TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("title", keyword);
        sourceBuilder.query(termQueryBuilder);
        sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));
        return sourceBuilder;
    }

    /** Runs {@code sourceBuilder} against the {@code jd_goods} index. */
    private SearchResponse executeSearch(SearchSourceBuilder sourceBuilder) throws IOException {
        SearchRequest searchRequest = new SearchRequest(INDEX_NAME);
        searchRequest.source(sourceBuilder);
        return restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
    }
}
```

通过接口的方式爬取数据,并存储在 ES 中:

```java
package com.kuang.kuangshenesjd.controller;
import com.kuang.kuangshenesjd.service.ContentService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.ResponseBody;
import java.io.IOException;
import java.util.List;
import java.util.Map;
/**
 * Web endpoints for loading scraped products into Elasticsearch and searching them.
 */
@Controller
public class ContentController {

    @Autowired
    private ContentService contentService;

    /**
     * Scrapes the products for {@code keyword} and stores them through
     * {@link ContentService#parseContent(String)}.
     *
     * @param keyword search keyword taken from the URL path
     * @return the flag returned by the service, written to the response body unchanged
     * @throws IOException if the service call fails
     */
    @GetMapping("/parse/{keyword}")
    @ResponseBody
    public Boolean parse(@PathVariable("keyword") String keyword) throws IOException {
        return contentService.parseContent(keyword);
    }

    /**
     * Runs the highlighted search and renders the hits with the {@code index} view.
     *
     * @param keyword  term to search for
     * @param pageNo   page number from the URL path; values below 1 are treated as 1
     * @param pageSize number of hits per page
     * @param model    receives the hits under the attribute name {@code list}
     * @return the view name {@code index}
     * @throws IOException if the service call fails
     */
    @GetMapping("/search/{keyword}/{pageNo}/{pageSize}")
    public String search(@PathVariable("keyword") String keyword,
                         @PathVariable("pageNo") int pageNo,
                         @PathVariable("pageSize") int pageSize, Model model) throws IOException {
        // Same lower bound as the service applies, so zero and negative pages behave alike.
        if (pageNo < 1) {
            pageNo = 1;
        }
        List<Map<String, Object>> list = contentService.searchPageHighlighter(keyword, pageNo, pageSize);
        model.addAttribute("list", list);
        return "index";
    }
}