学习来源 :狂神说Java,b站地址,点击进入
所需安装的环境(链接有提供):ik 分词器、Elasticsearch、Kibana、ElasticSearch Head(可以在谷歌浏览器中安装扩展包)
链接:https://pan.baidu.com/s/1WO676lT1pAihEYofESgPHw
提取码:bv7n
狂神使用的是 vue ,我使用的是 thymeleaf
具体实现可以参考码云地址
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.2.5.RELEASE</version>
        <relativePath/>
    </parent>
    <groupId>com.hjm</groupId>
    <artifactId>springboot-es-jd</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>springboot-es-jd</name>
    <description>Demo project for Spring Boot</description>
    <properties>
        <java.version>1.8</java.version>
        <!-- Overrides the Elasticsearch version managed by the Spring Boot parent;
             presumably chosen to match the installed Elasticsearch server. -->
        <elasticsearch.version>7.6.1</elasticsearch.version>
    </properties>
    <dependencies>
        <!-- HTML fetching and parsing -->
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.10.2</version>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
        </dependency>
        <!-- JSON serialization -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.68</version>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-thymeleaf</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-configuration-processor</artifactId>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
            <exclusions>
                <!-- Exclude the JUnit vintage engine from the test starter. -->
                <exclusion>
                    <groupId>org.junit.vintage</groupId>
                    <artifactId>junit-vintage-engine</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
            </plugin>
        </plugins>
    </build>
</project>
/**
 * Scrapes product entries from the JD search results page.
 */
@Component
public class HtmlParseUtil {

    /**
     * Fetches the JD search page for the given keywords and builds one {@link Content}
     * per {@code <li>} element found under the element with id {@code J_goodsList}.
     *
     * @param keywords search terms; {@code null} is treated as an empty string. The value is
     *                 URL-encoded as UTF-8 before being placed in the query string.
     * @return the extracted entries; empty when the page has no {@code J_goodsList} element
     * @throws Exception if the page cannot be fetched or parsed
     */
    public List<Content> parseJD(String keywords) throws Exception {
        // Encode the keywords so that spaces, '&' and non-ASCII text (e.g. Chinese)
        // cannot corrupt the query string.
        String encodedKeywords = java.net.URLEncoder.encode(keywords == null ? "" : keywords, "UTF-8");
        String url = "https://search.jd.com/Search?keyword=" + encodedKeywords + "&enc=utf-8";

        // Fetch and parse the page (timeout: 30000 ms).
        Document document = Jsoup.parse(new URL(url), 30000);

        List<Content> contents = new ArrayList<>();
        Element goodsList = document.getElementById("J_goodsList");
        if (goodsList == null) {
            // The expected container is missing from the returned page;
            // report "no results" instead of failing with a NullPointerException.
            return contents;
        }

        for (Element item : goodsList.getElementsByTag("li")) {
            // NOTE(review): if the page lazy-loads images, "src" may be empty and the real
            // address may live in another attribute - verify against the live page.
            String img = item.getElementsByTag("img").eq(0).attr("src");
            String price = item.getElementsByClass("p-price").eq(0).text();
            String title = item.getElementsByClass("p-name").eq(0).text();

            Content content = new Content();
            content.setImg(img);
            content.setTitle(title);
            content.setPrice(price);
            contents.add(content);
        }
        return contents;
    }
}
// Elasticsearch high-level REST client, injected by Spring.
@Autowired
private RestHighLevelClient restHighLevelClient;
/**
 * Scrapes JD search results for the given keywords and bulk-indexes them into the
 * {@code jd_goods} Elasticsearch index, creating that index first if it does not exist.
 *
 * @param keywords search terms passed to {@link HtmlParseUtil#parseJD(String)}
 * @return {@code true} if the bulk request reported no failures; {@code false} if nothing
 *         was scraped or at least one item failed to index
 * @throws Exception if scraping fails or an Elasticsearch request fails
 */
public boolean parseContent(String keywords) throws Exception {
    List<Content> contents = new HtmlParseUtil().parseJD(keywords);

    // Create the jd_goods index if it does not exist yet.
    GetIndexRequest request = new GetIndexRequest("jd_goods");
    boolean exists = restHighLevelClient.indices().exists(request, RequestOptions.DEFAULT);
    if (!exists) {
        CreateIndexRequest createIndexRequest = new CreateIndexRequest("jd_goods");
        restHighLevelClient.indices().create(createIndexRequest, RequestOptions.DEFAULT);
    }

    if (contents.isEmpty()) {
        // A bulk request without any actions is rejected by request validation
        // ("no requests added"), so report "nothing indexed" instead of throwing.
        return false;
    }

    // Add every scraped item to one bulk request targeting jd_goods.
    BulkRequest bulkRequest = new BulkRequest();
    bulkRequest.timeout("2m");
    for (Content content : contents) {
        bulkRequest.add(new IndexRequest("jd_goods")
                .source(JSON.toJSONString(content), XContentType.JSON));
    }

    BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
    return !bulk.hasFailures();
}
/**
 * Searches the {@code jd_goods} index with a term query on {@code title} and returns the
 * source of each hit, with {@code title} replaced by its highlighted fragments when the
 * response contains a highlight for that field.
 *
 * @param keyWord the term to look up in the {@code title} field
 * @return one source map per hit (at most 30), starting from the first hit
 * @throws IOException if the search request fails
 */
public List<Map<String,Object>> searchPageHighLight(String keyWord) throws IOException {
    SearchRequest searchRequest = new SearchRequest("jd_goods");
    SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();

    // Paging: "from" is a zero-based offset, so 0 is required to include the top hit
    // (an offset of 1 silently drops the first result).
    sourceBuilder.from(0);
    sourceBuilder.size(30);

    // Exact term match on the title field.
    TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("title", keyWord);
    sourceBuilder.query(termQueryBuilder);
    sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));

    // Highlighting on the title field.
    HighlightBuilder highlightBuilder = new HighlightBuilder();
    highlightBuilder.field("title");
    // Do not require the query to have matched on the highlighted field.
    highlightBuilder.requireFieldMatch(false);
    // NOTE(review): both tags are empty strings, so highlighted fragments carry no visible
    // markup; presumably HTML tags were intended here - confirm and restore.
    highlightBuilder.preTags("");
    highlightBuilder.postTags("");
    sourceBuilder.highlighter(highlightBuilder);

    searchRequest.source(sourceBuilder);
    SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

    // Collect each hit's source, swapping in the highlighted title where available.
    List<Map<String, Object>> list = new ArrayList<>();
    for (SearchHit hit : searchResponse.getHits().getHits()) {
        Map<String, HighlightField> highlightFields = hit.getHighlightFields();
        HighlightField title = highlightFields.get("title");
        Map<String, Object> sourceAsMap = hit.getSourceAsMap();
        if (title != null) {
            // Join all fragments into a single string.
            StringBuilder highlightedTitle = new StringBuilder();
            for (Text fragment : title.fragments()) {
                highlightedTitle.append(fragment);
            }
            sourceAsMap.put("title", highlightedTitle.toString());
        }
        list.add(sourceAsMap);
    }
    return list;
}
/**
 * MVC controller exposing the {@code /parse} and {@code /search/{keywords}} endpoints.
 */
@Controller
public class ContentController {

    @Autowired
    private ContentService contentService;

    /**
     * Handles {@code POST /parse} by redirecting to the search page for the keyword.
     *
     * @param keyword optional request parameter; appended to the redirect target as-is
     * @return a redirect view name pointing at {@code search/<keyword>}
     * @throws Exception declared by the signature; nothing in the current body throws
     */
    @PostMapping("/parse")
    public String parse(@RequestParam(value = "keyword", required = false) String keyword) throws Exception {
        // NOTE(review): the call to contentService.parseContent(keyword) is disabled here,
        // so this endpoint only redirects and indexes nothing - confirm this is intended.
        String redirectTarget = "redirect:search/" + keyword;
        return redirectTarget;
    }

    /**
     * Runs the highlighted search and exposes the results to the {@code search} view
     * under the model attribute {@code value}.
     *
     * @param keywords search term taken from the URL path
     * @param model    model receiving the result list
     * @return the view name {@code search}
     * @throws IOException if the underlying search fails
     */
    @GetMapping("/search/{keywords}")
    public String search(@PathVariable(value = "keywords") String keywords,
                         Model model) throws IOException {
        List<Content> results = new ArrayList<>();
        // Convert each hit map to a Content object via a JSON round trip.
        for (Map<String, Object> hit : contentService.searchPageHighLight(keywords)) {
            String serialized = JSON.toJSONString(hit);
            results.add(JSON.parseObject(serialized, Content.class));
        }
        model.addAttribute("value", results);
        return "search";
    }
}