在做 Spring Boot 和 Elasticsearch 整合实战时,Elasticsearch 服务我用的是 Docker 容器。由于是第一次整合,遇到了许多 bug,在此做记录。
1.elasticsearch的服务的跨域问题
解决链接
2.elasticsearch-head创建索引响应406
解决链接
3.由于版本的原因在es6.x需要在往索引增加Document时添加type字段
参考
4.es-head的github地址,告诉我们怎么开启es-head的服务
链接
下面是相关实战:
一、ES数据来源,我选择通过爬虫爬取数据,存到es中
/**
 * Scrapes product listings from the JD search result page.
 */
@Component
public class HtmlParseUtil {

    /**
     * Fetches the JD search page for the given keyword and extracts one
     * {@code Content} (image URL, price text, title text) per {@code <li>}
     * found inside the {@code J_goodsList} element.
     *
     * @param keywords search keyword; it is URL-encoded before being placed in the query string
     * @return the parsed items; an empty list when the keyword is null/blank or
     *         the page does not contain the {@code J_goodsList} element
     * @throws Exception if the page cannot be fetched or parsed
     */
    public List<Content> parseJD(String keywords) throws Exception {
        List<Content> list = new ArrayList<>();
        if (keywords == null || keywords.trim().isEmpty()) {
            return list;
        }

        // 1. Build the request URL. The keyword must be encoded, otherwise non-ASCII
        //    text, spaces, '&' or '#' produce a malformed or truncated query string.
        String url = "https://search.jd.com/Search?keyword="
                + java.net.URLEncoder.encode(keywords, "UTF-8");

        // 2. Fetch and parse the page (30 s timeout).
        Document document = Jsoup.parse(new URL(url), 30000);
        Element goodsList = document.getElementById("J_goodsList");
        if (goodsList == null) {
            // The container is absent (e.g. a different page was served); nothing to parse.
            return list;
        }

        // 3. Every <li> under the container is treated as one product entry.
        for (Element item : goodsList.getElementsByTag("li")) {
            // NOTE(review): the image URL is read from "src" only; if the site lazy-loads
            // images this may be empty — confirm against the live markup.
            String img = item.getElementsByTag("img").eq(0).attr("src");
            String price = item.getElementsByClass("p-price").eq(0).text();
            String title = item.getElementsByClass("p-name").eq(0).text();

            Content content = new Content();
            content.setTitle(title);
            content.setImg(img);
            content.setPrice(price);
            list.add(content);
        }
        return list;
    }
}
通过上面的工具类,能去京东爬取关键字对应商品的信息(图片 url、title、price),前提是要导入相关依赖。
以杜蕾斯为关键字,测试效果:
二.es的配置(原生)
/**
 * Spring configuration that exposes the Elasticsearch high-level REST client as a bean.
 */
@Configuration
public class ElasticSearchClientConfig {

    /**
     * Builds a client that talks to a single node at 192.168.1.101:9200 over HTTP.
     *
     * @return the configured {@code RestHighLevelClient}
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        HttpHost node = new HttpHost("192.168.1.101", 9200, "http");
        return new RestHighLevelClient(RestClient.builder(node));
    }
}
三.service层
/**
 * Indexes crawled JD product data into Elasticsearch and searches it.
 */
@Service
public class ContentService {

    /**
     * Index used for both writing and searching. The two operations previously
     * used different index names, so freshly indexed documents were never found.
     */
    private static final String INDEX_NAME = "jd_goods_2";

    @Autowired
    private RestHighLevelClient restHighLevelClient;

    @Autowired
    private HtmlParseUtil htmlParseUtil;

    /**
     * Crawls products for the keyword and bulk-indexes them.
     *
     * @param keywords keyword passed to the crawler; also used as the document type
     * @return {@code true} if the bulk request reported no failures; {@code false}
     *         if there were failures or the crawler returned nothing to index
     * @throws Exception if crawling or the bulk request fails
     */
    public Boolean parseContent(String keywords) throws Exception {
        List<Content> contents = htmlParseUtil.parseJD(keywords);
        if (contents == null || contents.isEmpty()) {
            // A bulk request without any action is rejected by the client, so bail out early.
            return false;
        }

        BulkRequest bulkRequest = new BulkRequest();
        bulkRequest.timeout("2m");
        for (Content content : contents) {
            // NOTE(review): the keyword is used as the mapping type. Elasticsearch 6.x
            // is believed to allow only one type per index, so a second keyword may be
            // rejected — confirm against the server version in use.
            bulkRequest.add(
                    new IndexRequest(INDEX_NAME, keywords + "")
                            .source(JSON.toJSONString(content), XContentType.JSON));
        }
        BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
        return !bulk.hasFailures();
    }

    /**
     * Searches indexed products whose {@code title} matches the keyword via a term query.
     *
     * @param keyword  term to look up in the {@code title} field
     * @param pageNo   1-based page number; values below 1 are treated as 1
     * @param pageSize number of hits per page
     * @return the source map of every hit on the requested page
     * @throws IOException if the search request fails
     */
    public List<Map<String, Object>> searchPage(String keyword, int pageNo, int pageSize) throws IOException {
        if (pageNo <= 1) {
            pageNo = 1;
        }

        SearchRequest searchRequest = new SearchRequest(INDEX_NAME);
        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();

        // "from" is a document offset, not a page number.
        sourceBuilder.from((pageNo - 1) * pageSize);
        sourceBuilder.size(pageSize);

        // NOTE(review): a term query is not analyzed; on an analyzed text field a
        // multi-character keyword may match nothing — confirm the field mapping.
        TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("title", keyword);
        sourceBuilder.query(termQueryBuilder);
        sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));

        searchRequest.source(sourceBuilder);
        SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

        List<Map<String, Object>> list = new ArrayList<>();
        for (SearchHit hit : searchResponse.getHits().getHits()) {
            list.add(hit.getSourceAsMap());
        }
        return list;
    }
}
四.controller层
/**
 * HTTP endpoints for crawling JD products into Elasticsearch and searching them.
 */
@Controller
public class ContentController {

    @Autowired
    private ContentService contentService;

    /**
     * Crawls JD for the keyword and stores the results via {@code ContentService.parseContent}.
     *
     * @param keyword keyword taken from the path
     * @return the value returned by {@code ContentService.parseContent}
     * @throws Exception propagated from the service
     */
    @GetMapping("/parse/{keyword}")
    @ResponseBody
    public Boolean parse(@PathVariable String keyword) throws Exception {
        return contentService.parseContent(keyword);
    }

    /**
     * Searches stored products.
     *
     * @param keyword  search keyword
     * @param pageNo   page number; zero or negative values default to 1
     * @param pageSize hits per page; zero or negative values default to 5
     * @return the hits returned by {@code ContentService.searchPage}
     * @throws IOException propagated from the service
     */
    @GetMapping("/search/{keyword}/{pageNo}/{pageSize}")
    @ResponseBody
    public List<Map<String, Object>> search(@PathVariable("keyword") String keyword,
                                            @PathVariable("pageNo") int pageNo,
                                            @PathVariable("pageSize") int pageSize) throws IOException {
        // Treat non-positive values (not just 0) as "use the default"; a negative
        // size would otherwise be handed to the search builder.
        if (pageNo <= 0) {
            pageNo = 1;
        }
        if (pageSize <= 0) {
            pageSize = 5;
        }
        return contentService.searchPage(keyword, pageNo, pageSize);
    }

    /**
     * Redirects to {@code /parse/{keyword}} using the {@code keyword} request parameter.
     *
     * @param keyword keyword bound from the request
     * @return a redirect view name
     */
    @GetMapping("/parse/add2es")
    public String test(String keyword) {
        System.out.println(keyword);
        // NOTE(review): when the parameter is missing, keyword is null and the
        // concatenation yields "redirect:/parse/null"; the value is also not URL-encoded.
        return "redirect:/parse/" + keyword;
    }
}