新建一个Maven工程,推荐使用Springboot来搭建项目,在pom.xml中引入Elasticsearch依赖。
这里有两种方式,一种是使用Springboot的启动器:
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-elasticsearch</artifactId>
<!-- <version>2.4.4</version> 可以自己指定版本号,不过Springboot会自动帮你选择对应版本 -->
</dependency>
另一种方式是直接使用Spring整合的Elasticsearch:
<dependency>
<groupId>org.springframework.data</groupId>
<artifactId>spring-data-elasticsearch</artifactId>
<version>4.1.6</version>
</dependency>
版本的选择可根据下面的关联关系图来参考:
Spring Data Elasticsearch | Elasticsearch | Spring Boot |
---|---|---|
4.1.x | 7.9.3 | 2.4.x |
4.0.x | 7.6.2 | 2.3.x |
3.2.x | 6.8.12 | 2.2.x |
3.1.x | 6.2.2 | 2.1.x |
3.0.x | 5.5.0 | 2.0.x |
2.1.x | 2.4.0 | 1.5.x |
在resources下面创建一个配置文件application.yml,添加Elasticsearch配置属性:
spring:
  elasticsearch:
    rest:
      uris: 127.0.0.1:9200
      #如果你安装的Elasticsearch没有设置用户名和密码的话,以下两个属性填任意值就可以
      username: elastic
      password: 123456
      connection-timeout: 30000
      read-timeout: 30000
import org.springframework.data.annotation.Id;
import org.springframework.data.elasticsearch.annotations.DateFormat;
import org.springframework.data.elasticsearch.annotations.Document;
import org.springframework.data.elasticsearch.annotations.Field;
import org.springframework.data.elasticsearch.annotations.FieldType;
import java.io.Serializable;
/**
 * Document entity mapped to the {@code demo-index} Elasticsearch index.
 *
 * <p>The {@code type} attribute of {@code @Document} is the index type. According to the
 * original author's note it had to be specified before Elasticsearch 7.0, but has been
 * deprecated since 7.0 and causes repeated console warnings there, so set or drop it
 * depending on the version actually in use.
 */
@Document(indexName = "demo-index", type = "_doc")
public class DemoBO implements Serializable {

    /** The document's unique identifier, i.e. the ID Elasticsearch generates for the document. */
    @Id
    private String id;

    /**
     * Keyword field. {@code @Field} sets the field's type, analyzer, alias and so on; configuring
     * it explicitly is recommended. Camel-case field names are recommended for the index mapping
     * because derived repository queries ({@code findByXxx}) are built from the property name.
     */
    @Field(type = FieldType.Keyword)
    private String demoName;

    /** Full-text field indexed with {@code ik_max_word} and searched with {@code ik_smart}. */
    @Field(type = FieldType.Text, analyzer = "ik_max_word", searchAnalyzer = "ik_smart")
    private String demoValue;

    /** Integer field. */
    @Field(type = FieldType.Integer)
    private Integer demoNumber;

    /** Date field held as a string in the {@code yyyy-MM-dd HH:mm:ss} pattern. */
    @Field(type = FieldType.Date, format = DateFormat.custom, pattern = "yyyy-MM-dd HH:mm:ss")
    private String demoTime;

    /** @return the document ID */
    public String getId() {
        return this.id;
    }

    /** @param id the document ID */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the keyword value */
    public String getDemoName() {
        return this.demoName;
    }

    /** @param demoName the keyword value */
    public void setDemoName(String demoName) {
        this.demoName = demoName;
    }

    /** @return the analyzed text value */
    public String getDemoValue() {
        return this.demoValue;
    }

    /** @param demoValue the analyzed text value */
    public void setDemoValue(String demoValue) {
        this.demoValue = demoValue;
    }

    /** @return the numeric value */
    public Integer getDemoNumber() {
        return this.demoNumber;
    }

    /** @param demoNumber the numeric value */
    public void setDemoNumber(Integer demoNumber) {
        this.demoNumber = demoNumber;
    }

    /** @return the timestamp string in {@code yyyy-MM-dd HH:mm:ss} form */
    public String getDemoTime() {
        return this.demoTime;
    }

    /** @param demoTime the timestamp string in {@code yyyy-MM-dd HH:mm:ss} form */
    public void setDemoTime(String demoTime) {
        this.demoTime = demoTime;
    }
}
import com.bo.DemoBO;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.Pageable;
import org.springframework.data.elasticsearch.annotations.Query;
import org.springframework.data.elasticsearch.repository.ElasticsearchRepository;
/**
 * Repository for {@link DemoBO} documents, keyed by their {@code String} ID.
 *
 * <p>Spring derives queries from method names of the form {@code findByXxx}, where {@code Xxx}
 * is a property of the entity. This is why camel-case field names are recommended: per the
 * original author's note, using an alias instead lets the parameter go in but makes reading the
 * result back awkward.
 */
public interface DemoRepository extends ElasticsearchRepository<DemoBO, String> {

    /** Finds a page of documents by {@code demoName}. */
    Page<DemoBO> findByDemoName(String demoName, Pageable pageable);

    /** Finds a page of documents by both fields; the naming scheme is {@code findByXxxAndYyy}. */
    Page<DemoBO> findByDemoNameAndDemoValue(String demoName, String demoValue, Pageable pageable);

    /**
     * Runs a hand-written bool query. The method name is free-form when {@code @Query} is used.
     *
     * <p>Placeholders are positional: {@code ?0} is {@code demoName}, {@code ?1} is
     * {@code demoValue}, {@code ?2} is {@code demoNumber}. The query has a {@code must}
     * {@code match_phrase} clause on {@code demoValue}, a {@code must_not} {@code term} clause
     * on {@code demoName}, and a {@code should} {@code range} ({@code gte}) clause on
     * {@code demoNumber}.
     *
     * <p>NOTE(review): the original author states that only the {@code bool} keyword is
     * supported in this annotation - verify against the library version in use.
     */
    @Query("{\"bool\": {"
            + "\"must\": [{\"match_phrase\": {\"demoValue\": \"?1\"}}],"
            + "\"must_not\": [{\"term\": {\"demoName\": {\"value\": \"?0\"}}}],"
            + "\"should\": [{\"range\": {\"demoNumber\": {\"gte\": \"?2\"}}}]"
            + "}}")
    Page<DemoBO> getDocument(String demoName, String demoValue, int demoNumber, Pageable pageable);
}
Spring Data Elasticsearch常用自带方法参考:
Keyword | Sample | Elasticsearch Query String |
---|---|---|
And | findByNameAndPrice | { “query” : { “bool” : { “must” : [ { “query_string” : { “query” : “?”, “fields” : [ “name” ] } }, { “query_string” : { “query” : “?”, “fields” : [ “price” ] } } ] } }} |
Or | findByNameOrPrice | { “query” : { “bool” : { “should” : [ { “query_string” : { “query” : “?”, “fields” : [ “name” ] } }, { “query_string” : { “query” : “?”, “fields” : [ “price” ] } } ] } }} |
Is | findByName | { “query” : { “bool” : { “must” : [ { “query_string” : { “query” : “?”, “fields” : [ “name” ] } } ] } }} |
Not | findByNameNot | { “query” : { “bool” : { “must_not” : [ { “query_string” : { “query” : “?”, “fields” : [ “name” ] } } ] } }} |
Between | findByPriceBetween | { “query” : { “bool” : { “must” : [ {“range” : {“price” : {“from” : ?, “to” : ?, “include_lower” : true, “include_upper” : true } } } ] } }} |
LessThan | findByPriceLessThan | { “query” : { “bool” : { “must” : [ {“range” : {“price” : {“from” : null, “to” : ?, “include_lower” : true, “include_upper” : false } } } ] } }} |
GreaterThan | findByPriceGreaterThan | { “query” : { “bool” : { “must” : [ {“range” : {“price” : {“from” : ?, “to” : null, “include_lower” : false, “include_upper” : true } } } ] } }} |
StartingWith | findByNameStartingWith | { “query” : { “bool” : { “must” : [ { “query_string” : { “query” : “?*”, “fields” : [ “name” ] }, “analyze_wildcard”: true } ] } }} |
EndingWith | findByNameEndingWith | { “query” : { “bool” : { “must” : [ { “query_string” : { “query” : “*?”, “fields” : [ “name” ] }, “analyze_wildcard”: true } ] } }} |
Contains/Containing | findByNameContaining | { “query” : { “bool” : { “must” : [ { “query_string” : { “query” : “?”, “fields” : [ “name” ] }, “analyze_wildcard”: true } ] } }} |
In (when annotated as FieldType.Keyword) | findByNameIn(Collection<String> names) | { “query” : { “bool” : { “must” : [ {“bool” : {“must” : [ {“terms” : {“name” : ["?","?"]}} ] } } ] } }} |
In | findByNameIn(Collection<String> names) | { “query”: {“bool”: {“must”: [{“query_string”:{“query”: “\"?\" \"?\"”, “fields”: [“name”]}}]}}} |
NotIn (when annotated as FieldType.Keyword) | findByNameNotIn(Collection<String> names) | { “query” : { “bool” : { “must” : [ {“bool” : {“must_not” : [ {“terms” : {“name” : ["?","?"]}} ] } } ] } }} |
NotIn | findByNameNotIn(Collection<String> names) | {“query”: {“bool”: {“must”: [{“query_string”: {“query”: “NOT(\"?\" \"?\")”, “fields”: [“name”]}}]}}} |
OrderBy | findByAvailableTrueOrderByNameDesc | { “query” : { “bool” : { “must” : [ { “query_string” : { “query” : “true”, “fields” : [ “available” ] } } ] } }, “sort”:[{“name”:{“order”:“desc”}}] } |
这一步骤在对应Elasticsearch7.0以后的版本不需要我们来执行,因为在DemoBO中我们已经定义好了索引名和Mapping信息,在后续实际调用过程中Spring会自动帮我们去查询该索引是否存在,如果不存在则创建。
/**
 * Explicitly (re)creates the index for {@link DemoBO}.
 *
 * <p>Per the original author's note, these template methods are only available in the library
 * versions matching Elasticsearch before 7.0 and were removed afterwards.
 */
public class CreateIndexes {

    @Autowired
    private ElasticsearchRestTemplate elasticsearchRestTemplate;

    /**
     * Drops the named index if it exists, then creates the index and mapping of {@link DemoBO}.
     *
     * @param indexName name of the index to check for and delete before re-creating
     */
    public void createDemoIndex(String indexName) {
        // Start from a clean slate: remove any existing index of that name first.
        boolean alreadyPresent = elasticsearchRestTemplate.indexExists(indexName);
        if (alreadyPresent) {
            elasticsearchRestTemplate.deleteIndex(indexName);
        }

        elasticsearchRestTemplate.createIndex(DemoBO.class);
        // Required step: without it the index is created but carries no mapping information.
        elasticsearchRestTemplate.putMapping(DemoBO.class);
    }
}
/**
 * Read-side examples built on {@link DemoRepository}.
 *
 * <p>All list-returning methods fetch only the first page of 10 documents.
 */
public class GetDocument {

    @Autowired
    private DemoRepository demoRepository;

    /**
     * Loads a single document by ID using the repository's built-in lookup.
     *
     * @param id the document ID
     * @return the matching document
     * @throws java.util.NoSuchElementException if no document with that ID exists
     */
    public DemoBO findById(String id) {
        // Same exception type an unchecked Optional.get() would raise, but the message names the ID.
        return demoRepository.findById(id)
                .orElseThrow(() -> new java.util.NoSuchElementException("No document found with id: " + id));
    }

    /**
     * Returns the first page (10 entries) of all documents.
     *
     * <p>The no-argument {@code findAll()} returns an {@code Iterable} of everything; the
     * original author advises against it for large data sets and recommends paging instead.
     *
     * @return up to 10 documents
     */
    public List<DemoBO> findAll() {
        return demoRepository.findAll(PageRequest.of(0, 10)).getContent();
    }

    /**
     * Finds documents by {@code demoName} via the derived query declared in the repository.
     *
     * @param demoName value to match
     * @return up to 10 matching documents
     */
    public List<DemoBO> findByDemoName(String demoName) {
        return demoRepository.findByDemoName(demoName, PageRequest.of(0, 10)).getContent();
    }

    /**
     * Finds documents matching both {@code demoName} and {@code demoValue}.
     *
     * @param demoName  value to match on {@code demoName}
     * @param demoValue value to match on {@code demoValue}
     * @return up to 10 matching documents
     */
    public List<DemoBO> findByDemoNameAndDemoValue(String demoName, String demoValue) {
        return demoRepository
                .findByDemoNameAndDemoValue(demoName, demoValue, PageRequest.of(0, 10))
                .getContent();
    }
}
Spring Data Elasticsearch自带新增文档的方法,所以我们不需要自己额外再自定义一个。
public class PutDocument {
@Autowired
private DemoRepository demoRepository;
public void save() {
DemoBO bo = new DemoBO();
//bo.setId("0001");//ID你可以自己指定,或者由Elasticsearch帮你自动创建
bo.setDemoName("abc");
bo.setDemoValue("123");
bo.setDemoNumber(1);
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
bo.setDemoTime(sdf.format(new Date()));
demoRepository.save(bo);//这里的入参必须是我们在前面创建的DemoBO类型
}
}
更新文档与新增文档方法一致,只要注意传入需要修改的文档ID即可。
public class PostDocument {
@Autowired
private DemoRepository demoRepository;
public void save() {
DemoBO bo = new DemoBO();
bo.setId("0001");
bo.setDemoName("def");
bo.setDemoValue("456");
bo.setDemoNumber(2);
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
bo.setDemoTime(sdf.format(new Date()));
demoRepository.save(bo);
}
}
Spring Data Elasticsearch自带两种删除方式,一种是直接传入整个DemoBO对象,另一种是根据ID来删除,推荐使用后一种。
/**
 * Examples of the two built-in delete styles: by entity and by ID. The original author
 * recommends deleting by ID.
 */
public class DeleteDocument {

    @Autowired
    private DemoRepository demoRepository;

    /**
     * Deletes the given document by passing the whole entity to the repository.
     *
     * @param bo the document to delete
     */
    public void delete(DemoBO bo) {
        this.demoRepository.delete(bo);
    }

    /**
     * Deletes the document with the given ID.
     *
     * @param id ID of the document to delete
     */
    public void deleteById(String id) {
        this.demoRepository.deleteById(id);
    }
}
/** Example caller of the custom {@code @Query} method declared on {@link DemoRepository}. */
public class BoolGetDocument {

    @Autowired
    private DemoRepository demoRepository;

    /**
     * Runs the repository's custom bool query and returns the first page of results.
     *
     * @param demoName   bound to placeholder {@code ?0} of the query
     * @param demoValue  bound to placeholder {@code ?1} of the query
     * @param demoNumber bound to placeholder {@code ?2} of the query
     * @return up to 10 matching documents
     */
    public List<DemoBO> getDocument(String demoName, String demoValue, int demoNumber) {
        // First page, ten entries.
        PageRequest firstPage = PageRequest.of(0, 10);
        return demoRepository.getDocument(demoName, demoValue, demoNumber, firstPage).getContent();
    }
}
Spring Data Elasticsearch进行复杂查询时还是比较麻烦的,在DemoRepository中自定义脚本无法实现,还是需要用到Elasticsearch官方的一些API方法。
/**
 * Aggregation example using the native query builders, for queries that cannot be expressed
 * as a repository method.
 *
 * <p>{@code ElasticsearchRestTemplate} exposes different methods in different Spring Data
 * Elasticsearch versions, so two variants are shown. They previously shared one signature,
 * which cannot compile; the pre-7.0 variant is now {@link #getAggregationLegacy(String)}.
 * NOTE(review): the two variants target different library versions - keep only the one that
 * matches the version on the classpath.
 */
public class GetAggregation {

    @Autowired
    private ElasticsearchRestTemplate elasticsearchRestTemplate;

    /**
     * Variant for the library versions matching Elasticsearch 7.0 and later.
     *
     * @param demoValue phrase to match against the {@code demoValue} field
     * @return one {@link DemoBO} per {@code demoName} bucket, carrying the bucket key and the
     *         {@code demoValue} of its top hit
     */
    public List<DemoBO> getAggregation(String demoValue) {
        NativeSearchQuery searchQuery = buildQuery(demoValue);
        SearchHits<DemoBO> searchHits = elasticsearchRestTemplate.search(searchQuery, DemoBO.class);
        Terms terms = searchHits.getAggregations().get("qc");
        return toDemoList(terms);
    }

    /**
     * Variant for the library versions matching Elasticsearch before 7.0.
     *
     * @param demoValue phrase to match against the {@code demoValue} field
     * @return one {@link DemoBO} per {@code demoName} bucket, carrying the bucket key and the
     *         {@code demoValue} of its top hit
     */
    public List<DemoBO> getAggregationLegacy(String demoValue) {
        SearchQuery searchQuery = buildQuery(demoValue);
        AggregatedPage<DemoBO> aggregatedPage = elasticsearchRestTemplate.queryForPage(searchQuery, DemoBO.class);
        Terms terms = aggregatedPage.getAggregations().get("qc");
        return toDemoList(terms);
    }

    /**
     * Builds the phrase query with a terms aggregation and a nested top-hits aggregation.
     *
     * <p>"qc" and "tj" are free-form aggregation names. "qc" groups (de-duplicates) by
     * {@code demoName}; "tj" collects one top hit per group - comparable to SQL GROUP BY.
     */
    private NativeSearchQuery buildQuery(String demoValue) {
        return new NativeSearchQueryBuilder()
                .withQuery(QueryBuilders.matchPhraseQuery("demoValue", demoValue))
                .addAggregation(AggregationBuilders.terms("qc").field("demoName").size(10)
                        .subAggregation(AggregationBuilders.topHits("tj").sort("demoName", SortOrder.DESC).size(1)))
                .withPageable(PageRequest.of(0, 1))
                .build();
    }

    /** Converts each bucket of the "qc" terms aggregation into a {@link DemoBO}. */
    private List<DemoBO> toDemoList(Terms terms) {
        List<DemoBO> list = new ArrayList<>();
        for (Terms.Bucket bucket : terms.getBuckets()) {
            DemoBO bo = new DemoBO();
            bo.setDemoName(bucket.getKeyAsString());
            TopHits top = bucket.getAggregations().get("tj");
            for (SearchHit hit : top.getHits()) {
                // A missing demoValue in the hit's source is stored as an empty string.
                Object value = hit.getSourceAsMap().get("demoValue");
                bo.setDemoValue(value != null ? value.toString() : "");
            }
            list.add(bo);
        }
        return list;
    }
}
这边使用的是从Oracle数据库导入到Elasticsearch,可作为一个参考,实现方式是一样的。
/**
 * Example of bulk-importing rows from an Oracle database into Elasticsearch as {@link DemoBO}
 * documents.
 */
public class OracleToES {

    /** Number of documents submitted per bulk request. */
    private static final int BATCH_SIZE = 1000;

    /**
     * Matches an underscore followed by one word character; the captured character is
     * upper-cased by {@link #lineToHump(String)}.
     *
     * <p>NOTE(review): the original snippet referenced {@code linePattern} without declaring
     * it; this is the conventional underscore-to-camel-case regex - confirm it matches the
     * intended one.
     */
    private static final java.util.regex.Pattern linePattern = java.util.regex.Pattern.compile("_(\\w)");

    @Autowired
    private ElasticsearchRestTemplate elasticsearchRestTemplate;

    /**
     * Runs {@code sql} against the Oracle database and bulk-indexes every row.
     *
     * <p>Each row becomes a JSON object whose keys are the column names converted from
     * UPPER_SNAKE_CASE to camelCase; the object is converted to a {@link DemoBO} and queued.
     * Documents are submitted every {@value #BATCH_SIZE} rows, plus once for the remainder.
     * Any exception is printed and swallowed; the JDBC resources are always closed.
     *
     * @param indexName index name; only used in the progress message
     * @param sql       query whose result set is imported
     */
    public void writeOracleDataToES(String indexName, String sql) {
        try {
            System.out.println("开始导入数据: " + indexName);
            Class.forName("oracle.jdbc.driver.OracleDriver");
            try (Connection conn = DriverManager.getConnection("jdbc:oracle:thin:@127.0.0.1:1521:sjk", "demo", "123456");
                 PreparedStatement ps = conn.prepareStatement(sql, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY)) {
                ps.setFetchSize(0);
                try (ResultSet rs = ps.executeQuery()) {
                    importRows(rs);
                }
            }
        } catch (Exception e) {
            // Best-effort import, as in the original: report the failure and return.
            e.printStackTrace();
        }
    }

    /** Reads every row of {@code rs}, converts it to a {@link DemoBO} and bulk-indexes in batches. */
    private void importRows(ResultSet rs) throws Exception {
        ResultSetMetaData colData = rs.getMetaData();
        int columnCount = colData.getColumnCount();

        // Column names do not change between rows, so resolve and camel-case them once.
        // Index 0 is unused because JDBC columns are 1-based.
        String[] columnNames = new String[columnCount + 1];
        String[] fieldNames = new String[columnCount + 1];
        for (int i = 1; i <= columnCount; i++) {
            columnNames[i] = colData.getColumnName(i);
            // Oracle reports column names in upper case; lower-case before camel-casing.
            fieldNames[i] = lineToHump(columnNames[i].toLowerCase(java.util.Locale.ROOT));
        }

        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        List<IndexQuery> batch = new ArrayList<>();
        while (rs.next()) {
            JSONObject json = new JSONObject();
            for (int i = 1; i <= columnCount; i++) {
                Object value;
                if ("DEMO_TIME".equals(columnNames[i])) {
                    // Date columns must be formatted explicitly, otherwise the import fails.
                    // getTimestamp keeps the time of day, which a date-only java.sql.Date does not.
                    java.util.Date timestamp = rs.getTimestamp(i);
                    value = timestamp != null ? sdf.format(timestamp) : null;
                } else {
                    value = rs.getObject(i);
                }
                json.put(fieldNames[i], value);
            }

            IndexQuery indexQuery = new IndexQuery();
            indexQuery.setObject(JSONObject.toJavaObject(json, DemoBO.class));
            batch.add(indexQuery);

            if (batch.size() >= BATCH_SIZE) {
                elasticsearchRestTemplate.bulkIndex(batch);
                batch.clear();
            }
        }
        // Submit whatever is left over from the last, partial batch.
        if (!batch.isEmpty()) {
            elasticsearchRestTemplate.bulkIndex(batch);
        }
    }

    /**
     * Converts an underscore-separated name to camelCase, e.g. {@code demo_time} to
     * {@code demoTime}. Oracle columns are usually named with underscores.
     *
     * @param str lower-case, underscore-separated name
     * @return the name with each {@code _x} replaced by {@code X}
     */
    private String lineToHump(String str) {
        Matcher matcher = linePattern.matcher(str);
        StringBuffer sb = new StringBuffer();
        while (matcher.find()) {
            String upper = matcher.group(1).toUpperCase(java.util.Locale.ROOT);
            matcher.appendReplacement(sb, Matcher.quoteReplacement(upper));
        }
        matcher.appendTail(sb);
        return sb.toString();
    }
}