ES version: 7.6.2
HBase version: 2.1.0
Approach: first write each HBase rowkey into ES as an indexed document; then query ES by a field value to retrieve the rowkey, and finally fetch the corresponding row from HBase by that rowkey.
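The bulk-load code below reads a local index.txt in which each line is a tab-separated pair: the rowkey, then the lookup field. A hypothetical sample (the values are invented for illustration):

1001	wang
1002	li

Each name becomes an ES document id, and the id field of that document carries the HBase rowkey.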
Dependencies used (pom.xml):
<dependencies>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>3.1.3</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>3.1.3</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-mapreduce-client-core</artifactId>
        <version>3.1.3</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
        <version>3.1.3</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-mapreduce-client-common</artifactId>
        <version>3.1.3</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-client</artifactId>
        <version>2.1.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-endpoint</artifactId>
        <version>2.2.3</version>
    </dependency>
    <dependency>
        <groupId>org.elasticsearch.client</groupId>
        <artifactId>transport</artifactId>
        <version>6.8.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.logging.log4j</groupId>
        <artifactId>log4j-core</artifactId>
        <version>2.9.1</version>
    </dependency>
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.12</version>
    </dependency>
    <dependency>
        <groupId>io.netty</groupId>
        <artifactId>netty-all</artifactId>
        <version>4.1.17.Final</version>
    </dependency>
</dependencies>
Code:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.cluster.health.ClusterHealthStatus;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.transport.client.PreBuiltTransportClient;
import java.io.*;
import java.net.InetAddress;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
public class EsUtils {
    private static TransportClient client = null;
    private static Settings build = null;
    private static Admin admin = null;
    private static Configuration conf = null;
    private static Connection connection = null;

    static {
        build = Settings.builder().put("cluster.name", "ES").put("client.transport.sniff", true).build();
        // The Java transport client connects to ES on port 9300
        try {
            client = new PreBuiltTransportClient(build).addTransportAddress(new TransportAddress(InetAddress.getByName("cdh1.host.com"), 9300));
            System.out.println("Connected to the Elasticsearch cluster");
            conf = HBaseConfiguration.create();
            conf.set("hbase.zookeeper.quorum", "192.168.122.163");
            conf.set("hbase.zookeeper.property.clientPort", "2181");
            connection = ConnectionFactory.createConnection(conf);
            // Reuse the existing connection instead of opening a second one
            admin = connection.getAdmin();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
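
    // A minimal sketch, not in the original code: verify the ES connection by
    // checking cluster health, which is what the ClusterHealthResponse /
    // ClusterHealthStatus imports above are for.
    public static boolean isClusterHealthy() {
        ClusterHealthResponse health = client.admin().cluster().prepareHealth().get();
        ClusterHealthStatus status = health.getStatus();
        System.out.println("cluster status: " + status);
        return status != ClusterHealthStatus.RED;
    }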
    // Build the ES index by reading a local file
    public static void bulkData() throws IOException {
        BulkRequestBuilder bulk = client.prepareBulk();
        File file = new File("C:\\Users\\Administrator\\Desktop\\index.txt");
        InputStreamReader insReader = new InputStreamReader(new FileInputStream(file), "UTF-8");
        BufferedReader reader = new BufferedReader(insReader);
        int line = 1;
        String lineData = null;
        while ((lineData = reader.readLine()) != null) {
            System.out.println("line " + line + ": " + lineData);
            String[] array = lineData.split("\t");
            String id = array[0].trim();
            String name = array[1].trim();
            // prepareIndex takes the index name, the type, and the document id;
            // the document id here is the name, so a lookup by name returns the
            // id field, which holds the HBase rowkey.
            bulk.add(client.prepareIndex("indexs", "index", name)
                    .setSource(XContentFactory.jsonBuilder().startObject()
                            .field("id", id)
                            .field("name", name)
                            .endObject()
                    ));
            line++;
        }
        reader.close();
        BulkResponse responses = bulk.get();
        if (responses.hasFailures()) {
            System.out.println(responses.buildFailureMessage());
        }
    }
    // Look up the rowkey stored under a document id in ES
    public static String getDocById(String index, String type, String name) {
        Map<String, Object> source = client.prepareGet(index, type, name).get().getSource();
        String id = (String) source.get("id");
        return id;
    }
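
    // A minimal sketch, not in the original code: instead of a get by document
    // id, run a term query on the name field and collect every matching rowkey.
    // The index, type, and field names follow the ones used in bulkData().
    public static List<String> getRowKeysByName(String name) {
        SearchResponse response = client.prepareSearch("indexs")
                .setTypes("index")
                .setQuery(QueryBuilders.termQuery("name", name))
                .get();
        List<String> rowkeys = new ArrayList<>();
        for (SearchHit hit : response.getHits().getHits()) {
            rowkeys.add((String) hit.getSourceAsMap().get("id"));
        }
        return rowkeys;
    }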
    // Get an instance of an HBase table; Table is the public interface in
    // HBase 2.x, so the cast to the internal HTable class is unnecessary
    public static Table getTable(String tableName) throws IOException {
        TableName table = TableName.valueOf(tableName);
        return connection.getTable(table);
    }
    // Query HBase by rowkey
    public static Result searchByRowKey(String tableName, String rowkey) throws IOException {
        Table htable = connection.getTable(TableName.valueOf(tableName));
        Get get = new Get(Bytes.toBytes(rowkey));
        Result result = htable.get(get);
        htable.close();
        return result;
    }
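
    // A minimal sketch, not in the original code: print every cell of a Result
    // so the row fetched from HBase is actually visible.
    public static void printRow(Result result) {
        for (Cell cell : result.rawCells()) {
            System.out.println(Bytes.toString(CellUtil.cloneFamily(cell)) + ":"
                    + Bytes.toString(CellUtil.cloneQualifier(cell)) + " = "
                    + Bytes.toString(CellUtil.cloneValue(cell)));
        }
    }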
    public static void main(String[] args) throws IOException {
        long start = System.currentTimeMillis();
        String docById = getDocById("indexs", "index", "wang");
        System.out.println("ES took: " + (System.currentTimeMillis() - start) + "ms");
        long time2 = System.currentTimeMillis();
        searchByRowKey("aiwen_ip_info", docById);
        System.out.println("HBase took: " + (System.currentTimeMillis() - time2) + "ms");
        System.out.println("Total: " + (System.currentTimeMillis() - start) + "ms");
        // Release the clients so the JVM can exit cleanly
        client.close();
        connection.close();
    }
}