使用Es构建hbase的二级索引

ES版本为7.6.2(注意:下文依赖中 transport 客户端版本为 6.8.0,与此处声明不一致,使用前需确认实际版本)
Hbase版本为2.1.0(注意:下文依赖中 hbase-endpoint 为 2.2.3,与 hbase-client 2.1.0 不一致,建议统一)

思路:先将 HBase 的 rowkey 当做索引存入到 ES 中,根据 HBase 中的字段从 ES 中读取出索引(rowkey),再去 HBase 查找相应的数据
用到的依赖:

<dependencies>

        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>3.1.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>3.1.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>3.1.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
            <version>3.1.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-common</artifactId>
            <version>3.1.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>2.1.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-endpoint</artifactId>
            <version>2.2.3</version>
        </dependency>

        <dependency>
        <groupId>org.elasticsearch.client</groupId>
        <artifactId>transport</artifactId>
        <version>6.8.0</version>
    </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-core</artifactId>
            <version>2.9.1</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
        </dependency>
        <dependency>
            <groupId>io.netty</groupId>
            <artifactId>netty-all</artifactId>
            <version>4.1.17.Final</version>
        </dependency>

    </dependencies>

代码:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.AdminClient;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.cluster.health.ClusterHealthStatus;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.transport.client.PreBuiltTransportClient;

import java.io.*;
import java.net.InetAddress;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Map;

/**
 * Utility class implementing an Elasticsearch-backed secondary index for HBase.
 *
 * <p>Documents in ES map a business field ({@code name}) to the HBase rowkey
 * ({@code id}); a lookup first resolves name -> rowkey in ES, then fetches the
 * row from HBase by rowkey.
 *
 * <p>NOTE(review): the transport client and HBase endpoints below are
 * hard-coded; confirm they match the target environment before use.
 */
public class EsUtils {
    private static TransportClient client = null;
    private static Settings build = null;
    private static Admin admin = null;
    private static Configuration conf = null;
    private static Connection connection = null;

    static {
        build = Settings.builder()
                .put("cluster.name", "ES")
                .put("client.transport.sniff", true)
                .build();
        // The Java transport client uses port 9300 (transport), not the HTTP port 9200.
        try {
            client = new PreBuiltTransportClient(build)
                    .addTransportAddress(new TransportAddress(InetAddress.getByName("cdh1.host.com"), 9300));
            System.out.println("elasticsearch集群连接成功");
            conf = HBaseConfiguration.create();
            conf.set("hbase.zookeeper.quorum", "192.168.122.163");
            conf.set("hbase.zookeeper.property.clientPort", "2181");
            connection = ConnectionFactory.createConnection(conf);
            // FIX: reuse the connection created above. The original called
            // ConnectionFactory.createConnection(conf) a second time, leaking
            // an HBase connection that was never closed.
            admin = connection.getAdmin();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Bulk-loads index documents from a local tab-separated file into ES.
     *
     * <p>Each input line is {@code "<rowkey>\t<name>"}. The ES document id is the
     * name so that {@link #getDocById} can later resolve name -> rowkey.
     * (Method name {@code buldData} kept as-is for caller compatibility.)
     *
     * @throws IOException if the file cannot be read
     */
    public static void buldData() throws IOException {
        BulkRequestBuilder bulk = client.prepareBulk();
        File file = new File("C:\\Users\\Administrator\\Desktop\\index.txt");
        // FIX: try-with-resources — the original never closed the reader.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) {
            int line = 1;
            String lineData;
            while ((lineData = reader.readLine()) != null) {
                System.out.println("line " + line + ": " + lineData);
                String[] array = lineData.split("\t");
                String id = array[0].trim();
                String name = array[1].trim();
                // args: index name, type, document id (the name, so lookups by name work);
                // the source holds the HBase rowkey under "id".
                bulk.add(client.prepareIndex("indexs", "index", name)
                        .setSource(XContentFactory.jsonBuilder().startObject()
                                .field("id", id)
                                .field("name", name)
                                .endObject()
                        ));
                line++;
            }
        }
        BulkResponse responses = bulk.get();
        // FIX: surface partial bulk failures instead of silently discarding them.
        if (responses.hasFailures()) {
            System.err.println(responses.buildFailureMessage());
        }
    }

    /**
     * Resolves an ES document to the HBase rowkey stored in its "id" field.
     *
     * @param index ES index name
     * @param type  ES type name
     * @param name  ES document id (the indexed business field)
     * @return the rowkey, or {@code null} if the document does not exist
     */
    public static String getDocById(String index, String type, String name) {
        Map<String, Object> source = client.prepareGet(index, type, name).get().getSource();
        // FIX: getSource() returns null for a missing document; the original NPE'd here.
        if (source == null) {
            return null;
        }
        return (String) source.get("id");
    }

    /**
     * Returns an {@link HTable} instance for the given table name.
     * The caller is responsible for closing it.
     */
    public static HTable getTable(String tableName) throws IOException {
        TableName table = TableName.valueOf(tableName);
        return (HTable) connection.getTable(table);
    }

    /**
     * Fetches a single row from HBase by rowkey.
     *
     * @param tableName HBase table name
     * @param rowkey    the rowkey to fetch
     * @return the {@link Result} for the row (may be empty if absent)
     * @throws IOException on HBase access failure
     */
    public static Result searchByRowKey(String tableName, String rowkey) throws IOException {
        // FIX: close the per-call table handle (the original leaked it), and
        // use an explicit UTF-8 charset instead of the platform default.
        try (Table htable = connection.getTable(TableName.valueOf(tableName))) {
            Get get = new Get(rowkey.getBytes(StandardCharsets.UTF_8));
            return htable.get(get);
        }
    }

    public static void main(String[] args) throws IOException {
        long start = System.currentTimeMillis();
        String docById = getDocById("indexs", "index", "wang");
        System.out.println("Es用时:" + (System.currentTimeMillis() - start) + "ms");
        // FIX: guard against a missing ES document before hitting HBase.
        if (docById == null) {
            System.out.println("未在ES中找到索引文档");
            return;
        }
        long time2 = System.currentTimeMillis();
        searchByRowKey("aiwen_ip_info", docById);
        System.out.println("Hbase用时:" + (System.currentTimeMillis() - time2) + "ms");
        System.out.println("总耗时: " + (System.currentTimeMillis() - start) + "ms");
    }
}

你可能感兴趣的:(大数据)