使用ES作为hbase的二级索引进行查询

先上代码,首先是 ES 的工具类:

/**
 * Elasticsearch helper used as the secondary index for HBase.
 *
 * <p>Documents are written to index {@code channel_active}, type {@code report}, and the
 * document id is the 1-based line number of the source file, which is also the rowkey
 * written by {@code hbaseUtil.bulkPutData}. A search therefore returns ids that can be
 * used directly as HBase rowkeys.
 *
 * <p>NOTE(review): if the host name in the static initializer cannot be resolved, the
 * stack trace is printed and {@code client} stays {@code null}; every method below will
 * then fail with a NullPointerException.
 */
public class elasticsearchUtil {
    /** Index that holds the secondary-index documents. */
    private static final String INDEX_NAME = "channel_active";
    /** Mapping type used when the documents are indexed. */
    private static final String TYPE_NAME = "report";
    /** Upper bound on the number of hits returned by a single search. */
    private static final int MAX_HITS = 1222;
    /** Columns 0..9 of a data line are read, so a usable line needs at least 10 columns. */
    private static final int MIN_COLUMNS = 10;

    private static TransportClient client = null;

    static {
        Settings settings = Settings.builder()
                .put("cluster.name", "elasticsearch-cluster") // name of the ES cluster
                // sniffing: discover the other nodes of the cluster and add them to the client's node list
                .put("client.transport.sniff", true)
                .build();
        try {
            client = new PreBuiltTransportClient(settings)
                    .addTransportAddress(new TransportAddress(InetAddress.getByName("hadoop03"), 9300));
            System.out.println("elasticsearch集群连接成功");
        } catch (UnknownHostException e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints the cluster name, the number of data nodes, the total number of nodes and
     * the health status of the cluster.
     */
    public static void clusterInfo(){
        ClusterHealthResponse health = client.admin().cluster().prepareHealth().get();
        System.out.println("集群名称:" + health.getClusterName() + "\n"
                + "集群数据节点数:" + health.getNumberOfDataNodes() + "\n"
                + "集群总共节点数:" + health.getNumberOfNodes() + "\n"
                + "集群状态:" + health.getStatus());
    }

    /**
     * Bulk-loads the tab-separated data file into Elasticsearch; these documents are the
     * secondary index for HBase.
     *
     * <p>Each line becomes one document whose id is the 1-based line number. Lines with
     * fewer than {@value #MIN_COLUMNS} columns, or with a non-numeric {@code posted_day} /
     * {@code cust_code}, are reported on stderr and skipped; the line counter still
     * advances so that the ids of the remaining documents keep matching the line numbers.
     * All documents are sent in a single bulk request after the file has been read.
     */
    public static void bulkData(){
        BulkRequestBuilder bulk = client.prepareBulk();
        File file = new File("C:\\Users\\Administrator\\Desktop\\resoult.txt");
        System.out.println("以行为单位读取文件内容,一次读一整行:");
        // try-with-resources: the reader (and the underlying file stream) is closed on every path
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), "UTF-8"))) {
            int line = 1;
            String lineData;
            while ((lineData = reader.readLine()) != null) {
                // echo the line together with its number
                System.out.println("line " + line + ": " + lineData);
                String[] array = lineData.split("\t");
                if (array.length < MIN_COLUMNS) {
                    System.err.println("skip line " + line + ": expected at least " + MIN_COLUMNS
                            + " columns but found " + array.length);
                    line++;
                    continue;
                }
                Long posted_day;
                Long cust_code;
                try {
                    posted_day = Long.valueOf(array[0].trim());
                    cust_code = Long.valueOf(array[9].trim());
                } catch (NumberFormatException e) {
                    System.err.println("skip line " + line + ": non-numeric posted_day or cust_code");
                    line++;
                    continue;
                }

                // Columns 7 and 10..14 (prov_code, sc_cnt_sum, act_cnt_sum, other_sum,
                // act_percent, act_unknow_sum) are not indexed.
                bulk.add(client.prepareIndex(INDEX_NAME, TYPE_NAME, String.valueOf(line))
                        .setSource(XContentFactory.jsonBuilder().startObject()
                                .field("posted_day", posted_day)
                                .field("matnr", array[1].trim())
                                .field("matnr_desc", array[2].trim())
                                .field("matnr_grp", array[3].trim())
                                .field("brand", array[4].trim())
                                .field("model", array[5].trim())
                                .field("prov_name", array[6].trim())
                                .field("cust_name", array[8].trim())
                                .field("cust_code", cust_code)
                                .endObject()
                        ));
                line++;
            }
            if (bulk.numberOfActions() == 0) {
                // nothing was added to the bulk request, so there is nothing to execute
                System.out.println("no documents to index");
                return;
            }
            BulkResponse responses = bulk.get();
            System.out.println(responses.status().name());
            if (responses.hasFailures()) {
                // the overall status alone does not show per-document failures
                System.err.println(responses.buildFailureMessage());
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Searches one field for a value and returns the ids of the matching documents.
     *
     * <p>NOTE(review): the query is a {@code matchQuery}; whether that behaves as an exact
     * match presumably depends on how the field is mapped/analyzed — confirm against the
     * index mapping. At most {@value #MAX_HITS} ids are returned.
     *
     * @param index index to search
     * @param type  mapping type to search
     * @param field name of the field to match
     * @param value value to look for
     * @return ids of the matching documents (used as HBase rowkeys); empty if nothing matched
     */
    public static ArrayList<String> preciseSearchByField(String index, String type , String field , String value){
        SearchResponse response = client.prepareSearch(index)
                .setTypes(type)
                .setQuery(QueryBuilders.matchQuery(field, value))
                .setSize(MAX_HITS)
                .get();
        ArrayList<String> idList = new ArrayList<>();
        for (SearchHit hit : response.getHits().getHits()) {
            idList.add(hit.getId());
        }
        return idList;
    }

    /**
     * Fetches a document by id and prints each field of its source as {@code key:::value}.
     * Prints a message instead when the document does not exist.
     *
     * @param index index that holds the document
     * @param type  mapping type of the document
     * @param id    document id
     */
    public static void getDocById(String index, String type , String id){
        GetResponse response = client.prepareGet(index, type, id).get();
        Map<String, Object> source = response.isExists() ? response.getSource() : null;
        if (source == null) {
            // missing document (or no source available): nothing to print
            System.out.println("document not found: " + index + "/" + type + "/" + id);
            return;
        }
        for (Map.Entry<String, Object> entry : source.entrySet()) {
            System.out.println(entry.getKey() + ":::" + entry.getValue());
        }
    }

    /**
     * Example of a compound query: prints the source of every document whose
     * {@code matnr} matches the phrase {@code 1032015903014110} and whose
     * {@code prov_name} does not match the phrase {@code 浙江}.
     */
    public static void MutilSearch(){
        QueryBuilder matnrQuery = QueryBuilders.matchPhraseQuery("matnr", "1032015903014110");
        QueryBuilder provinceQuery = QueryBuilders.matchPhraseQuery("prov_name", "浙江");
        // The documents are indexed under type "report" (see bulkData); "matnr" is a field, not a type.
        SearchResponse response = client.prepareSearch(INDEX_NAME)
                .setTypes(TYPE_NAME)
                .setQuery(QueryBuilders.boolQuery()
                        .must(matnrQuery)
                        .mustNot(provinceQuery))
                .execute()
                .actionGet();
        for (SearchHit hit : response.getHits()) {
            System.out.println(hit.getSourceAsString());
        }
    }


    public static void main(String[] args) {
//        clusterInfo();
//        bulkData();
//        preciseSearchByField("channel_active" , "report" ,"matnr" , "1006084301017310");
//        getDocById("channel_active" , "report" , "14");
        MutilSearch();
    }
}

hbase的工具类:

/**
 * hbase工具类
 */
public class hbaseUtil {
    private static Admin admin = null ;
    private static Configuration conf = null;
    private static Connection connection = null;


    static{
        try {
        conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum" , "hadoop03,hadoop04,hadoop05");
        conf.set("hbase.zookeeper.property.clientPort","2181");
//        conf.addResource(ConfigurationManager.getProperty("hbase-site"));
//        conf.addResource(ConfigurationManager.getProperty("core-site"));
        connection = ConnectionFactory.createConnection(conf);
        admin = new HBaseAdmin(conf);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Hbase获取所有的表
     * @return
     */
    public static List getAllTables() {
        List tables = null;
        if (admin != null) {
            try {
                HTableDescriptor[] allTable = admin.listTables();
                if (allTable.length > 0)
                    tables = new ArrayList();
                for (HTableDescriptor hTableDescriptor : allTable) {
                    tables.add(hTableDescriptor.getNameAsString());
                    System.out.println(hTableDescriptor.getNameAsString());
                }
            }catch (IOException e) {
                e.printStackTrace();
            }
        }
        return tables;
    }

    /**
     * 创建表
     * @param tableName
     */
    public static void createTable(String tableName){
        try {
            if(isExistsTable(tableName)){
                System.out.println(tableName + ":表已经存在");
            }else{
                Admin admin = connection.getAdmin();
                HTableDescriptor table = new HTableDescriptor(TableName.valueOf(tableName));
                table.addFamily(new HColumnDescriptor("mycf"));
                System.out.println("新建表");
                admin.createTable(table);
                System.out.println("建表完成");
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * 判断表是否存在
     * @param tableName
     * @return
     */
    public static Boolean isExistsTable(String tableName){
        boolean isExists = true;
        try {
            isExists = admin.tableExists(TableName.valueOf(tableName));
        } catch (IOException e) {
            e.printStackTrace();
        }
        return isExists;
    }

    /**
     * 删除表
     * @param tableName
     */
    public static void deleteTable(String tableName){
        TableName table = TableName.valueOf(tableName);
        if(isExistsTable(tableName)){
            try {
                admin.disableTable(table);
                admin.deleteTable(table);
                System.out.println(tableName + "表已成功删除");
            } catch (IOException e) {
                e.printStackTrace();
            }
        }else{
            System.out.println(tableName + "表不存在!");
        }
    }

    /**
     * 获取hbase表的实例
     * @param tableName
     * @return
     */
    public static HTable getTable(String tableName){
        HTable Htable = null;
        try {
        TableName table = TableName.valueOf(tableName);
        Htable = (HTable) connection.getTable(table);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return Htable;
    }

    /**
     * 批量插入数据到hbase:将文件中的数据全部插入,注意,是不带标题的文本
     * @param tableName
     * @param cf
     */
    public static void bulkPutData(String tableName, String cf){
        File file = new File("C:\\Users\\Administrator\\Desktop\\resoult.txt");
        HTable table = getTable(tableName);
        try {
            List putLists = new ArrayList();
            int bestBathPutSize = 3177; //3177不是我杜撰的,是2*hbase.client.write.buffer/put.heapSize()计算出来的
            BufferedReader reader = null;
            System.out.println("以行为单位读取文件内容,一次读一整行:");
            InputStreamReader insReader = new InputStreamReader( new FileInputStream(file), "UTF-8");
            reader = new BufferedReader(insReader);
            String lineData = null;
            int line = 1;
            while ((lineData = reader.readLine()) != null) {
                // 显示行号
                System.out.println("line " + line + ": " + lineData);
                String[] array =  lineData.split("\t");
                String posted_day = String.valueOf(array[0]);
                String matnr = String.valueOf(array[1]);
                String matnr_desc = String.valueOf(array[2]);
                String matnr_grp = String.valueOf(array[3]);
                String brand = String.valueOf(array[4]);
                String model = String.valueOf(array[5]);
                String prov_name = String.valueOf(array[6]);
                String prov_code = String.valueOf(array[7]);
                String cust_name = String.valueOf(array[8]);
                String cust_code = String.valueOf(array[9]);
                String sc_cnt_sum =    String.valueOf(array[10]);
                String act_cnt_sum = String.valueOf(array[11]);
                String other_sum = String.valueOf(array[12]);
                String act_percent = String.valueOf(array[13]);
                String act_unknow_sum = String.valueOf(array[14]);

                Put put=new Put(String.valueOf(line).getBytes());
                put.addColumn(cf.getBytes() , "posted_day".getBytes() , posted_day.getBytes("UTF-8"));
                put.addColumn(cf.getBytes() , "matnr".getBytes() , matnr.getBytes("UTF-8"));
                put.addColumn(cf.getBytes() , "matnr_desc".getBytes() , matnr_desc.getBytes("UTF-8"));
                put.addColumn(cf.getBytes() , "matnr_grp".getBytes() , matnr_grp.getBytes("UTF-8"));
                put.addColumn(cf.getBytes() , "brand".getBytes() , brand.getBytes("UTF-8"));
                put.addColumn(cf.getBytes() , "model".getBytes() , model.getBytes("UTF-8"));
                put.addColumn(cf.getBytes() , "prov_name".getBytes() , prov_name.getBytes("UTF-8"));
                put.addColumn(cf.getBytes() , "prov_code".getBytes() , prov_code.getBytes("UTF-8"));
                put.addColumn(cf.getBytes() , "cust_name".getBytes() , cust_name.getBytes("UTF-8"));
                put.addColumn(cf.getBytes() , "cust_code".getBytes() , cust_code.getBytes("UTF-8"));
                put.addColumn(cf.getBytes() , "sc_cnt_sum".getBytes() , sc_cnt_sum.getBytes("UTF-8"));
                put.addColumn(cf.getBytes() , "act_cnt_sum".getBytes() , act_cnt_sum.getBytes("UTF-8"));
                put.addColumn(cf.getBytes() , "other_sum".getBytes() , other_sum.getBytes("UTF-8"));
                put.addColumn(cf.getBytes() , "act_percent".getBytes() , act_percent.getBytes("UTF-8"));
                put.addColumn(cf.getBytes() , "act_unknow_sum".getBytes() , act_unknow_sum.getBytes("UTF-8"));
                put.setDurability(Durability.SKIP_WAL); //此次写数据关闭WAL
                putLists.add(put);
                if(putLists.size()==bestBathPutSize){
                    //达到最佳大小值了,马上提交一把
                    table.put(putLists);
                    putLists.clear();
                    putLists.clear();
                }
                line++;
            }
            reader.close();
            //剩下的未提交数据,最后做一次提交
            getTable(tableName).put(putLists) ;
        } catch (Exception e) {
            e.printStackTrace();
        }finally {
            try {
                table.close();
                connection.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * 单条数据插入hbase
     * @param tableName
     * @param rowkey
     * @param cf
     * @param column
     * @param value
     */
    public static void put(String tableName, String rowkey, String cf, String column, String value) {
        HTable table = getTable(tableName);
        Put put = new Put(Bytes.toBytes(rowkey));
        put.add(Bytes.toBytes(cf), Bytes.toBytes(column), Bytes.toBytes(value));
        try {
            table.put(put);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * 根据rowkey查询hbase,返回result对象
     * @param tableName
     * @param rowkey
     * @return
     */
    public static Result searchByRowkey(String tableName , String rowkey){
        Result result = null;
        try {
            HTable table = getTable(tableName);
            Get get = new Get(rowkey.getBytes());
            result = table.get(get);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return result;
    }

    /**
     * 将rowkey集合查询返回result集合
     * @param tableName
     * @param rowkey
     * @return
     */
    public static ArrayList searchByRowkeyList(String tableName , ArrayList rowkey){
        Result result = null;
        ArrayList ResultList = new ArrayList();
        for(String rk : rowkey){
            try {
                HTable table = getTable(tableName);
                Get get = new Get(rk.getBytes());
                result = table.get(get);
                ResultList.add(result);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        System.out.println("hbase 结果大小:" + ResultList.size());
        return ResultList;
    }

    /**
     * 将result对象转化为map输出
     * @param result
     * @return
     */
    public static Map result2Map(Result result){
        Map map = new HashMap();
        if (result != null && result.listCells() != null) {
            for (Cell cell : result.listCells()) {
                String key = Bytes.toString(CellUtil.cloneQualifier(cell));
                String value = Bytes.toString(CellUtil.cloneValue(cell));
                System.out.println(key + " => " + value);
                map.put(key, value);
            }
        }
        return map;
    }

    /**
     * 通过rowkey查询指定某个字段的值
     * @param tableName
     * @param rowkey
     * @param qualifier
     * @return
     */
    public static byte[] searchQualifierByrowkey(String tableName, String rowkey, String qualifier) {
        System.out.println("get result. table=" + tableName + " rowkey=" + rowkey + " qualifier=" + qualifier);
        Result result = searchByRowkey(tableName, rowkey);
        if (result != null && result.listCells() != null) {
            for (Cell cell : result.listCells()) {
                String key = Bytes.toString(CellUtil.cloneQualifier(cell));
                if (key.equals(qualifier)) {
                    String value = Bytes.toString(CellUtil.cloneValue(cell));
                    System.out.println(key + " => " + value);
                    return CellUtil.cloneValue(cell);
                }
            }
        }
        return null;
    }

    /**
     * 查看hbase表中的全部数据
     * @param tableName
     * @return
     */
    public static List> scan(String tableName) {
        System.out.println("scan table " + tableName);
        try {
            HTable table = getTable(tableName);
            Scan scan = new Scan();
            ResultScanner rs = table.getScanner(scan);
            List> resList = new ArrayList>();
            for (Result r : rs) {
                Map m = result2Map(r);
                StringBuilder sb = new StringBuilder();
                for(String k : m.keySet()) {
                    sb.append(k).append("=>").append(m.get(k)).append(" ");
                }
                System.out.println(sb.toString());
                resList.add(m);
            }
            return resList;
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    public static void main(String[] args) {
        getAllTables();
//        createTable("test1");
//        deleteTable("test");

//        put("test1" , "rowkey001" , "mycf" , "info" , "1111111111");

//        long start = System.currentTimeMillis();
//        bulkPutData("test1" , "mycf");
//        System.out.println(System.currentTimeMillis() - start);

//        result2Map(searchByRowkey("test1", "9999"));
//        searchQualifierByrowkey("test1" , "9999" , "cust_name");
//        scan("test1");
        result2Map(searchByRowkey("test1" , "14"));
    }
}
/**
 * Queries HBase through the Elasticsearch secondary index: the search condition is sent
 * to Elasticsearch, the returned document ids are used as HBase rowkeys, and the matching
 * rows are then read from HBase and printed.
 */
public class searchHbaseByES {
    public static void main(String[] args) {
        long start = System.currentTimeMillis();
        // step 1: Elasticsearch returns the ids (= rowkeys) of the matching documents
        ArrayList<String> rowkeyList = elasticsearchUtil.preciseSearchByField(
                "channel_active", "report", "matnr", "1006084301017110");
        System.out.println("ES 结果大小:" + rowkeyList.size());

        // step 2: fetch the full rows from HBase by rowkey
        ArrayList<Result> results = hbaseUtil.searchByRowkeyList("test1", rowkeyList);
        System.out.println("11111111111111111:::" + results.size());
        for (Result result : results) {
            hbaseUtil.result2Map(result);
        }
        // elapsed time of the whole lookup in milliseconds
        System.out.println(System.currentTimeMillis() - start);

    }
}

pom.xml 依赖:


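<dependency>
  <groupId>org.apache.hbase</groupId>
  <artifactId>hbase-server</artifactId>
  <version>1.3.3</version>
</dependency>

<dependency>
  <groupId>org.elasticsearch.client</groupId>
  <artifactId>transport</artifactId>
  <version>6.1.3</version>
</dependency>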

在自己的测试集群里实现了以 ES 作为 HBase 二级索引的查询:先用查询条件检索 ES,ES 返回 rowkey(即文档 id),再用 rowkey 到 HBase 中取出整行数据并返回结果。

目前还未实现分页,待更新……

 

 

 

 

 

 

 

 

 

 

 

 

 

你可能感兴趣的:(使用ES作为hbase的二级索引进行查询)