Integrating HBase 2.1 Directly with Spring Boot


Last time I shared how to integrate HBase through a Spring-based starter and promised to cover integrating with the client directly. Here it is.

1. Maven dependencies

<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>3.0.0</version>
    <exclusions>
        <exclusion>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
        </exclusion>
        <exclusion>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
        </exclusion>
        <exclusion>
            <groupId>javax.servlet</groupId>
            <artifactId>servlet-api</artifactId>
        </exclusion>
    </exclusions>
</dependency>
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-shaded-client</artifactId>
    <version>2.1.0</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>3.0.0</version>
</dependency>

Three things to note here (see the directory sketch after this list):
1. The advantage of hbase-shaded-client over the plain hbase-client is that it bundles (shades) the dependency versions hbase-client needs into one jar, so they won't clash with your own.
2. hadoop-common must be included, otherwise table operations may fail at runtime.
3. Set the HADOOP_HOME environment variable: create a hadoop directory with a bin subdirectory inside, and put the matching version's hadoop-common binaries (the winutils files, downloadable online) in it.
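For illustration, on Windows the layout could look like this; the path is just an example, and the bin contents are what a winutils bundle for Hadoop 3.0 typically ships, so check your actual download:

HADOOP_HOME=D:\hadoop
D:\hadoop\bin\winutils.exe
D:\hadoop\bin\hadoop.dll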

2. Configuration classes
HBaseConfig

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

import java.io.IOException;
import java.util.Map;

/**
 * @author zhengwen
 **/
@Configuration
@EnableConfigurationProperties(HBaseProperties.class)
public class HBaseConfig {
    private final HBaseProperties properties;

    public HBaseConfig(HBaseProperties properties) {
        this.properties = properties;
    }

    @Bean
    public Connection hbaseConnection() throws IOException {
        // Let Spring fail fast if the connection cannot be created,
        // instead of silently registering a null bean.
        return ConnectionFactory.createConnection(configuration());
    }

    public org.apache.hadoop.conf.Configuration configuration() {
        org.apache.hadoop.conf.Configuration configuration = HBaseConfiguration.create();
        // Copy every entry under the "hbase.config" property map into the HBase configuration.
        Map<String, String> config = properties.getConfig();
        if (config != null) {
            config.forEach(configuration::set);
        }
        return configuration;
    }
}

HBaseProperties

import org.springframework.boot.context.properties.ConfigurationProperties;

import java.util.Map;

/**
 * @author zhengwen
 **/
@ConfigurationProperties(prefix = "hbase")
public class HBaseProperties {
    private Map<String, String> config;

    public Map<String, String> getConfig() {
        return config;
    }

    public void setConfig(Map<String, String> config) {
        this.config = config;
    }
}
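For reference, a minimal sketch of what the matching application.yml could look like; the values are placeholders taken from the hbase-site.xml shown later, not required settings:

hbase:
  config:
    hbase.zookeeper.quorum: bigdata01,bigdata02,bigdata03
    hbase.zookeeper.property.clientPort: 2181
    zookeeper.znode.parent: /hbase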

HbaseUtil


import com.google.common.io.Resources;
import lombok.extern.slf4j.Slf4j;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;

import java.io.File;
import java.io.IOException;

/**
 * @author zhengwen
 **/
@Slf4j
public class HbaseUtil {
    /**
     * Check whether a table exists.
     * @param admin HBase Admin handle
     * @param tableName table name object
     * @return true if the table exists, false otherwise
     */
    public static boolean checkTableExist(Admin admin, TableName tableName) throws IOException {
        if (admin == null || tableName == null){
            return false;
        }
        return admin.tableExists(tableName);
    }

    /**
     * Initialize and return an Admin handle.
     * @param hbaseConf HBase configuration (optionally overlaid with hbase-site.xml)
     * @param hbaseConnection the Spring-managed connection
     */
    public static Admin initConn(Configuration hbaseConf, Connection hbaseConnection) {
        //Supplementary settings can come from hbase-site.xml; copy it from the HBase installation directory onto the classpath
        Admin admin = null;
        hbaseConf.addResource(Resources.getResource("hbase-site.xml"));
        //Windows workaround: point hadoop.home.dir at the working directory and create an empty bin/winutils.exe so Hadoop's native-binary check passes
        File workaround = new File(".");
        System.getProperties().put("hadoop.home.dir",
                workaround.getAbsolutePath());
        new File("./bin").mkdirs();
        try {
            new File("./bin/winutils.exe").createNewFile();
        } catch (IOException e) {
            log.error("----Failed to create the winutils.exe placeholder: {}", e.getMessage(), e);
        }
        try {
            //Reuse the Spring-managed connection instead of creating a new one here;
            //a connection created in this method would leak, since closeConn deliberately never closes connections
            admin = hbaseConnection.getAdmin();
        } catch (IOException e) {
            log.error("---Failed to obtain the HBase Admin: {}", e.getMessage(), e);
        }
        return admin;
    }

    /**
     * Close the Admin handle.
     * @param admin the Admin to close
     * @param hbaseConnection the Spring-managed connection (intentionally left open)
     */
    public static void closeConn(Admin admin, Connection hbaseConnection) {
        try {
            if (null != admin) {
                admin.close();
            }
            /* Important: do NOT close the connection here. It is managed by Spring,
               and closing it breaks every subsequent operation - this cost me half
               a day of debugging.
            if (null != hbaseConnection) {
                hbaseConnection.close();
            }*/
        } catch (IOException e) {
            log.error("---Failed to close the HBase Admin: {}", e.getMessage(), e);
        }
    }
}

3. Usage


import com.alibaba.fastjson.JSON;
import com.fillersmart.g20.dc.collect.store.constant.Constant;
import com.fillersmart.g20.dc.collect.store.model.HbaseTable;
import com.fillersmart.g20.dc.collect.store.service.HbaseStoreService;
import com.fillersmart.g20.dc.collect.store.util.HbaseDataUtil;
import com.fillersmart.g20.dc.collect.store.util.HbaseUtil;
import com.fillersmart.g20.dc.core.data.HbaseSaveData;
import com.fillersmart.g20.dc.core.result.Result;
import com.fillersmart.g20.dc.core.result.ResultGenerator;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;

import javax.annotation.Resource;
import java.util.*;

/**
 * @author zhengwen
 **/
@Slf4j
@Service
public class HbaseStoreServiceImpl implements HbaseStoreService {

    @Autowired
    private Connection hbaseConnection;

    @Override
    public Result<?> createTable(HbaseTable hbaseTable) {

        String tableNameStr = hbaseTable.getTableName();
        Admin admin = null;
        try {
            Configuration hbaseConf = hbaseConnection.getConfiguration();
            admin = HbaseUtil.initConn(hbaseConf, hbaseConnection);

            TableName tableName = TableName.valueOf(tableNameStr);
            List<Map<String, String>> columnMpList = hbaseTable.getColumnMapList();

            boolean isExist = HbaseUtil.checkTableExist(admin, tableName);

            if (isExist) {
                log.info("----表:{}已经存在", tableNameStr);
                return ResultGenerator.genFailResult(tableName + "表已经存在");
            } else {
                //Convert the input parameters into column-family descriptors
                List<ColumnFamilyDescriptor> columnFamilyDescriptorList = new ArrayList<>();
                for (Map<String, String> mp : columnMpList) {
                    String colEn = mp.get("colEn");
                    if (colEn != null) {
                        ColumnFamilyDescriptor columnFamilyDescriptor = ColumnFamilyDescriptorBuilder.newBuilder(colEn.getBytes()).build();
                        columnFamilyDescriptorList.add(columnFamilyDescriptor);
                    }
                }
                TableDescriptor tableDescriptor = TableDescriptorBuilder.newBuilder(tableName).setColumnFamilies(columnFamilyDescriptorList).build();
                admin.createTable(tableDescriptor);
            }

        } catch (Exception e) {
            log.error("---Failed to create table {}: {}", tableNameStr, e.getMessage(), e);
            return ResultGenerator.genFailResult(e.getMessage());
        } finally {
            HbaseUtil.closeConn(admin, hbaseConnection);
        }

        return ResultGenerator.genSuccessResult();
    }

    @Override
    public Result<?> saveData(HbaseSaveData hbaseSaveData) {
        String tableNameStr = hbaseSaveData.getTableName();
        List<Map<String, String>> dataMpList = hbaseSaveData.getDataList();
        Admin admin = null;
        try {
            Configuration hbaseConf = hbaseConnection.getConfiguration();
            admin = HbaseUtil.initConn(hbaseConf, hbaseConnection);

            //Record table
            String recordTableNameStr = tableNameStr + "_record";
            TableName recordTableName = TableName.valueOf(recordTableNameStr);
            boolean tableExist = HbaseUtil.checkTableExist(admin, recordTableName);
            if (tableExist) {
                Table table = hbaseConnection.getTable(recordTableName);

                if (CollectionUtils.isEmpty(dataMpList)) {
                    return ResultGenerator.genFailResult("No data to save");
                }
                List<Put> putList = new ArrayList<>();
                for (Map<String, String> dataMp : dataMpList) {
                    //Accumulate the puts for every row; a plain reassignment here would keep only the last row
                    putList.addAll(HbaseDataUtil.initDataList(dataMp, Constant.RECORD_SAVE));
                }
                if (CollectionUtils.isEmpty(putList)) {
                    log.error("---Building the record puts produced nothing: {}", JSON.toJSONString(putList));
                } else {
                    table.put(putList);
                }
                table.close();
                log.info("----Table {} data saved to HBase", recordTableName.getNameAsString());
            } else {
                log.error("---Table {} does not exist, cannot save", recordTableNameStr);
                return ResultGenerator.genFailResult(recordTableNameStr + " does not exist, cannot save");
            }

            //Shadow table holding the latest record
            String shadowTableNameStr = tableNameStr + "_shadow";
            TableName shadowTableName = TableName.valueOf(shadowTableNameStr);
            tableExist = HbaseUtil.checkTableExist(admin, shadowTableName);
            if (tableExist) {
                Table table = hbaseConnection.getTable(shadowTableName);

                if (CollectionUtils.isEmpty(dataMpList)) {
                    return ResultGenerator.genFailResult("No data to save");
                }
                List<Put> putList = new ArrayList<>();
                for (Map<String, String> dataMp : dataMpList) {
                    //Accumulate here as well, for the same reason as above
                    putList.addAll(HbaseDataUtil.initDataList(dataMp, Constant.SHADOW_SAVE));
                }
                if (CollectionUtils.isEmpty(putList)) {
                    log.error("---Building the shadow puts produced nothing: {}", JSON.toJSONString(putList));
                } else {
                    table.put(putList);
                }
                table.close();
                log.info("----Table {} data saved to HBase", shadowTableName.getNameAsString());
            } else {
                log.error("---Table {} does not exist, cannot save", shadowTableNameStr);
                return ResultGenerator.genFailResult(shadowTableNameStr + " does not exist, cannot save");
            }
        } catch (Exception e) {
            log.error("---Failed to save data to HBase table {}: {}", tableNameStr, e.getMessage(), e);
            return ResultGenerator.genFailResult(tableNameStr + " data save failed");
        } finally {
            HbaseUtil.closeConn(admin, hbaseConnection);
        }

        return ResultGenerator.genSuccessResult();
    }

    @Override
    public Result<?> restCreateTable(HbaseTable hbaseTable) {
        String tableNameStr = hbaseTable.getTableName();

        List<Map<String, String>> columnMpList = hbaseTable.getColumnMapList();

        if (StringUtils.isBlank(tableNameStr)) {
            return ResultGenerator.genFailResult("表名不能为空");
        }
        //TODO: how should we check whether the table exists here?
        //Convert the table parameters into descriptors


        return null;
    }
}

That's a full service implementation for reference; it covers table creation and data saving. Three key points (see the sketch after this list):
1. When creating an HBase table you don't need to define columns up front; you only need to set up the column families.
2. Saving data to HBase is similar to JDBC: every row can be converted to a map whose keys are column names and whose values are the cell values, then inserted in batches.
3. Make sure the object-to-map conversion produces no nested objects. If you really need one, it's not impossible - just store it as a JSON string in a single column.
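HbaseDataUtil.initDataList is not shown in this post. Purely as a sketch of point 2, here is one way rows-as-maps could be turned into Put objects; the "rowKey" entry, the "f" column family, and the class shape are illustrative assumptions, not the project's actual code:

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

public class HbaseDataUtilSketch {

    /**
     * Turn one row (column name -> value) into Puts. Assumes the map carries its
     * row key under a hypothetical "rowKey" entry and that every column lives in
     * a single family "f" (use whatever family the table was created with).
     * saveType (e.g. Constant.RECORD_SAVE) is kept to match the call site; the
     * real implementation presumably branches on it.
     */
    public static List<Put> initDataList(Map<String, String> dataMp, String saveType) {
        List<Put> putList = new ArrayList<>();
        String rowKey = dataMp.get("rowKey");
        if (rowKey == null) {
            return putList;
        }
        Put put = new Put(Bytes.toBytes(rowKey));
        for (Map.Entry<String, String> entry : dataMp.entrySet()) {
            if ("rowKey".equals(entry.getKey())) {
                continue;
            }
            //Nested objects should already be flattened to JSON strings (point 3 above)
            put.addColumn(Bytes.toBytes("f"),
                    Bytes.toBytes(entry.getKey()),
                    Bytes.toBytes(entry.getValue()));
        }
        putList.add(put);
        return putList;
    }
}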
4. Supplement
hbase-site.xml is actually optional: without it the client simply uses the defaults from the HBase installation, and with it, the settings in this file override the installation's configuration.

<?xml version="1.0" encoding="UTF-8"?>

<!--Autogenerated by Cloudera Manager-->
<configuration>
  <property>
    <name>hbase.rootdir</name>
    <value>hdfs://bigdata01:8020/hbase</value>
  </property>
  <property>
    <name>hbase.client.write.buffer</name>
    <value>2097152</value>
  </property>
  <property>
    <name>hbase.client.pause</name>
    <value>100</value>
  </property>
  <property>
    <name>hbase.client.retries.number</name>
    <value>10</value>
  </property>
  <property>
    <name>hbase.client.scanner.caching</name>
    <value>100</value>
  </property>
  <property>
    <name>hbase.client.keyvalue.maxsize</name>
    <value>10485760</value>
  </property>
  <property>
    <name>hbase.ipc.client.allowsInterrupt</name>
    <value>true</value>
  </property>
  <property>
    <name>hbase.client.primaryCallTimeout.get</name>
    <value>10</value>
  </property>
  <property>
    <name>hbase.client.primaryCallTimeout.multiget</name>
    <value>10</value>
  </property>
  <property>
    <name>hbase.client.scanner.timeout.period</name>
    <value>60000</value>
  </property>
  <property>
    <name>hbase.coprocessor.region.classes</name>
    <value>org.apache.hadoop.hbase.security.access.SecureBulkLoadEndpoint</value>
  </property>
  <property>
    <name>hbase.regionserver.thrift.http</name>
    <value>false</value>
  </property>
  <property>
    <name>hbase.thrift.support.proxyuser</name>
    <value>false</value>
  </property>
  <property>
    <name>hbase.rpc.timeout</name>
    <value>60000</value>
  </property>
  <property>
    <name>hbase.snapshot.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>hbase.snapshot.region.timeout</name>
    <value>300000</value>
  </property>
  <property>
    <name>hbase.snapshot.master.timeout.millis</name>
    <value>300000</value>
  </property>
  <property>
    <name>hbase.security.authentication</name>
    <value>simple</value>
  </property>
  <property>
    <name>hbase.rpc.protection</name>
    <value>authentication</value>
  </property>
  <property>
    <name>zookeeper.session.timeout</name>
    <value>60000</value>
  </property>
  <property>
    <name>zookeeper.znode.parent</name>
    <value>/hbase</value>
  </property>
  <property>
    <name>zookeeper.znode.rootserver</name>
    <value>root-region-server</value>
  </property>
  <property>
    <name>hbase.zookeeper.quorum</name>
    <value>bigdata03,bigdata01,bigdata02</value>
  </property>
  <property>
    <name>hbase.zookeeper.property.clientPort</name>
    <value>2181</value>
  </property>
  <property>
    <name>hbase.rest.ssl.enabled</name>
    <value>false</value>
  </property>
</configuration>

One last note: since the cluster is addressed by host aliases, you must configure your hosts file, otherwise names like bigdata01 and bigdata02 won't resolve to any address. Online everyone recommends SwitchHosts, but on Windows 10 I found that editing back and forth with it made it easy to lose the original hosts file, so I still use a small tool my colleagues and I relied on years ago doing payments at Zall e-commerce - it still works fine on Windows 10.
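For example, hosts entries like these (the IP addresses are placeholders for your cluster's real ones):

192.168.1.101  bigdata01
192.168.1.102  bigdata02
192.168.1.103  bigdata03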
I haven't included any wrapper interfaces here; next time I'll share the HBase API along with the related wrappers, and that integration approach will be different again.
I'm not using the latest versions because the project runs Hadoop 3.0 + HBase 2.1. Feel free to swap in other versions and experiment.
