序
上次分享了基于spring的starter集成hbase的,说过要分享直接对接的。
一、pom引包
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>3.0.0</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
<exclusion>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-shaded-client</artifactId>
<version>2.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>3.0.0</version>
</dependency>
这里要说的有3点
1、hbase-shaded-client这个包相比原生的hbase-client包的好处是:它会将hbase-client对应版本的依赖整合在一起
2、hadoop-common这个包要引入,不然可能会在表操作时报错
3、注意要在环境变量配置HADOOP_HOME:建一个hadoop目录,目录下建一个bin,里面放hadoop-common(网上下载)里对应版本的文件
二、配置类
HBaseConfig
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import java.io.IOException;
import java.util.Map;
import java.util.Set;
/**
 * Spring configuration that exposes a single shared HBase {@link Connection}
 * bean, built from the free-form property map bound by {@link HBaseProperties}.
 *
 * @author zhengwen
 **/
@Configuration
@EnableConfigurationProperties(HBaseProperties.class)
public class HBaseConfig {

    private final HBaseProperties properties;

    public HBaseConfig(HBaseProperties properties) {
        this.properties = properties;
    }

    /**
     * Creates the application-wide HBase connection.
     *
     * @return a live HBase {@link Connection}
     * @throws IllegalStateException if the connection cannot be established;
     *         the original swallowed the IOException and registered a null
     *         bean, which only surfaced as an NPE on first use
     */
    @Bean
    public Connection hbaseConnection() {
        try {
            return ConnectionFactory.createConnection(configuration());
        } catch (IOException e) {
            // Fail fast at startup and preserve the cause instead of
            // printing the message and returning null.
            throw new IllegalStateException("Unable to create HBase connection", e);
        }
    }

    /**
     * Translates the {@code hbase.config.*} property map into a Hadoop
     * {@link org.apache.hadoop.conf.Configuration}.
     *
     * @return configuration seeded by {@code HBaseConfiguration.create()} and
     *         overlaid with every configured key/value pair (a missing map is
     *         tolerated and simply yields the defaults)
     */
    public org.apache.hadoop.conf.Configuration configuration() {
        org.apache.hadoop.conf.Configuration configuration = HBaseConfiguration.create();
        Map<String, String> config = properties.getConfig();
        if (config != null) {
            // entrySet avoids one extra lookup per key compared with
            // iterating keySet and calling get().
            for (Map.Entry<String, String> entry : config.entrySet()) {
                configuration.set(entry.getKey(), entry.getValue());
            }
        }
        return configuration;
    }
}
HBaseProperties
import org.springframework.boot.context.properties.ConfigurationProperties;
import java.util.Map;
/**
 * Binds properties under the {@code hbase} prefix into a free-form map, e.g.
 * {@code hbase.config.hbase.zookeeper.quorum=bigdata01}. The map entries are
 * copied verbatim into the Hadoop Configuration by HBaseConfig.
 *
 * @author zhengwen
 **/
@ConfigurationProperties(prefix = "hbase")
public class HBaseProperties {
// Raw key/value pairs passed straight to org.apache.hadoop.conf.Configuration.set
private Map<String, String> config;
public Map<String, String> getConfig() {
return config;
}
public void setConfig(Map<String, String> config) {
this.config = config;
}
}
HbaseUtil
import com.google.common.io.Resources;
import lombok.extern.slf4j.Slf4j;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import java.io.File;
import java.io.IOException;
/**
* @author zhengwen
**/
@Slf4j
public class HbaseUtil {
/**
* 判断表是否存在
* @param admin hbase连接admin
* @param tableName 表对象
* @return 布尔值,true是,false否
*/
public static boolean checkTableExist(Admin admin, TableName tableName) throws IOException {
if (admin == null || tableName == null){
return false;
}
return admin.tableExists(tableName);
}
/**
* 初始化连接
* @param hbaseConf
* @param hbaseConnection
*/
public static Admin initConn(Configuration hbaseConf, Connection hbaseConnection) {
//这里可以通过xml补充配置,这个hbase-site.xml在hbase的安装目录去找
Admin admin = null;
hbaseConf.addResource(Resources.getResource("hbase-site.xml"));
File workaround = new File(".");
System.getProperties().put("hadoop.home.dir",
workaround.getAbsolutePath());
new File("./bin").mkdirs();
try {
new File("./bin/winutils.exe").createNewFile();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
log.error("----初始hbase连接异常:{}",e.getMessage(),e);
}
try {
hbaseConnection = ConnectionFactory.createConnection(hbaseConf);
admin = hbaseConnection.getAdmin();
} catch (IOException e) {
e.printStackTrace();
log.error("---获取hbase admin异常:{}",e.getMessage(),e);
}
return admin;
}
/**
* 关闭连接
* @param admin
* @param hbaseConnection
*/
public static void closeConn(Admin admin,Connection hbaseConnection) {
try {
if (null != admin) {
admin.close();
}
/* 一定注意,这里这个连接不能关闭,否则后面就都没有了,让spring自己管理,
我就呗这里坑了半天
if (null != hbaseConnection) {
hbaseConnection.close();
}*/
} catch (IOException e) {
e.printStackTrace();
}
}
}
三、使用
import com.alibaba.fastjson.JSON;
import com.fillersmart.g20.dc.collect.store.constant.Constant;
import com.fillersmart.g20.dc.collect.store.model.HbaseTable;
import com.fillersmart.g20.dc.collect.store.service.HbaseStoreService;
import com.fillersmart.g20.dc.collect.store.util.HbaseDataUtil;
import com.fillersmart.g20.dc.collect.store.util.HbaseUtil;
import com.fillersmart.g20.dc.core.data.HbaseSaveData;
import com.fillersmart.g20.dc.core.result.Result;
import com.fillersmart.g20.dc.core.result.ResultGenerator;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;
import javax.annotation.Resource;
import java.util.*;
/**
* @author zhengwen
**/
@Slf4j
@Service
public class HbaseStoreServiceImpl implements HbaseStoreService {
@Autowired
private Connection hbaseConnection;
@Override
public Result<?> createTable(HbaseTable hbaseTable) {
String tableNameStr = hbaseTable.getTableName();
Admin admin = null;
try {
Configuration hbaseConf = hbaseConnection.getConfiguration();
admin = HbaseUtil.initConn(hbaseConf, hbaseConnection);
TableName tableName = TableName.valueOf(tableNameStr);
List<Map<String, String>> columnMpList = hbaseTable.getColumnMapList();
boolean isExist = HbaseUtil.checkTableExist(admin, tableName);
if (isExist) {
log.info("----表:{}已经存在", tableNameStr);
return ResultGenerator.genFailResult(tableName + "表已经存在");
} else {
//入参转列簇信息
List<ColumnFamilyDescriptor> columnFamilyDescriptorList = new ArrayList<>();
for (Map<String, String> mp : columnMpList) {
Set<String> keySet = mp.keySet();
Iterator<String> it = keySet.iterator();
while (it.hasNext()) {
String key = it.next();
if ("colEn".equals(key)) {
String colEn = mp.get(key);
ColumnFamilyDescriptor columnFamilyDescriptor = ColumnFamilyDescriptorBuilder.newBuilder(colEn.getBytes()).build();
columnFamilyDescriptorList.add(columnFamilyDescriptor);
}
}
}
TableDescriptor tableDescriptor = TableDescriptorBuilder.newBuilder(tableName).setColumnFamilies(columnFamilyDescriptorList).build();
admin.createTable(tableDescriptor);
}
} catch (Exception e) {
e.printStackTrace();
log.error("---创建表:{}异常,原因:{}", tableNameStr, e.getCause().getMessage(), e);
return ResultGenerator.genFailResult(e.getCause().getMessage());
} finally {
HbaseUtil.closeConn(admin, hbaseConnection);
}
return ResultGenerator.genSuccessResult();
}
@Override
public Result<?> saveData(HbaseSaveData hbaseSaveData) {
String tableNameStr = hbaseSaveData.getTableName();
List<Map<String, String>> dataMpList = hbaseSaveData.getDataList();
Admin admin = null;
try {
Configuration hbaseConf = hbaseConnection.getConfiguration();
admin = HbaseUtil.initConn(hbaseConf, hbaseConnection);
//record
String recordTableNameStr = tableNameStr + "_record";
TableName recordTableName = TableName.valueOf(recordTableNameStr);
boolean tableExist = HbaseUtil.checkTableExist(admin, recordTableName);
if (tableExist) {
Table table = hbaseConnection.getTable(recordTableName);
if (CollectionUtils.isEmpty(dataMpList)) {
return ResultGenerator.genFailResult("保存数据为空");
}
List<Put> putList = new ArrayList<>();
for (Map<String, String> dataMp : dataMpList) {
putList = HbaseDataUtil.initDataList(dataMp, Constant.RECORD_SAVE);
}
if (CollectionUtils.isEmpty(putList)) {
log.error("---记录数据存储数据初始异常:{}", JSON.toJSONString(putList));
} else {
table.put(putList);
}
table.close();
log.info("----表:{}数据存储HBase成功",recordTableName.getNameAsString());
} else {
log.error("---表:{}不存在,无法保存", recordTableNameStr);
return ResultGenerator.genFailResult(recordTableNameStr + "表不存在,无法保存");
}
//最新记录
String shadowTableNameStr = tableNameStr + "_shadow";
TableName shadowTableName = TableName.valueOf(shadowTableNameStr);
tableExist = HbaseUtil.checkTableExist(admin, shadowTableName);
if (tableExist) {
Table table = hbaseConnection.getTable(shadowTableName);
if (CollectionUtils.isEmpty(dataMpList)) {
return ResultGenerator.genFailResult("保存数据为空");
}
List<Put> putList = new ArrayList<>();
for (Map<String, String> dataMp : dataMpList) {
putList = HbaseDataUtil.initDataList(dataMp, Constant.SHADOW_SAVE);
}
if (CollectionUtils.isEmpty(putList)) {
log.error("---shadow最新数据存储数据初始异常:{}", JSON.toJSONString(putList));
} else {
table.put(putList);
}
table.close();
log.info("----表:{}数据存储HBase成功",shadowTableName.getNameAsString());
} else {
log.error("---表:{}不存在,无法保存", shadowTableNameStr);
return ResultGenerator.genFailResult(shadowTableNameStr + "表不存在,无法保存");
}
} catch (Exception e) {
e.printStackTrace();
log.error("---Hbase数据表:{}保存数据异常,原因:{}", tableNameStr, e.getMessage(), e);
return ResultGenerator.genFailResult(tableNameStr + "数据存储失败");
} finally {
HbaseUtil.closeConn(admin, hbaseConnection);
}
return ResultGenerator.genSuccessResult();
}
@Override
public Result<?> restCreateTable(HbaseTable hbaseTable) {
String tableNameStr = hbaseTable.getTableName();
List<Map<String, String>> columnMpList = hbaseTable.getColumnMapList();
if (StringUtils.isBlank(tableNameStr)) {
return ResultGenerator.genFailResult("表名不能为空");
}
//TODO 怎么判断表是否存在?
//表参数转
return null;
}
}
这里就直接贴一个service的实现类大家参考,里面也就写了建表与数据保存的。这里要说的关键点有3点:
1、hbase的建表时不需要考虑列,只需要先设置好列簇就行
2、hbase的数据保存类似jdbc,所有数据都可以转为map,key是字段名,value是值,批量插入
3、注意存储对象转map时不能有嵌套对象,当然如果真有也不是不可以,也就是字段里存的是json字符串而已
四、补充
hbase-site.xml,这个其实可以不要:如果不要,就使用hbase安装目录里的默认配置;要的话,可以用这里面的配置覆盖安装目录里的配置
<?xml version="1.0" encoding="UTF-8"?>
<!--Autogenerated by Cloudera Manager-->
<configuration>
<property>
<name>hbase.rootdir</name>
<value>hdfs://bigdata01:8020/hbase</value>
</property>
<property>
<name>hbase.client.write.buffer</name>
<value>2097152</value>
</property>
<property>
<name>hbase.client.pause</name>
<value>100</value>
</property>
<property>
<name>hbase.client.retries.number</name>
<value>10</value>
</property>
<property>
<name>hbase.client.scanner.caching</name>
<value>100</value>
</property>
<property>
<name>hbase.client.keyvalue.maxsize</name>
<value>10485760</value>
</property>
<property>
<name>hbase.ipc.client.allowsInterrupt</name>
<value>true</value>
</property>
<property>
<name>hbase.client.primaryCallTimeout.get</name>
<value>10</value>
</property>
<property>
<name>hbase.client.primaryCallTimeout.multiget</name>
<value>10</value>
</property>
<property>
<name>hbase.client.scanner.timeout.period</name>
<value>60000</value>
</property>
<property>
<name>hbase.coprocessor.region.classes</name>
<value>org.apache.hadoop.hbase.security.access.SecureBulkLoadEndpoint</value>
</property>
<property>
<name>hbase.regionserver.thrift.http</name>
<value>false</value>
</property>
<property>
<name>hbase.thrift.support.proxyuser</name>
<value>false</value>
</property>
<property>
<name>hbase.rpc.timeout</name>
<value>60000</value>
</property>
<property>
<name>hbase.snapshot.enabled</name>
<value>true</value>
</property>
<property>
<name>hbase.snapshot.region.timeout</name>
<value>300000</value>
</property>
<property>
<name>hbase.snapshot.master.timeout.millis</name>
<value>300000</value>
</property>
<property>
<name>hbase.security.authentication</name>
<value>simple</value>
</property>
<property>
<name>hbase.rpc.protection</name>
<value>authentication</value>
</property>
<property>
<name>zookeeper.session.timeout</name>
<value>60000</value>
</property>
<property>
<name>zookeeper.znode.parent</name>
<value>/hbase</value>
</property>
<property>
<name>zookeeper.znode.rootserver</name>
<value>root-region-server</value>
</property>
<property>
<name>hbase.zookeeper.quorum</name>
<value>bigdata03,bigdata01,bigdata02</value>
</property>
<property>
<name>hbase.zookeeper.property.clientPort</name>
<value>2181</value>
</property>
<property>
<name>hbase.rest.ssl.enabled</name>
<value>false</value>
</property>
</configuration>
最后这里说下,我们用的都是别名,所以要配置host,不然这些个bigdata01、bigdata02都不知道是什么地址。网上都推荐用SwitchHosts,但是我在win10上改来改去很容易把原host文件弄丢,还是用早些年在卓尔电商做支付时同事们都用的一个小程序方便,在win10上依然可用,不错。
然后呢我这里是没有加一些封装好的接口的,下次分享hbaseApi,顺便把相关的封装的接口分享学习下,下次的集成方式也将又不一样哦。
这里没有用最新版本是因为项目上使用的就是3.0 + 2.1。同学们可以自己更换版本玩。