本篇使用到的开发工具IntelliJ IDEA,jdk版本为:jdk1.8,虚拟机版本为CentOS 7。
MySQL版本:5.7.34
hadoop 3.1.3
HBase版本:2.3.5
Java连接HBase
先新建一个Maven项目
在pom.xml中放入
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>2.3.5</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>3.1.3</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-auth</artifactId>
<version>3.1.3</version>
</dependency>
</dependencies>
在main下创建一个文件夹resources,并在文件夹中创建一个文件log4j.properties
log4j.properties中写入的数据为
log4j.rootLogger=INFO, stdout
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d %p [%c] - %m%n
log4j.appender.logfile=org.apache.log4j.FileAppender
log4j.appender.logfile.File=log/hd.log
log4j.appender.logfile.layout=org.apache.log4j.PatternLayout
log4j.appender.logfile.layout.ConversionPattern=%d %p [%c] - %m%n
代码块
package org.example.cn.kgc.base;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.List;
public class Java2HBase{
static Configuration config = null;
public static void init(String...items) {
config = HBaseConfiguration.create();
for (String item : items) {
String[] ps = item.split("=");
config.set(ps[0],ps[1]);
}
}
private static void close(AutoCloseable...closes){
for (AutoCloseable close : closes) {
if(null!=close){
try {
close.close();
} catch (Exception e) {
e.printStackTrace();
}
}
}
}
private static Connection con() throws IOException {
return ConnectionFactory.createConnection(config);
}
private static Admin admin(Connection con) throws IOException {
return con.getAdmin();
}
private static boolean nameSpaceExists(String nameSpace,String[] nss){
for (String ns : nss) {
if(nameSpace.equals(ns)){
return true;
}
}
return false;
}
//创建nameSpace
public static void createNameSpace(String nameSpace){
Connection con = null;
Admin admin = null;
try {
admin = admin(con = con());
if(nameSpaceExists(nameSpace,admin.listNamespaces())){
throw new IOException("namespace ["+nameSpace+"]created in failure for existence");
}
admin.createNamespace(NamespaceDescriptor
.create(nameSpace).build());
System.out.println("namespace [ "+nameSpace+"] created in success");
} catch (IOException e) {
e.printStackTrace();
}finally{
close(admin,con);
}
}
//创建表
public static void createTable(String tableName,String columnFamily,String...columnFamilies){
Connection con = null;
Admin admin = null;
try {
admin= admin(con = con());
TableName tn = TableName.valueOf(tableName);
if(admin.tableExists(tn)){
throw new IOException("table [ "+tableName+" ] create in failure for existence");
}
//根据表名创建 表描述构造器
TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tn);
//创建列簇集合
List<ColumnFamilyDescriptor> list = new ArrayList<>();
list.add(ColumnFamilyDescriptorBuilder.of(columnFamily));
for (String family : columnFamilies) {
list.add(ColumnFamilyDescriptorBuilder.of(family));
}
//向表描述器添加列簇
builder.setColumnFamilies(list);
admin.createTable(builder.build());
System.out.println("table[ "+tableName+" ] created in success");
} catch (IOException e) {
e.printStackTrace();
}finally{
close(admin,con);
}
}
//删除表
public static void dropTable(String tableName) {
Connection con = null;
Admin admin = null;
try {
admin = admin(con = con());
TableName tn = TableName.valueOf(tableName);
if (!admin.tableExists(tn)) {
throw new IOException("table [ "+ tableName+" ] dropped in failure for absence");
}
if(admin.isTableEnabled(tn)){
admin.disableTable(tn);
System.out.println("table [ "+tableName+" ] is enabled and is disabled in success");
}
admin.deleteTable(tn);
System.out.println("table [ "+tableName+" ] dropped in success");
} catch (IOException e) {
e.printStackTrace();
}finally{
close(admin,con);
}
}
private static boolean tableExists(Connection con,TableName tableName ){
Admin admin = null;
try {
admin = admin(con);
return admin.tableExists(tableName);
} catch (IOException e) {
e.printStackTrace();
return false;
}finally {
close(admin);
}
}
//插入一条数据
public static void put(String tableName,String rowKey,String family,String column,String value){
String msg = "put [ "+rowKey+" => "+family+" => "+column+" => value ("+value+") ] into table [ "+tableName+" ]";
TableName tn = TableName.valueOf(tableName);
Connection con = null;
Table table = null;
try {
con = con();
if(!tableExists(con,tn)){
throw new IOException("table [ "+tableName+" ] not exist error");
}
table = con.getTable(tn);
//构造带有行间的Put 对象
Put put = new Put(Bytes.toBytes(rowKey));
put.addColumn(Bytes.toBytes(family),Bytes.toBytes(column),Bytes.toBytes(value));
table.put(put);
} catch (IOException e) {
e.printStackTrace();
}finally{
close(table,con);
}
}
/**
* 将file路径指向的文件数据映射到hbase
* 文件名即表名,为了防止命名冲突:tablename_timestamp
* 文件首行为表结构: key,cf:col,...
* @param file
*/
//批量插入
public static void putBatch(String file,String regexSep){
File data = new File(file);
Connection con = null;
BufferedMutator mutator = null;
BufferedReader br = null;
try {
//输入文件验证
if(!data.exists()|| !data.isFile()){
throw new IOException(file+ " not exist or not file error");
}
//解析hbase表名
String[] ns = data.getName().split("_|\\.");
String tableName =ns[0]+":"+ns[1];
TableName tn = TableName.valueOf(tableName);
con = con();
//验证hbase表是否存在
if(!tableExists(con,tn)){
throw new IOException("hbase table [ "+tableName+" ] not exists error");
}
//通过文件首行解析hbase结构
br = new BufferedReader(new FileReader(data)) ;
String line = null;
if((line=br.readLine())==null){
throw new IOException("file [ "+file+" ] empty error");
}
String[] ps = line.split(regexSep);
//创建批量插入异常倾听
DateTimeFormatter dtf = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
BufferedMutator.ExceptionListener listener = (e,_mutator)->{
System.out.println("put data into table [ "+tableName+" ] error"
+e.getNumExceptions()+" rows,retry put at "+dtf.format(LocalDateTime.now()));
int count = 0;
for (int i = 0; i < e.getNumExceptions(); i++) {
Row row = e.getRow(i);
try {
_mutator.mutate((Put)row);
count++;
} catch (IOException ex) {
ex.printStackTrace();
System.out.println("retry put "+row+" error,please check it");
}
}
System.out.println("retry put data into table [ "+tableName+" ] from error total "
+e.getNumExceptions()+" rows,finish "+count+" rows,at "+dtf.format(LocalDateTime.now()));
};
BufferedMutatorParams bmp = new BufferedMutatorParams(tn)
.writeBufferSize(8 * 1024 * 1024)
.listener(listener);
mutator = con.getBufferedMutator(bmp);
int count = 0,CAPACITY = 1000;
List<Put> list = new ArrayList<>(CAPACITY);
Put put = null;
while((line=br.readLine())!=null){
String[] arr = line.split(regexSep);
put = new Put(Bytes.toBytes(arr[0]));
for (int i = 1 ; i <arr.length ; i++) {
String[] ts = ps[i].split(":");
put.addColumn(Bytes.toBytes(ts[0]),Bytes.toBytes(ts[1]),Bytes.toBytes(arr[i]));
}
list.add(put);
if(list.size()==CAPACITY){
mutator.mutate(list);
count += list.size();
list.clear();
}
}
mutator.mutate(list);
count += list.size();
list.clear();
System.out.println("batch put into [ "+tableName+" ,"+count+" rows ] from [ "+file+" ] in success");
} catch (Exception e) {
e.printStackTrace();
System.out.println("batch put from [ "+file+" ] in failure");
}finally{
close(br,mutator,con);
}
}
Test
public static void main(String[] args) {
init("hbase.zookeeper.quorum=192.168.6.160");
//createNameSpace("dsj"); 创建库
//createTable("dsj:test","cf1","cf2","cf3"); //创建表
//dropTable("dsj:test"); //删除表 //put("dsj:test","00003","cf2","gfName","angle");//插入数据
putBatch("D:\\zb\\project\\Hadoop\\hive\\javaTohbase\\files\\dsj_test_165555.txt",",");
}
}