The project needs Flink to write data into HBase, and the cluster has Kerberos authentication enabled. The default HBaseUpsertSinkFunction does not support Kerberos, and without authentication it cannot insert data into HBase.
The fix is to extend the existing HBaseUpsertSinkFunction with Kerberos support. The modified code is below. The main changes are in the open method, which checks whether Kerberos authentication is required and performs the login when it is (jsonObject carries the Kerberos configuration: the keytab file, the Kerberos principal, and so on; an illustrative example follows), and in the invoke method, which checks whether the ticket is close to expiring and re-logs in to Kerberos when only 20% of the ticket lifetime remains.
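For reference, a minimal sketch of the configuration object the sink expects. Only the "haveKerberos" key is actually read by the sink code below; the other key names are assumptions and should match whatever HbaseMutilUtil reads:

JSONObject kerberosConf = new JSONObject();
// the only key referenced in invoke(); "true" switches the Kerberos checks on
kerberosConf.put("haveKerberos", "true");
// assumed key names for the principal and keytab consumed by HbaseMutilUtil.loginKerberos
kerberosConf.put("kerberosPrincipal", "hbase_user@EXAMPLE.COM");
kerberosConf.put("kerberosKeytabFilePath", "/etc/security/keytabs/hbase_user.keytab");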
package com.rongan.realtime.sink;
import com.alibaba.fastjson.JSONObject;
import com.rongan.realtime.util.HbaseMutilUtil;
import org.apache.flink.addons.hbase.HBaseTableSchema;
import org.apache.flink.addons.hbase.util.HBaseConfigurationUtil;
import org.apache.flink.addons.hbase.util.HBaseReadWriteHelper;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.state.FunctionInitializationContext;
import org.apache.flink.runtime.state.FunctionSnapshotContext;
import org.apache.flink.runtime.util.ExecutorThreadFactory;
import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.flink.types.Row;
import org.apache.flink.util.StringUtils;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.client.BufferedMutator;
import org.apache.hadoop.hbase.client.BufferedMutatorParams;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
import static org.apache.flink.util.Preconditions.checkArgument;
/**
* The upsert sink for HBase.
*
* This class leverages {@link BufferedMutator} to buffer multiple
* {@link org.apache.hadoop.hbase.client.Mutation Mutations} before sending the requests to cluster.
* The buffering strategy can be configured by {@code bufferFlushMaxSizeInBytes},
* {@code bufferFlushMaxMutations} and {@code bufferFlushIntervalMillis}.
*/
public class HBaseUpsertSinkFunction
extends RichSinkFunction<Tuple2<Boolean, Row>>
implements CheckpointedFunction, BufferedMutator.ExceptionListener {
private static final long serialVersionUID = 1L;
private static final Logger LOG = LoggerFactory.getLogger(HBaseUpsertSinkFunction.class);
private final String hTableName;
private final HBaseTableSchema schema;
private final byte[] serializedConfig;
private final long bufferFlushMaxSizeInBytes;
private final long bufferFlushMaxMutations;
private final long bufferFlushIntervalMillis;
private transient HBaseReadWriteHelper helper;
private transient Connection connection;
private transient BufferedMutator mutator;
private transient ScheduledExecutorService executor;
private transient ScheduledFuture<?> scheduledFuture;
private transient AtomicLong numPendingRequests;
private JSONObject jsonObject;
private transient volatile boolean closed = false;
/**
* This is set from inside the {@link BufferedMutator.ExceptionListener} if a {@link Throwable}
* was thrown.
*
* Errors will be checked and rethrown before processing each input element, and when the sink is closed.
*/
private final AtomicReference<Throwable> failureThrowable = new AtomicReference<>();
public HBaseUpsertSinkFunction(
String hTableName,
HBaseTableSchema schema,
org.apache.hadoop.conf.Configuration conf,
JSONObject jsonObject,
long bufferFlushMaxSizeInBytes,
long bufferFlushMaxMutations,
long bufferFlushIntervalMillis) {
this.hTableName = hTableName;
this.schema = schema;
// Configuration is not serializable
this.serializedConfig = HBaseConfigurationUtil.serializeConfiguration(conf);
this.bufferFlushMaxSizeInBytes = bufferFlushMaxSizeInBytes;
this.bufferFlushMaxMutations = bufferFlushMaxMutations;
this.bufferFlushIntervalMillis = bufferFlushIntervalMillis;
this.jsonObject = jsonObject;
}
@Override
public void open(Configuration parameters) throws Exception {
LOG.info("start open ...");
org.apache.hadoop.conf.Configuration config = prepareRuntimeConfiguration();
try {
this.helper = new HBaseReadWriteHelper(schema);
this.numPendingRequests = new AtomicLong(0);
if (null == connection) {
// perform Kerberos authentication before creating the HBase connection
HbaseMutilUtil.loginKerberos(config, jsonObject);
this.connection = ConnectionFactory.createConnection(config);
}
// create a parameter instance, set the table name and custom listener reference.
BufferedMutatorParams params = new BufferedMutatorParams(TableName.valueOf(hTableName))
.listener(this)
.writeBufferSize(bufferFlushMaxSizeInBytes);
this.mutator = connection.getBufferedMutator(params);
if (bufferFlushIntervalMillis > 0) {
this.executor = Executors.newScheduledThreadPool(
1, new ExecutorThreadFactory("hbase-upsert-sink-flusher"));
this.scheduledFuture = this.executor.scheduleWithFixedDelay(() -> {
if (closed) {
return;
}
try {
flush();
} catch (Exception e) {
// fail the sink and skip the rest of the items
// if the failure handler decides to throw an exception
failureThrowable.compareAndSet(null, e);
}
}, bufferFlushIntervalMillis, bufferFlushIntervalMillis, TimeUnit.MILLISECONDS);
}
} catch (TableNotFoundException tnfe) {
LOG.error("The table " + hTableName + " not found ", tnfe);
throw new RuntimeException("HBase table '" + hTableName + "' not found.", tnfe);
} catch (IOException ioe) {
LOG.error("Exception while creating connection to HBase.", ioe);
throw new RuntimeException("Cannot create connection to HBase.", ioe);
}
LOG.info("end open.");
}
private org.apache.hadoop.conf.Configuration prepareRuntimeConfiguration() throws IOException {
// create default configuration from current runtime env (`hbase-site.xml` in classpath) first,
// and overwrite configuration using serialized configuration from client-side env (`hbase-site.xml` in classpath).
// user params from client-side have the highest priority
org.apache.hadoop.conf.Configuration runtimeConfig = HBaseConfigurationUtil.deserializeConfiguration(serializedConfig, HBaseConfiguration.create());
// do validation: check key option(s) in final runtime configuration
if (StringUtils.isNullOrWhitespaceOnly(runtimeConfig.get(HConstants.ZOOKEEPER_QUORUM))) {
LOG.error("Can not connect to HBase without {} configuration", HConstants.ZOOKEEPER_QUORUM);
throw new IOException("Check HBase configuration failed, lost: '" + HConstants.ZOOKEEPER_QUORUM + "'!");
}
return runtimeConfig;
}
private void checkErrorAndRethrow() {
Throwable cause = failureThrowable.get();
if (cause != null) {
throw new RuntimeException("An error occurred in HBaseSink.", cause);
}
}
@Override
public void invoke(Tuple2<Boolean, Row> value, Context context) throws Exception {
checkErrorAndRethrow();
// check whether HBase requires Kerberos authentication
if ("true".equals(jsonObject.getString("haveKerberos"))) {
// if the ticket is close to expiring, re-login to Kerberos
HbaseMutilUtil.reLoginKerberos();
}
if (value.f0) {
Put put = helper.createPutMutation(value.f1);
mutator.mutate(put);
} else {
Delete delete = helper.createDeleteMutation(value.f1);
mutator.mutate(delete);
}
// flush when the number of buffered mutations is greater than the configured max size.
if (bufferFlushMaxMutations > 0 && numPendingRequests.incrementAndGet() >= bufferFlushMaxMutations) {
flush();
}
}
private void flush() throws IOException {
// BufferedMutator is thread-safe
mutator.flush();
numPendingRequests.set(0);
checkErrorAndRethrow();
}
@Override
public void close() throws Exception {
closed = true;
if (mutator != null) {
try {
mutator.close();
} catch (IOException e) {
LOG.warn("Exception occurs while closing HBase BufferedMutator.", e);
}
this.mutator = null;
}
if (connection != null) {
try {
connection.close();
} catch (IOException e) {
LOG.warn("Exception occurs while closing HBase Connection.", e);
}
this.connection = null;
}
if (scheduledFuture != null) {
scheduledFuture.cancel(false);
if (executor != null) {
executor.shutdownNow();
}
}
}
@Override
public void snapshotState(FunctionSnapshotContext context) throws Exception {
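// flush all mutations that are still buffered in the BufferedMutator so that a completed
// checkpoint never relies on writes that only exist in the client-side buffer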
while (numPendingRequests.get() != 0) {
flush();
}
}
@Override
public void initializeState(FunctionInitializationContext context) throws Exception {
// nothing to do.
}
@Override
public void onException(RetriesExhaustedWithDetailsException exception, BufferedMutator mutator) throws RetriesExhaustedWithDetailsException {
// fail the sink and skip the rest of the items
// if the failure handler decides to throw an exception
failureThrowable.compareAndSet(null, exception);
}
}
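The HbaseMutilUtil helper used above is not shown in this post. A minimal sketch of what it could look like, built on Hadoop's UserGroupInformation (the configuration key names are assumptions). Note that UserGroupInformation.checkTGTAndReloginFromKeytab() only re-logs in after roughly 80% of the ticket lifetime has elapsed, which matches the "20% remaining" rule described at the top:

package com.rongan.realtime.util;

import com.alibaba.fastjson.JSONObject;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.UserGroupInformation;

import java.io.IOException;

public class HbaseMutilUtil {

    private static volatile UserGroupInformation ugi;

    /** Log in from the keytab when the cluster has Kerberos enabled; otherwise do nothing. */
    public static synchronized void loginKerberos(Configuration config, JSONObject jsonObject) throws IOException {
        if (!"true".equals(jsonObject.getString("haveKerberos"))) {
            return;
        }
        // only needed if the runtime configuration does not already declare Kerberos
        config.set("hadoop.security.authentication", "kerberos");
        UserGroupInformation.setConfiguration(config);
        // key names are assumptions; adapt them to the actual configuration
        UserGroupInformation.loginUserFromKeytab(
                jsonObject.getString("kerberosPrincipal"),
                jsonObject.getString("kerberosKeytabFilePath"));
        ugi = UserGroupInformation.getLoginUser();
    }

    /** Re-login from the keytab when the TGT is close to expiring; cheap no-op otherwise. */
    public static void reLoginKerberos() throws IOException {
        if (ugi != null) {
            // re-logins only after ~80% of the ticket lifetime has elapsed (Hadoop's renew window),
            // i.e. when roughly 20% of the lifetime remains
            ugi.checkTGTAndReloginFromKeytab();
        }
    }
}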
Because only HBaseUpsertSinkFunction has been modified so far, the Kerberos information cannot be configured through SQL alone. The modified HBaseUpsertSinkFunction has to be used through the DataStream API: run the SQL query, call toRetractStream[Row] on the result, and then manually addSink(new HBaseUpsertSinkFunction(...)). This is also what Flink SQL does under the hood.
Code:
val deviceInfo: DataStream[tuple.Tuple2[lang.Boolean, Row]] = tableEnv.sqlQuery("select mac as rowkey, ROW(incident_sign, system_sign, agent_version, host_name, ip, mac, system_info, uuid, date_time) as info from agetDeviceInfo")
  .toRetractStream[Row]
  .map(st => {
    // Re-wrap the Scala tuple as org.apache.flink.api.java.tuple.Tuple2[java.lang.Boolean, Row].
    // HBaseUpsertSinkFunction is written in Java and extends RichSinkFunction<Tuple2<Boolean, Row>>,
    // so without this conversion addSink fails with a type mismatch.
    new org.apache.flink.api.java.tuple.Tuple2[lang.Boolean, Row](st._1, st._2)
  })
// create the HBase schema
val schema = new HBaseTableSchema()
schema.setRowKey("rowkey", classOf[String])
schema.addColumn("info", "incident_sign", classOf[String])
schema.addColumn("info", "system_sign", classOf[String])
schema.addColumn("info", "agent_version", classOf[String])
schema.addColumn("info", "host_name", classOf[String])
schema.addColumn("info", "ip", classOf[String])
schema.addColumn("info", "mac", classOf[String])
schema.addColumn("info", "system_info", classOf[String])
schema.addColumn("info", "uuid", classOf[String])
schema.addColumn("info", "date_time", classOf[String])
// add the HBase upsert sink
deviceInfo.addSink(FlinkUtil.getHbaseSink(params, schema, FlinkUtil.initHbaseProperties(params), "t_rsd_base_asset"))
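FlinkUtil.getHbaseSink and FlinkUtil.initHbaseProperties are project-specific helpers that are not shown in this post. A minimal sketch of what getHbaseSink might look like, assuming params is a Flink ParameterTool; the parameter key names and flush thresholds are placeholders:

package com.rongan.realtime.util;

import com.alibaba.fastjson.JSONObject;
import com.rongan.realtime.sink.HBaseUpsertSinkFunction;
import org.apache.flink.addons.hbase.HBaseTableSchema;
import org.apache.flink.api.java.utils.ParameterTool;

public class FlinkUtil {

    // Hypothetical helper; parameter key names and flush thresholds are assumptions.
    public static HBaseUpsertSinkFunction getHbaseSink(
            ParameterTool params,
            HBaseTableSchema schema,
            org.apache.hadoop.conf.Configuration hbaseConf,
            String tableName) {
        JSONObject kerberosConf = new JSONObject();
        kerberosConf.put("haveKerberos", params.get("haveKerberos", "false"));
        kerberosConf.put("kerberosPrincipal", params.get("kerberosPrincipal", ""));
        kerberosConf.put("kerberosKeytabFilePath", params.get("kerberosKeytabFilePath", ""));
        return new HBaseUpsertSinkFunction(
                tableName,
                schema,
                hbaseConf,
                kerberosConf,
                2 * 1024 * 1024, // bufferFlushMaxSizeInBytes: flush after ~2 MB of buffered data
                1000,            // bufferFlushMaxMutations: flush after 1000 buffered mutations
                1000);           // bufferFlushIntervalMillis: flush at least once per second
    }
}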