Flink: Custom HBaseUpsertSinkFunction with Kerberos Authentication (Code Walkthrough)

1. Introduction

Our project needs Flink to write data to HBase, and the cluster has Kerberos authentication enabled. The default HBaseUpsertSinkFunction, however, does not support Kerberos, so it cannot insert data into HBase on a secured cluster.

The class below is the original HBaseUpsertSinkFunction with Kerberos support added. Two methods were changed: open() now checks whether Kerberos authentication is required and performs the login if it is (jsonObject carries the Kerberos configuration: the keytab file, the Kerberos principal, and so on), and invoke() now checks whether the ticket is close to expiring and re-logs in to Kerberos once only about 20% of the ticket lifetime remains.
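For orientation, here is roughly what the jsonObject handed to the sink might contain. Only the haveKerberos key is read directly in the sink code below; the other key names are illustrative assumptions and have to match whatever HbaseMutilUtil actually expects. The full modified class then follows.

    // Hypothetical Kerberos config for the sink; only "haveKerberos" appears in the sink code,
    // the remaining key names are assumptions.
    JSONObject kerberosConf = new JSONObject();
    kerberosConf.put("haveKerberos", "true");
    kerberosConf.put("kerberosPrincipal", "hbase_user@EXAMPLE.COM");                // assumed key name
    kerberosConf.put("keytabFilePath", "/etc/security/keytabs/hbase_user.keytab");  // assumed key name
    kerberosConf.put("krb5ConfPath", "/etc/krb5.conf");                             // assumed key name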

 

package com.rongan.realtime.sink;

import com.alibaba.fastjson.JSONObject;
import com.rongan.realtime.util.HbaseMutilUtil;
import org.apache.flink.addons.hbase.HBaseTableSchema;
import org.apache.flink.addons.hbase.util.HBaseConfigurationUtil;
import org.apache.flink.addons.hbase.util.HBaseReadWriteHelper;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.state.FunctionInitializationContext;
import org.apache.flink.runtime.state.FunctionSnapshotContext;
import org.apache.flink.runtime.util.ExecutorThreadFactory;
import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.flink.types.Row;
import org.apache.flink.util.StringUtils;

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.client.BufferedMutator;
import org.apache.hadoop.hbase.client.BufferedMutatorParams;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;

import static org.apache.flink.util.Preconditions.checkArgument;

/**
 * The upsert sink for HBase.
 *
 * <p>This class leverages {@link BufferedMutator} to buffer multiple
 * {@link org.apache.hadoop.hbase.client.Mutation Mutations} before sending the requests to the cluster.
 * The buffering strategy can be configured by {@code bufferFlushMaxSizeInBytes},
 * {@code bufferFlushMaxMutations} and {@code bufferFlushIntervalMillis}.
 */
public class HBaseUpsertSinkFunction
        extends RichSinkFunction<Tuple2<Boolean, Row>>
        implements CheckpointedFunction, BufferedMutator.ExceptionListener {

    private static final long serialVersionUID = 1L;
    private static final Logger LOG = LoggerFactory.getLogger(org.apache.flink.addons.hbase.HBaseUpsertSinkFunction.class);

    private final String hTableName;
    private final HBaseTableSchema schema;
    private final byte[] serializedConfig;

    private final long bufferFlushMaxSizeInBytes;
    private final long bufferFlushMaxMutations;
    private final long bufferFlushIntervalMillis;

    private transient HBaseReadWriteHelper helper;
    private transient Connection connection;
    private transient BufferedMutator mutator;

    private transient ScheduledExecutorService executor;
    private transient ScheduledFuture<?> scheduledFuture;
    private transient AtomicLong numPendingRequests;

    // Kerberos configuration (keytab file, principal, etc.)
    private JSONObject jsonObject;

    private transient volatile boolean closed = false;

    /**
     * This is set from inside the {@link BufferedMutator.ExceptionListener} if a {@link Throwable}
     * was thrown.
     *
     * <p>Errors will be checked and rethrown before processing each input element, and when the sink is closed.
     */
    private final AtomicReference<Throwable> failureThrowable = new AtomicReference<>();

    public HBaseUpsertSinkFunction(
            String hTableName,
            HBaseTableSchema schema,
            org.apache.hadoop.conf.Configuration conf,
            JSONObject jsonObject,
            long bufferFlushMaxSizeInBytes,
            long bufferFlushMaxMutations,
            long bufferFlushIntervalMillis) {
        this.hTableName = hTableName;
        this.schema = schema;
        // Configuration is not serializable
        this.serializedConfig = HBaseConfigurationUtil.serializeConfiguration(conf);
        this.bufferFlushMaxSizeInBytes = bufferFlushMaxSizeInBytes;
        this.bufferFlushMaxMutations = bufferFlushMaxMutations;
        this.bufferFlushIntervalMillis = bufferFlushIntervalMillis;
        this.jsonObject = jsonObject;
    }

    @Override
    public void open(Configuration parameters) throws Exception {
        LOG.info("start open ...");
        org.apache.hadoop.conf.Configuration config = prepareRuntimeConfiguration();
        try {
            this.helper = new HBaseReadWriteHelper(schema);
            this.numPendingRequests = new AtomicLong(0);

            if (null == connection) {
                // perform the Kerberos login before creating the connection
                HbaseMutilUtil.loginKerberos(config, jsonObject);
                this.connection = ConnectionFactory.createConnection(config);
            }
            // create a parameter instance, set the table name and custom listener reference.
            BufferedMutatorParams params = new BufferedMutatorParams(TableName.valueOf(hTableName))
                .listener(this)
                .writeBufferSize(bufferFlushMaxSizeInBytes);
            this.mutator = connection.getBufferedMutator(params);

            if (bufferFlushIntervalMillis > 0) {
                this.executor = Executors.newScheduledThreadPool(
                    1, new ExecutorThreadFactory("hbase-upsert-sink-flusher"));
                this.scheduledFuture = this.executor.scheduleWithFixedDelay(() -> {
                    if (closed) {
                        return;
                    }
                    try {
                        flush();
                    } catch (Exception e) {
                        // fail the sink and skip the rest of the items
                        // if the failure handler decides to throw an exception
                        failureThrowable.compareAndSet(null, e);
                    }
                }, bufferFlushIntervalMillis, bufferFlushIntervalMillis, TimeUnit.MILLISECONDS);
            }
        } catch (TableNotFoundException tnfe) {
            LOG.error("The table " + hTableName + " not found ", tnfe);
            throw new RuntimeException("HBase table '" + hTableName + "' not found.", tnfe);
        } catch (IOException ioe) {
            LOG.error("Exception while creating connection to HBase.", ioe);
            throw new RuntimeException("Cannot create connection to HBase.", ioe);
        }
        LOG.info("end open.");
    }

    private org.apache.hadoop.conf.Configuration prepareRuntimeConfiguration() throws IOException {
        // create default configuration from current runtime env (`hbase-site.xml` in classpath) first,
        // and overwrite configuration using serialized configuration from client-side env (`hbase-site.xml` in classpath).
        // user params from client-side have the highest priority
        org.apache.hadoop.conf.Configuration runtimeConfig =
            HBaseConfigurationUtil.deserializeConfiguration(serializedConfig, HBaseConfiguration.create());

        // do validation: check key option(s) in final runtime configuration
        if (StringUtils.isNullOrWhitespaceOnly(runtimeConfig.get(HConstants.ZOOKEEPER_QUORUM))) {
            LOG.error("Can not connect to HBase without {} configuration", HConstants.ZOOKEEPER_QUORUM);
            throw new IOException("Check HBase configuration failed, lost: '" + HConstants.ZOOKEEPER_QUORUM + "'!");
        }

        return runtimeConfig;
    }

    private void checkErrorAndRethrow() {
        Throwable cause = failureThrowable.get();
        if (cause != null) {
            throw new RuntimeException("An error occurred in HBaseSink.", cause);
        }
    }

    @Override
    public void invoke(Tuple2<Boolean, Row> value, Context context) throws Exception {
        checkErrorAndRethrow();

        // check whether HBase requires Kerberos authentication
        if ("true".equals(jsonObject.getString("haveKerberos"))) {
            // if so, re-login when the Kerberos ticket is close to expiring
            HbaseMutilUtil.reLoginKerberos();
        }

        if (value.f0) {
            Put put = helper.createPutMutation(value.f1);
            mutator.mutate(put);
        } else {
            Delete delete = helper.createDeleteMutation(value.f1);
            mutator.mutate(delete);
        }

        // flush when the buffer number of mutations greater than the configured max size.
        if (bufferFlushMaxMutations > 0
                && numPendingRequests.incrementAndGet() >= bufferFlushMaxMutations) {
            flush();
        }
    }

    private void flush() throws IOException {
        // BufferedMutator is thread-safe
        mutator.flush();
        numPendingRequests.set(0);
        checkErrorAndRethrow();
    }

    @Override
    public void close() throws Exception {
        closed = true;

        if (mutator != null) {
            try {
                mutator.close();
            } catch (IOException e) {
                LOG.warn("Exception occurs while closing HBase BufferedMutator.", e);
            }
            this.mutator = null;
        }

        if (connection != null) {
            try {
                connection.close();
            } catch (IOException e) {
                LOG.warn("Exception occurs while closing HBase Connection.", e);
            }
            this.connection = null;
        }

        if (scheduledFuture != null) {
            scheduledFuture.cancel(false);
            if (executor != null) {
                executor.shutdownNow();
            }
        }
    }

    @Override
    public void snapshotState(FunctionSnapshotContext context) throws Exception {
        while (numPendingRequests.get() != 0) {
            flush();
        }
    }

    @Override
    public void initializeState(FunctionInitializationContext context) throws Exception {
        // nothing to do.
    }

    @Override
    public void onException(RetriesExhaustedWithDetailsException exception, BufferedMutator mutator)
            throws RetriesExhaustedWithDetailsException {
        // fail the sink and skip the rest of the items
        // if the failure handler decides to throw an exception
        failureThrowable.compareAndSet(null, exception);
    }
}

2. Using the modified HBaseUpsertSinkFunction

Since only HBaseUpsertSinkFunction has been modified so far, the Kerberos settings cannot be passed in through SQL; the modified sink has to be wired in with the DataStream API. In other words, after running the SQL query, call toRetractStream[Row] and then manually addSink(new HBaseUpsertSinkFunction(...)). This is essentially what Flink SQL does under the hood anyway.

Code:

   val deviceInfo: DataStream[tuple.Tuple2[lang.Boolean, Row]] = tableEnv
     .sqlQuery("select mac as rowkey, ROW(incident_sign, system_sign, agent_version, host_name, ip, mac, system_info, uuid, date_time) as info from agetDeviceInfo")
     .toRetractStream[Row]
     .map(st => {
       // Re-wrap the Scala (Boolean, Row) tuple as org.apache.flink.api.java.tuple.Tuple2[java.lang.Boolean, Row].
       // HBaseUpsertSinkFunction is written in Java and extends RichSinkFunction<Tuple2<java.lang.Boolean, Row>>,
       // so without this conversion the later addSink call fails with a type mismatch.
       new org.apache.flink.api.java.tuple.Tuple2[lang.Boolean, Row](st._1, st._2)
     })


   // create the HBase schema
    val schema = new HBaseTableSchema()
    schema.setRowKey("rowkey", classOf[String])
    schema.addColumn("info", "incident_sign", classOf[String])
    schema.addColumn("info", "system_sign", classOf[String])
    schema.addColumn("info", "agent_version", classOf[String])
    schema.addColumn("info", "host_name", classOf[String])
    schema.addColumn("info", "ip", classOf[String])
    schema.addColumn("info", "mac", classOf[String])
    schema.addColumn("info", "system_info", classOf[String])
    schema.addColumn("info", "uuid", classOf[String])
    schema.addColumn("info", "date_time", classOf[String])
   // add the HBase upsert sink
   deviceInfo.addSink(FlinkUtil.getHbaseSink(params, schema, FlinkUtil.initHbaseProperties(params), "t_rsd_base_asset"))
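
FlinkUtil.getHbaseSink and FlinkUtil.initHbaseProperties are project-specific helpers that are not reproduced here. As a rough sketch (the parameter types, JSON keys and buffer sizes below are assumptions, not the project's actual code), such a factory only needs to assemble the constructor arguments of HBaseUpsertSinkFunction:

    // Hypothetical factory, sketching what FlinkUtil.getHbaseSink might look like.
    // The real helper derives these values from the job parameters; everything here is assumed.
    public static HBaseUpsertSinkFunction getHbaseSink(
            JSONObject kerberosConf,                        // Kerberos settings (see section 1)
            HBaseTableSchema schema,
            org.apache.hadoop.conf.Configuration hbaseConf, // must contain hbase.zookeeper.quorum
            String tableName) {
        return new HBaseUpsertSinkFunction(
            tableName,
            schema,
            hbaseConf,
            kerberosConf,
            2 * 1024 * 1024, // bufferFlushMaxSizeInBytes: flush after ~2 MB of buffered mutations
            1000,            // bufferFlushMaxMutations: flush after 1000 buffered mutations
            5000);           // bufferFlushIntervalMillis: flush at least every 5 seconds
    }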

3. Util code
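
A minimal sketch of HbaseMutilUtil, assuming a UserGroupInformation (UGI) based login; the JSON key names are the same assumed ones used above and may differ from the real implementation. UGI's checkTGTAndReloginFromKeytab() re-logs in once the ticket enters its renewal window, which by default opens after about 80% of the ticket lifetime has elapsed, matching the "about 20% remaining" behaviour described in section 1.

    package com.rongan.realtime.util;

    import com.alibaba.fastjson.JSONObject;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.security.UserGroupInformation;

    import java.io.IOException;

    // Minimal, assumed sketch of the Kerberos helper used by the sink.
    public class HbaseMutilUtil {

        // Log in to Kerberos with the principal/keytab carried in the JSON config.
        public static void loginKerberos(Configuration config, JSONObject jsonObject) throws IOException {
            if (!"true".equals(jsonObject.getString("haveKerberos"))) {
                return; // Kerberos disabled, nothing to do
            }
            // point the JVM at the KDC configuration (assumed key name)
            System.setProperty("java.security.krb5.conf", jsonObject.getString("krb5ConfPath"));
            config.set("hadoop.security.authentication", "kerberos");
            config.set("hbase.security.authentication", "kerberos");
            UserGroupInformation.setConfiguration(config);
            // assumed key names for principal and keytab path
            UserGroupInformation.loginUserFromKeytab(
                jsonObject.getString("kerberosPrincipal"),
                jsonObject.getString("keytabFilePath"));
        }

        // Re-login from the keytab if the TGT is close to expiring.
        public static void reLoginKerberos() throws IOException {
            UserGroupInformation.getLoginUser().checkTGTAndReloginFromKeytab();
        }
    }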
