package com.glab.flink.connector.clickhouse.table;
import com.glab.flink.connector.clickhouse.table.internal.dialect.ClickHouseDialect;
import com.glab.flink.connector.clickhouse.table.internal.options.ClickHouseOptions;
import org.apache.flink.configuration.ConfigOption;
import org.apache.flink.configuration.ConfigOptions;
import org.apache.flink.configuration.ReadableConfig;
import org.apache.flink.connector.jdbc.internal.options.JdbcLookupOptions;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.catalog.ResolvedCatalogTable;
import org.apache.flink.table.catalog.ResolvedSchema;
import org.apache.flink.table.connector.sink.DynamicTableSink;
import org.apache.flink.table.connector.source.DynamicTableSource;
import org.apache.flink.table.factories.*;
import org.apache.flink.table.utils.TableSchemaUtils;
import java.time.Duration;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
public class ClickHouseDynamicTableFactory implements DynamicTableSinkFactory, DynamicTableSourceFactory {
public static final String IDENTIFIER = "clickhouse";
private static final String DRIVER_NAME = "ru.yandex.clickhouse.ClickHouseDriver";
public static final ConfigOption URL = ConfigOptions.key("url")
.stringType()
.noDefaultValue()
.withDeprecatedKeys("the ClickHouse url in format `clickhouse://:`.");
public static final ConfigOption USERNAME = ConfigOptions.key("username")
.stringType()
.noDefaultValue()
.withDescription("the ClickHouse username.");
public static final ConfigOption PASSWORD = ConfigOptions.key("password")
.stringType()
.noDefaultValue()
.withDescription("the ClickHouse password.");
public static final ConfigOption DATABASE_NAME = ConfigOptions.key("database-name")
.stringType()
.defaultValue("default")
.withDescription("the ClickHouse database name. Default to `default`.");
public static final ConfigOption TABLE_NAME = ConfigOptions.key("table-name")
.stringType()
.noDefaultValue()
.withDescription("the ClickHouse table name.");
public static final ConfigOption SINK_BATCH_SIZE = ConfigOptions.key("sink.batch-size")
.intType()
.defaultValue(Integer.valueOf(1000))
.withDescription("the flush max size, over this number of records, will flush data. The default value is 1000.");
public static final ConfigOption SINK_FLUSH_INTERVAL = ConfigOptions.key("sink.flush-interval")
.durationType()
.defaultValue(Duration.ofSeconds(1L))
.withDescription("the flush interval mills, over this time, asynchronous threads will flush data. The default value is 1s.");
public static final ConfigOption SINK_MAX_RETRIES = ConfigOptions.key("sink.max-retries")
.intType()
.defaultValue(Integer.valueOf(3))
.withDescription("the max retry times if writing records to database failed.");
public static final ConfigOption SINK_WRITE_LOCAL = ConfigOptions.key("sink.write-local")
.booleanType()
.defaultValue(Boolean.valueOf(false))
.withDescription("directly write to local tables in case of Distributed table.");
public static final ConfigOption SINK_PARTITION_STRATEGY = ConfigOptions.key("sink.partition-strategy")
.stringType()
.defaultValue("balanced")
.withDescription("partition strategy. available: balanced, hash, shuffle.");
public static final ConfigOption SINK_PARTITION_KEY = ConfigOptions.key("sink.partition-key")
.stringType()
.noDefaultValue()
.withDescription("partition key used for hash strategy.");
public static final ConfigOption SINK_IGNORE_DELETE = ConfigOptions.key("sink.ignore-delete")
.booleanType()
.defaultValue(Boolean.valueOf(true))
.withDescription("whether to treat update statements as insert statements and ignore deletes. defaults to true.");
public static final ConfigOption LOOKUP_CACHE_MAX_ROWS = ConfigOptions.key("lookup.cache.max-rows")
.longType()
.defaultValue(-1L)
.withDescription("the max number of rows of lookup cache, over this value, the oldest rows will be eliminated." +
"cache.max-rows and cache ttl options must all be specified id any of them is specified. cache is not enabled as default.");
public static final ConfigOption LOOKUP_CACHE_TTL = ConfigOptions.key("lookup.cache.ttl")
.durationType()
.defaultValue(Duration.ofSeconds(10))
.withDescription("the cache time to live");
public static final ConfigOption LOOKUP_MAX_RETRIES = ConfigOptions.key("lookup.max-retries")
.intType()
.defaultValue(3)
.withDescription("the max retry times if lookup database failed.");
@Override
public DynamicTableSource createDynamicTableSource(Context context) {
FactoryUtil.TableFactoryHelper helper = FactoryUtil.createTableFactoryHelper(this, context);
ReadableConfig config = helper.getOptions();
helper.validate();
try {
validateConfigOptions(config);
} catch (Exception e) {
e.printStackTrace();
}
//带New的使用1.13API,不带的用12的
ResolvedSchema resolvedSchema = context.getCatalogTable().getResolvedSchema();
return new ClickHouseDynamicTableSource(resolvedSchema, getOptions(config), getJdbcLookupOptions(config));
}
@Override
public DynamicTableSink createDynamicTableSink(Context context) {
FactoryUtil.TableFactoryHelper helper = FactoryUtil.createTableFactoryHelper(this, context);
ReadableConfig config = helper.getOptions();
helper.validate();
try {
validateConfigOptions(config);
} catch (Exception e) {
e.printStackTrace();
}
//带New的使用1.13API,不带的用12的
ResolvedSchema resolvedSchema = context.getCatalogTable().getResolvedSchema();
return new ClickHouseDynamicTableSink(resolvedSchema, getOptions(config));
}
@Override
public String factoryIdentifier() {
return IDENTIFIER;
}
@Override
public Set> requiredOptions() {
Set> requiredOptions = new HashSet<>();
requiredOptions.add(URL);
requiredOptions.add(TABLE_NAME);
return requiredOptions;
}
@Override
public Set> optionalOptions() {
Set> optionalOptions = new HashSet<>();
optionalOptions.add(USERNAME);
optionalOptions.add(PASSWORD);
optionalOptions.add(DATABASE_NAME);
optionalOptions.add(SINK_BATCH_SIZE);
optionalOptions.add(SINK_FLUSH_INTERVAL);
optionalOptions.add(SINK_MAX_RETRIES);
optionalOptions.add(SINK_WRITE_LOCAL);
optionalOptions.add(SINK_PARTITION_STRATEGY);
optionalOptions.add(SINK_PARTITION_KEY);
optionalOptions.add(SINK_IGNORE_DELETE);
optionalOptions.add(LOOKUP_CACHE_MAX_ROWS);
optionalOptions.add(LOOKUP_CACHE_TTL);
optionalOptions.add(LOOKUP_MAX_RETRIES);
return optionalOptions;
}
private void validateConfigOptions(ReadableConfig config) throws Exception{
String partitionStrategy = config.get(SINK_PARTITION_STRATEGY);
if (!Arrays.asList(new String[] { "hash", "balanced", "shuffle" }).contains(partitionStrategy))
throw new IllegalArgumentException("Unknown sink.partition-strategy `" + partitionStrategy + "`");
if (partitionStrategy.equals("hash") && !config.getOptional(SINK_PARTITION_KEY).isPresent())
throw new IllegalArgumentException("A partition key must be provided for hash partition strategy");
if ((config.getOptional(USERNAME).isPresent() ^ config.getOptional(PASSWORD).isPresent()))
throw new IllegalArgumentException("Either all or none of username and password should be provided");
}
private ClickHouseOptions getOptions(ReadableConfig config) {
return (new ClickHouseOptions.Builder()).withUrl((String)config.get(URL))
.withUsername((String)config.get(USERNAME))
.withPassword((String)config.get(PASSWORD))
.withDatabaseName((String)config.get(DATABASE_NAME))
.withTableName((String)config.get(TABLE_NAME))
.withBatchSize(((Integer)config.get(SINK_BATCH_SIZE)).intValue())
.withFlushInterval((Duration)config.get(SINK_FLUSH_INTERVAL))
.withMaxRetries(((Integer)config.get(SINK_MAX_RETRIES)).intValue())
.withWriteLocal((Boolean)config.get(SINK_WRITE_LOCAL))
.withPartitionStrategy((String)config.get(SINK_PARTITION_STRATEGY))
.withPartitionKey((String)config.get(SINK_PARTITION_KEY))
.withIgnoreDelete(((Boolean)config.get(SINK_IGNORE_DELETE)).booleanValue())
.setDialect(new ClickHouseDialect())
.build();
}
/* private JdbcOptions getJdbcOptions(ReadableConfig config) {
return JdbcOptions.builder()
.setDriverName(DRIVER_NAME)
.setDBUrl(config.get(URL))
.setTableName(config.get(TABLE_NAME))
.setDialect(new ClickHouseDialect())
.build();
}*/
private JdbcLookupOptions getJdbcLookupOptions(ReadableConfig config) {
return JdbcLookupOptions.builder()
.setCacheExpireMs(config.get(LOOKUP_CACHE_TTL).toMillis())
.setMaxRetryTimes(config.get(LOOKUP_MAX_RETRIES))
.setCacheMaxSize(config.get(LOOKUP_CACHE_MAX_ROWS))
.build();
}
}
4.3 ClickHouseDynamicTableSink.java
package com.glab.flink.connector.clickhouse.table;
import com.glab.flink.connector.clickhouse.table.internal.AbstractClickHouseSinkFunction;
import com.glab.flink.connector.clickhouse.table.internal.options.ClickHouseOptions;
import org.apache.flink.table.catalog.ResolvedSchema;
import org.apache.flink.table.connector.ChangelogMode;
import org.apache.flink.table.connector.sink.DynamicTableSink;
import org.apache.flink.table.connector.sink.SinkFunctionProvider;
import org.apache.flink.types.RowKind;
import org.apache.flink.util.Preconditions;
public class ClickHouseDynamicTableSink implements DynamicTableSink {
private final ResolvedSchema resolvedSchema;
private final ClickHouseOptions options;
public ClickHouseDynamicTableSink(ResolvedSchema resolvedSchema, ClickHouseOptions options) {
this.resolvedSchema = resolvedSchema;
this.options = options;
}
@Override
public ChangelogMode getChangelogMode(ChangelogMode requestedMode) {
validatePrimaryKey(requestedMode);
return ChangelogMode.newBuilder()
.addContainedKind(RowKind.INSERT)
.addContainedKind(RowKind.UPDATE_AFTER)
.addContainedKind(RowKind.DELETE)
.build();
}
private void validatePrimaryKey(ChangelogMode requestedMode) {
Preconditions.checkState((ChangelogMode.insertOnly().equals(requestedMode) || this.resolvedSchema.getPrimaryKey().isPresent()), "please declare primary key for sink table when query contains update/delete record.");
}
@Override
public SinkRuntimeProvider getSinkRuntimeProvider(Context context) {
AbstractClickHouseSinkFunction sinkFunction =
(new AbstractClickHouseSinkFunction.Builder())
.withOptions(this.options)
.withFieldNames(this.resolvedSchema.getColumnNames())
.withFieldDataTypes(this.resolvedSchema.getColumnDataTypes())
.withPrimaryKey(this.resolvedSchema.getPrimaryKey())
.withRowDataTypeInfo(context.createTypeInformation(this.resolvedSchema.toSinkRowDataType()))
.build();
return SinkFunctionProvider.of(sinkFunction);
}
@Override
public ClickHouseDynamicTableSink copy() {
return new ClickHouseDynamicTableSink(this.resolvedSchema, this.options);
}
@Override
public String asSummaryString() {
return "ClickHouse sink";
}
}
Task not serializable是Spark开发过程中最令人头疼的问题之一,这里记录下出现这个问题的两个实例,一个是自己遇到的,另一个是在stackoverflow上看到的。等有时间了再仔细探究出现Task not serializable的各种原因以及出现问题后如何快速定位问题的所在,至少目前阶段碰到此类问题,没有什么章法。
1.
package spark.exampl
mysql 查看当前正在执行的操作,即正在执行的sql语句的方法为:
show processlist 命令
mysql> show global status;可以列出MySQL服务器运行各种状态值,我个人较喜欢的用法是show status like '查询值%';一、慢查询mysql> show variab
1. 只有Map任务的Map Reduce Job
File System Counters
FILE: Number of bytes read=3629530
FILE: Number of bytes written=98312
FILE: Number of read operations=0
FILE: Number of lar
import java.util.LinkedList;
import java.util.List;
import ljn.help.*;
public class BTreeLowestParentOfTwoNodes {
public static void main(String[] args) {
/*
* node data is stored in
本文介绍Java API 中 Date, Calendar, TimeZone和DateFormat的使用,以及不同时区时间相互转化的方法和原理。
问题描述:
向处于不同时区的服务器发请求时需要考虑时区转换的问题。譬如,服务器位于东八区(北京时间,GMT+8:00),而身处东四区的用户想要查询当天的销售记录。则需把东四区的“今天”这个时间范围转换为服务器所在时区的时间范围。
入口脚本
入口脚本是应用启动流程中的第一环,一个应用(不管是网页应用还是控制台应用)只有一个入口脚本。终端用户的请求通过入口脚本实例化应用并将请求转发到应用。
Web 应用的入口脚本必须放在终端用户能够访问的目录下,通常命名为 index.php,也可以使用 Web 服务器能定位到的其他名称。
控制台应用的入口脚本一般在应用根目录下命名为 yii(后缀为.php),该文