vertica 实时消费kafka 例子

1.提前在vertica中建立好相关对接kafka json数据的表

-- Target table for the Kafka streaming load (topic: res_time_space).
-- Rows are loaded by the vkconfig scheduler via KafkaJSONParser, which maps
-- JSON fields to columns by name — so these column names must match the JSON
-- keys produced to the topic. NOTE(review): several semantics below are
-- inferred from names only; confirm against the producer's schema.
CREATE TABLE public.res_time_space
(
    record_id varchar(255) NOT NULL,    -- record identifier (required)
    object_id varchar(255),
    object_type int NOT NULL,
    snap_time int,                      -- presumably epoch seconds of the snapshot — TODO confirm
    channel_id varchar(80),
    device_id varchar(80),
    device_name varchar(80),
    tollgate_id varchar(80),
    longitude float,
    latitude float,
    image_url varchar(255),
    close_image_url varchar(255),
    distant_image_url varchar(255),
    direction int,
    speed float,
    lane int,
    event_id varchar(128),
    event_type varchar(64),
    ods_time int,                       -- presumably epoch seconds of ODS ingest — TODO confirm
    address varchar(255),
    date_time date NOT NULL,
    s2_13 int,                          -- NOTE(review): s2_* look like spatial (S2 cell) indexes at levels 13/15/17/19 — confirm
    s2_15 int,
    s2_17 int,
    s2_19 int,
    type_date int NOT NULL,             -- partition key, see PARTITION BY below
    hour_time int
)
PARTITION BY (res_time_space.type_date);

res_time_space_kafka_vertica.sh

# Install the Kafka plugin (one-time initialization):
#su - dbadmin
#cd /opt/vertica/packages/kafka/ddl
#vsql -U dbadmin -w dbadmin -d viid -f install.sql
#-----------------------------------------------------------------
# Connection and scheduler configuration.
config_schema=res_time_space
dbhost=192.168.108.64
dbusername=dbadmin
dbpassword=dbadmin
frame_duration='00:00:10'
eof_timeout_ms=3000
operator=dbadmin
kafka_hosts=192.168.108.126:9092
kafka_topic=res_time_space
kafka_partitions=1
target_schema=public
target_table=res_time_space
# Vertica 8.1.0+
kafka_config=" --config-schema ${config_schema} --dbhost ${dbhost} --username ${dbusername} --password ${dbpassword}"
 
# Shut down any previously launched scheduler instance.
# FIX: the launch step below names the instance load_spec_${config_schema},
# so shutdown must use the same name; the old ${config_schema}_instance
# never matched and left the scheduler running.
/opt/vertica/packages/kafka/bin/vkconfig shutdown --instance-name load_spec_${config_schema} ${kafka_config}
echo "Shutdown Instance Complete!"
# Optional cleanup (drop config schema / truncate target table):
#$VSQL <<- EOF
#drop schema ${config_schema}_instance cascade;
#truncate table public.stranger_event;
#EOF
 
# Create and configure the scheduler.
/opt/vertica/packages/kafka/bin/vkconfig scheduler --create --add ${kafka_config} --frame-duration ${frame_duration} --eof-timeout-ms ${eof_timeout_ms} --operator ${operator}
echo "Create and Configure Scheduler Complete!"
 
# Register the Kafka cluster.
/opt/vertica/packages/kafka/bin/vkconfig cluster --create --cluster ${config_schema}_cluster --hosts ${kafka_hosts} ${kafka_config}
echo "Create Cluster Complete!"
 
# The data table is created separately (see the DDL above).
 
# Register the source topic and its partition count.
/opt/vertica/packages/kafka/bin/vkconfig source --create --source ${kafka_topic} --cluster ${config_schema}_cluster --partitions ${kafka_partitions} ${kafka_config}
echo "Create Kafka Source Complete!"
 
# Register the target table.
/opt/vertica/packages/kafka/bin/vkconfig target --create --target-schema ${target_schema} --target-table ${target_table} ${kafka_config}
echo "Create Target Complete!"
 
# Create the load spec: parse messages as JSON without flattening.
/opt/vertica/packages/kafka/bin/vkconfig load-spec --create --load-spec load_spec_${config_schema} --parser KafkaJSONParser --parser-parameters flatten_arrays=False,flatten_maps=False ${kafka_config}
#/opt/vertica/packages/kafka/bin/vkconfig load-spec --create --load-spec load_date_dimension_spec --parser KafkaJSONParser --filters "FILTER KafkaInsertDelimiters(delimiter=E'\n')" ${kafka_config}
 
echo "Create Load-Spec Complete!"
 
# Create the microbatch tying source, target, rejection table and load spec together.
/opt/vertica/packages/kafka/bin/vkconfig microbatch --create --microbatch ${kafka_topic} --target-schema ${target_schema} --target-table ${target_table} --rejection-schema ${target_schema} --rejection-table ${target_table}_rej --load-spec load_spec_${config_schema} --add-source ${kafka_topic} --add-source-cluster ${config_schema}_cluster ${kafka_config}
echo "Create Microbatch Complete!"
 
# Launch the scheduler in the background.
/opt/vertica/packages/kafka/bin/vkconfig launch --instance-name load_spec_${config_schema} ${kafka_config} &
echo "Launch the Scheduler Complete!"
echo "Done!"
 

 

-- Resource pool operations.
CREATE RESOURCE POOL custom_pool MEMORYSIZE '10%' PLANNEDCONCURRENCY 1 QUEUETIMEOUT 0;

-- Inspect memory settings of all resource pools.
SELECT
    name,
    memorysize,
    maxmemorysize
FROM V_CATALOG.RESOURCE_POOLS;

-- Inspect the built-in (internal) pools only.
SELECT
    name,
    memorysize,
    maxmemorysize,
    priority,
    maxconcurrency
FROM V_CATALOG.RESOURCE_POOLS
WHERE is_internal = 't';

-- A larger PRIORITY value means higher priority; negative values are allowed.
ALTER RESOURCE POOL custom_pool PRIORITY 5;

DROP RESOURCE POOL custom_pool;

完整版:

# Install the Kafka plugin (one-time initialization):
#su - dbadmin
#cd /opt/vertica/packages/kafka/ddl
#vsql -U dbadmin -w dbadmin -d viid -f install.sql
#-----------------------------------------------------------------
# Connection and scheduler configuration.
scheduler_name=res_time_space
dbhost=192.168.108.64
dbusername=dbadmin
dbpassword=dbadmin
frame_duration='00:00:10'
# If COPY receives no messages within eof-timeout-ms, Vertica ends that COPY statement.
eof_timeout_ms=3000
operator=dbadmin
kafka_hosts=192.168.108.126:9092
kafka_topic=res_time_space
kafka_partitions=1
kafka_consumer_group_id=res_time_space_group
target_schema=public
target_table=res_time_space
# Vertica 8.1.0
kafka_config=" --config-schema ${scheduler_name} --dbhost ${dbhost} --username ${dbusername} --password ${dbpassword}"
 
# Shut down any previously launched scheduler instance.
/opt/vertica/packages/kafka/bin/vkconfig shutdown --instance-name load_spec_${scheduler_name} ${kafka_config}
echo "Shutdown Instance Complete!"
# Optional cleanup (drop config schema / truncate target table):
#$VSQL <<- EOF
#drop schema ${scheduler_name}_instance cascade;
#truncate table public.stranger_event;
#EOF
# Create a dedicated resource pool for the scheduler.
# - Set PLANNEDCONCURRENCY slightly above the topic count so the scheduler's
#   own internal processes get resources without impacting data loads.
# - EXECUTIONPARALLELISM: e.g. with 100 partitions and a value of 10, each
#   thread reads from 10 partitions.
# - QUEUETIMEOUT 0 allows continuous loading.
# NOTE(review): CREATE RESOURCE POOL errors if the pool already exists, so
# re-running this script logs an error here; drop the pool first if needed.
/opt/vertica/bin/vsql -U dbadmin -w dbadmin -d viid -c "CREATE RESOURCE POOL res_time_space_pool MEMORYSIZE '10%' PLANNEDCONCURRENCY 2 QUEUETIMEOUT 0 EXECUTIONPARALLELISM 10;"
# Create and configure the scheduler itself.
# A named consumer group lets external tools monitor Vertica's consumption:
# $ ./kafka-consumer-groups.sh --describe --group res_time_space_group \
#   --bootstrap-server 192.168.108.126:9092
# FIX: the option is --message-max-bytes (hyphens), consistent with every
# other vkconfig flag; the underscore form is not a valid vkconfig option.
/opt/vertica/packages/kafka/bin/vkconfig scheduler --create --add ${kafka_config} --frame-duration ${frame_duration} --eof-timeout-ms ${eof_timeout_ms} --operator ${operator} --resource-pool res_time_space_pool --consumer-group-id ${kafka_consumer_group_id} --message-max-bytes 25165824
echo "Create and Configure Scheduler Complete!"
 
# Register the Kafka cluster.
/opt/vertica/packages/kafka/bin/vkconfig cluster --create --cluster ${scheduler_name}_cluster --hosts ${kafka_hosts} ${kafka_config}
echo "Create Cluster Complete!"
 
# The data table is created separately (see the DDL above).
 
# Register the source topic and its partition count.
/opt/vertica/packages/kafka/bin/vkconfig source --create --source ${kafka_topic} --cluster ${scheduler_name}_cluster --partitions ${kafka_partitions} ${kafka_config}
echo "Create Kafka Source Complete!"
 
# Register the target table.
# Avoid primary-key-constrained columns in the target: the scheduler stops
# loading when a row violates the constraint. If a PK column is required,
# filter redundant values out of the stream before the scheduler loads it.
/opt/vertica/packages/kafka/bin/vkconfig target --create --target-schema ${target_schema} --target-table ${target_table} ${kafka_config}
echo "Create Target Complete!"
 
# Create the load spec: parse messages as JSON without flattening.
# Example of splitting a delimited string row into multiple columns:
#COPY kafka_iot SOURCE KafkaSource(stream='iot-data|0|-2,iot-data|1|-2', 
#                                     brokers='kafka01:9092', 
#                                     stop_on_eof=True) 
#                  FILTER KafkaInsertDelimiters(delimiter = E'\n') 
#                  DELIMITER ',';
/opt/vertica/packages/kafka/bin/vkconfig load-spec --create --load-spec load_spec_${scheduler_name} --parser KafkaJSONParser --parser-parameters flatten_arrays=False,flatten_maps=False ${kafka_config}
#/opt/vertica/packages/kafka/bin/vkconfig load-spec --create --load-spec load_date_dimension_spec --parser KafkaJSONParser --filters "FILTER KafkaInsertDelimiters(delimiter=E'\n')" ${kafka_config}
 
echo "Create Load-Spec Complete!"
 
# Create the microbatch tying source, target, rejection table and load spec together.
# --offset partition_1_offset[,partition_2_offset,...] can skip messages or
#   reload previously read ones; shut the scheduler down before setting it.
# --rejection-schema/--rejection-table: where rejected messages are stored.
# Consumption details: SELECT * FROM res_time_space.stream_microbatch_history
/opt/vertica/packages/kafka/bin/vkconfig microbatch --create --microbatch ${kafka_topic} --target-schema ${target_schema} --target-table ${target_table} --rejection-schema ${target_schema} --rejection-table ${target_table}_rej --load-spec load_spec_${scheduler_name} --add-source ${kafka_topic} --add-source-cluster ${scheduler_name}_cluster ${kafka_config}
echo "Create Microbatch Complete!"
 
# Launch the scheduler in the background.
/opt/vertica/packages/kafka/bin/vkconfig launch --instance-name load_spec_${scheduler_name} ${kafka_config} &
echo "Launch the Scheduler Complete!"
echo "Done!"
 

更新当前设置 :重新从某个偏移量开始消费

# Reset the microbatch offset and restart consumption from it.
# Only the connection settings are needed here; the other config values
# from the setup script (frame duration, hosts, target, ...) are unused
# by these three vkconfig calls and have been removed.
scheduler_name=res_time_space
dbhost=192.168.108.64
dbusername=dbadmin
dbpassword=dbadmin
kafka_topic=res_time_space
# Vertica 8.1.0
kafka_config=" --config-schema ${scheduler_name} --dbhost ${dbhost} --username ${dbusername} --password ${dbpassword}"

# Stop the running scheduler before changing the microbatch offset.
/opt/vertica/packages/kafka/bin/vkconfig shutdown --instance-name load_spec_${scheduler_name} ${kafka_config}
echo "stop the Scheduler Complete!"

# Find the current start offset:
#   SELECT max(start_offset)-2 AS cur_start_offset FROM res_time_space.stream_microbatch_history
# To replay N messages, set offset = cur_start_offset - N.
/opt/vertica/packages/kafka/bin/vkconfig microbatch --update --microbatch ${kafka_topic} --offset 12650 --partition 0 ${kafka_config}
echo "update Microbatch Complete!"

# Relaunch the scheduler in the background.
/opt/vertica/packages/kafka/bin/vkconfig launch --instance-name load_spec_${scheduler_name} ${kafka_config} &
echo "Launch the Scheduler Complete!"
echo "Done!"

 

你可能感兴趣的:(vertica)