Version: Apache Flink 1.11.2 for Scala 2.11. Download:
https://mirror.bit.edu.cn/apache/flink/flink-1.11.2/flink-1.11.2-bin-scala_2.11.tgz
The download links on the official site have changed, so here is the most complete Flink download index; you can pick any release from it yourself:
Index of /dist/flink
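A minimal sketch of fetching and unpacking this exact build, assuming the mirror above still serves it and the /usr/local/service layout used later in this post:
wget https://mirror.bit.edu.cn/apache/flink/flink-1.11.2/flink-1.11.2-bin-scala_2.11.tgz
tar -xzf flink-1.11.2-bin-scala_2.11.tgz -C /usr/local/service/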
Hive version: 2.1.1
Reference: the official example in the Apache Flink 1.11 documentation, HiveCatalog.
The relevant hive-site.xml settings on the cluster are listed below as name = value pairs:
datanucleus.schema.autoCreateTables = true
hbase.zookeeper.quorum = 192.168.2.3:2181
hive.downloaded.resources.dir = /data/emr/hive/tmp/${hive.session.id}_resources
hive.exec.local.scratchdir = /data/emr/hive/tmp
hive.hwi.listen.host = 0.0.0.0
hive.hwi.listen.port = 7002
hive.llap.daemon.output.service.port = 7009
hive.llap.daemon.rpc.port = 7007
hive.llap.daemon.web.port = 7008
hive.llap.daemon.yarn.shuffle.port = 7006
hive.llap.management.rpc.port = 7005
hive.metastore.db.encoding = UTF-8
hive.metastore.port = 7004
hive.metastore.schema.verification = false
hive.metastore.schema.verification.record.version = false
hive.metastore.warehouse.dir = /usr/hive/warehouse
hive.querylog.location = /data/emr/hive/tmp
hive.security.authenticator.manager = org.apache.hadoop.hive.ql.security.SessionStateUserAuthenticator
hive.security.authorization.createtable.owner.grants = ALL
hive.security.authorization.enabled = true
hive.security.authorization.manager = org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory
hive.server2.enable.doAs = true
hive.server2.logging.operation.log.location = /data/emr/hive/tmp/operation_logs
hive.server2.support.dynamic.service.discovery = false
hive.server2.thrift.bind.host = 192.168.2.3
hive.server2.thrift.http.port = 7000
hive.server2.thrift.port = 7001
hive.server2.webui.host = 0.0.0.0
hive.server2.webui.port = 7003
hive.users.in.admin.role = hadoop
hive.zookeeper.quorum = 192.168.2.3:2181
io.compression.codec.lzo.class = com.hadoop.compression.lzo.LzoCodec
io.compression.codecs = org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,com.hadoop.compression.lzo.LzoCodec,com.hadoop.compression.lzo.LzopCodec,org.apache.hadoop.io.compress.SnappyCodec
javax.jdo.option.ConnectionDriverName = com.mysql.jdbc.Driver
javax.jdo.option.ConnectionPassword = 123456
javax.jdo.option.ConnectionURL = jdbc:mysql://192.168.14.23:3306/hivemetastore?useSSL=false&createDatabaseIfNotExist=true&characterEncoding=UTF-8
javax.jdo.option.ConnectionUserName = hive
hive.exec.post.hooks = org.apache.atlas.hive.hook.HiveHook,org.apache.hadoop.hive.ql.hooks.LineageLogger
hive.metastore.uris = thrift://192.168.2.3:7004
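Of all these settings, hive.metastore.uris is the one Flink's HiveCatalog actually connects to. A quick optional reachability check before touching Flink, assuming nc is installed on the box:
# 192.168.2.3:7004 comes from hive.metastore.uris above
nc -z 192.168.2.3 7004 && echo "metastore port reachable"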
cd /usr/local/service/flink-1.11.2/conf
vim sql-client-defaults.yaml
The final contents of sql-client-defaults.yaml are shown below. When editing, pay attention to YAML formatting: remove the "[]" from the stock "catalogs: [] # empty list" line before adding catalog entries under it.
################################################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################
# This file defines the default environment for Flink's SQL Client.
# Defaults might be overwritten by a session specific environment.
# See the Table API & SQL documentation for details about supported properties.

#==============================================================================
# Tables
#==============================================================================

# Define tables here such as sources, sinks, views, or temporal tables.

tables: [] # empty list
# A typical table source definition looks like:
# - name: ...
#   type: source-table
#   connector: ...
#   format: ...
#   schema: ...

# A typical view definition looks like:
# - name: ...
#   type: view
#   query: "SELECT ..."

# A typical temporal table definition looks like:
# - name: ...
#   type: temporal-table
#   history-table: ...
#   time-attribute: ...
#   primary-key: ...

#==============================================================================
# User-defined functions
#==============================================================================

# Define scalar, aggregate, or table functions here.

functions: [] # empty list
# A typical function definition looks like:
# - name: ...
#   from: class
#   class: ...
#   constructor: ...

#==============================================================================
# Catalogs
#==============================================================================

# Define catalogs here.
catalogs:
  # A typical catalog definition looks like:
  - name: myhive
    type: hive
    hive-conf-dir: /usr/local/service/hive/conf
    # default-database: ...

#==============================================================================
# Modules
#==============================================================================

# Define modules here.

#modules: # note the following modules will be of the order they are specified
#  - name: core
#    type: core

#==============================================================================
# Execution properties
#==============================================================================

# Properties that change the fundamental execution behavior of a table program.

execution:
  # select the implementation responsible for planning table programs
  # possible values are 'blink' (used by default) or 'old'
  planner: blink
  # 'batch' or 'streaming' execution
  type: streaming
  # allow 'event-time' or only 'processing-time' in sources
  time-characteristic: event-time
  # interval in ms for emitting periodic watermarks
  periodic-watermarks-interval: 200
  # 'changelog', 'table' or 'tableau' presentation of results
  result-mode: table
  # maximum number of maintained rows in 'table' presentation of results
  max-table-result-rows: 1000000
  # parallelism of the program
  parallelism: 1
  # maximum parallelism
  max-parallelism: 128
  # minimum idle state retention in ms
  min-idle-state-retention: 0
  # maximum idle state retention in ms
  max-idle-state-retention: 0
  # current catalog ('default_catalog' by default)
  # current-catalog: default_catalog
  current-catalog: myhive
  # current database of the current catalog (default database of the catalog by default)
  # current-database: default_database
  current-database: default
  # controls how table programs are restarted in case of a failures
  restart-strategy:
    # strategy type
    # possible values are "fixed-delay", "failure-rate", "none", or "fallback" (default)
    type: fallback

#==============================================================================
# Configuration options
#==============================================================================

# Configuration options for adjusting and tuning table programs.
# A full list of options and their default values can be found
# on the dedicated "Configuration" web page.
# A configuration can look like:
# configuration:
#   table.exec.spill-compression.enabled: true
#   table.exec.spill-compression.block-size: 128kb
#   table.optimizer.join-reorder-enabled: true

#==============================================================================
# Deployment properties
#==============================================================================

# Properties that describe the cluster to which table programs are submitted to.

deployment:
  # general cluster communication timeout in ms
  response-timeout: 5000
  # (optional) address from cluster to gateway
  gateway-address: ""
  # (optional) port from cluster to gateway
  gateway-port: 0
cd /usr/local/service/flink-1.11.2
mkdir sqlLibs
The jars under Flink's lib directory are as follows (the original screenshot is omitted here):
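As a hedged sketch of populating sqlLibs for the Hive integration (the Maven coordinate matches the Flink 1.11 Hive docs; the hive-exec path is an assumption for this EMR layout):
cd /usr/local/service/flink-1.11.2/sqlLibs
# Flink-Hive bridge jar from Maven Central
wget https://repo1.maven.org/maven2/org/apache/flink/flink-connector-hive_2.11/1.11.2/flink-connector-hive_2.11-1.11.2.jar
# hive-exec from the cluster's own Hive installation (path is an assumption)
cp /usr/local/service/hive/lib/hive-exec-2.1.1.jar .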
# create a topic
./kafka-topics.sh --zookeeper localhost:2181 --topic test_12 --partitions 1 --replication-factor 1 --create
# write some data
./kafka-console-producer.sh --broker-list localhost:9092 --topic test_12
>tom,15
>jhon,21
# verify the writes by consuming from the beginning
./kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic test_12 --from-beginning
cd /usr/local/service/flink-1.11.2/bin
./sql-client.sh embedded -d ../conf/sql-client-defaults.yaml -l ../sqlLibs
(sql-client.sh lives under bin/, which is also what the ../conf and ../sqlLibs relative paths assume.)
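Once the client starts, a few standard commands confirm that the HiveCatalog configured in sql-client-defaults.yaml is registered and active:
show catalogs;       -- should list default_catalog and myhive
use catalog myhive;
show tables;         -- existing Hive tables become visible here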
CREATE TABLE mykafka_t (name String, age Int) WITH (
'connector.type' = 'kafka',
'connector.version' = 'universal',
'connector.topic' = 'test_12',
'connector.properties.bootstrap.servers' = '119.29.23.123:9092',
'format.type' = 'csv',
'update-mode' = 'append'
);
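For reference, the same source can also be declared with the new connector options introduced in Flink 1.11; the legacy connector.type form above still works in 1.11, and mykafka_t_new is just an illustrative name:
CREATE TABLE mykafka_t_new (name STRING, age INT) WITH (
  'connector' = 'kafka',
  'topic' = 'test_12',
  'properties.bootstrap.servers' = '119.29.23.123:9092',
  'scan.startup.mode' = 'earliest-offset',
  'format' = 'csv'
);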
select * from mykafka_t;
Write more data into Kafka:
/usr/local/service/kafka/bin/kafka-console-producer.sh --broker-list localhost:9092 --topic test_12
>tom,15
>john,21
>sam,14
Flink SQL result display (the original screenshot is omitted here).
Error 1:
Flink SQL> select * from mykafka_t;
[ERROR] Could not execute SQL statement. Reason:
org.apache.flink.table.api.NoMatchingTableFactoryException: Could not find a suitable table factory for 'org.apache.flink.table.factories.TableSourceFactory' in
the classpath.
Reason: Required context properties mismatch.
The matching candidates:
org.apache.flink.table.sources.CsvAppendTableSourceFactory
Mismatched properties:
'connector.type' expects 'filesystem', but is 'kafka'
The following properties are requested:
connector.properties.bootstrap.servers=119.29.23.123:9092
connector.topic=test_12
connector.type=kafka
connector.version=universal
format.type=csv
schema.0.data-type=VARCHAR(2147483647)
schema.0.name=name
schema.1.data-type=INT
schema.1.name=age
update-mode=append
The following factories have been considered:
org.apache.flink.table.sources.CsvBatchTableSourceFactory
org.apache.flink.table.sources.CsvAppendTableSourceFactory
org.apache.flink.table.filesystem.FileSystemTableFactory
Solution: the classpath is missing flink-connector-kafka_2.11-1.11.2.jar, flink-connector-kafka-base_2.11-1.11.2.jar, and flink-csv-1.11.2.jar.
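If the jars are not at hand, they can be pulled straight from Maven Central into the directory passed via -l (the URLs follow the standard repository layout; verify versions against your build):
cd /usr/local/service/flink-1.11.2/sqlLibs
wget https://repo1.maven.org/maven2/org/apache/flink/flink-connector-kafka_2.11/1.11.2/flink-connector-kafka_2.11-1.11.2.jar
wget https://repo1.maven.org/maven2/org/apache/flink/flink-connector-kafka-base_2.11/1.11.2/flink-connector-kafka-base_2.11-1.11.2.jar
wget https://repo1.maven.org/maven2/org/apache/flink/flink-csv/1.11.2/flink-csv-1.11.2.jar
Restart the SQL client afterwards so the new jars are picked up.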
Error 2:
Flink SQL> select * from source_kafka4;
[ERROR] Could not execute SQL statement. Reason:
java.lang.ClassNotFoundException: org.apache.kafka.common.serialization.ByteArrayDeserializer
Solution: kafka-clients-2.4.1.jar is missing from the classpath.
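Same fix pattern, sketched under the same path assumptions as above:
cd /usr/local/service/flink-1.11.2/sqlLibs
wget https://repo1.maven.org/maven2/org/apache/kafka/kafka-clients/2.4.1/kafka-clients-2.4.1.jar
Then restart the SQL client and re-run the query.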