-- Source table in MySQL: bank reference data extracted by the DataX job below.
-- Plain INT instead of the deprecated INT(8) display width, matching the
-- column types of the Hive ODS target table.
create table T_YYBZB_TGH_BANKINFO
(
id int,
bank_id int,
bank_name varchar(200)
);
-- Seed row used to verify the MySQL -> HDFS pipeline end to end.
insert into T_YYBZB_TGH_BANKINFO (ID, BANK_ID, BANK_NAME) values (11, 11, '工商银行(广州)');
-- Hive ODS landing table for T_YYBZB_TGH_BANKINFO; the DataX hdfswriter
-- appends tab-delimited text files under
-- /user/hive/warehouse/ods.db/ods_t_yybzb_tgh_bankinfo_di (see job JSON below).
create table ods.ods_t_yybzb_tgh_bankinfo_di
(
id int,
bank_id int,
bank_name string
);
# db.conf - KEY=VALUE connection settings consumed by start.sh.
# NOTE: values are extracted with whole-word grep on the key, so comment
# text must never repeat a full key token.
# --- source database (prefix chosen by src_system in start.sh) ---
mysql_username=root
mysql_password=123456
mysql_ip=192.168.6.102
mysql_port=3306
mysql_sid=source
# --- target filesystem, used as hdfs://ip:port by the DataX writer ---
hadoop102_ip=192.168.6.102
hadoop102_port=8020
{
  "job": {
    "content": [
      {
        "reader": {
          "name": "mysqlreader",
          "parameter": {
            "connection": [
              {
                "jdbcUrl": ["jdbc:mysql://${ip}:${port}/${sid}?useSSL=false"],
                "querySql": ["select id,bank_id,bank_name from T_YYBZB_TGH_BANKINFO"]
              }
            ],
            "password": "${password}",
            "username": "${username}"
          }
        },
        "writer": {
          "name": "hdfswriter",
          "parameter": {
            "defaultFS": "hdfs://${hdfs_ip}:${hdfs_port}",
            "fileType": "text",
            "path": "/user/hive/warehouse/ods.db/ods_t_yybzb_tgh_bankinfo_di",
            "fileName": "ods_t_yybzb_tgh_bankinfo_di",
            "column": [
              {"name": "id", "type": "int"},
              {"name": "bank_id", "type": "int"},
              {"name": "bank_name", "type": "string"}
            ],
            "writeMode": "append",
            "fieldDelimiter": "\t",
            "encoding": "utf-8"
          }
        }
      }
    ],
    "setting": {
      "speed": {
        "channel": "1"
      }
    }
  }
}
#!/bin/bash
# start.sh - drive a full (truncate-and-load) DataX import from MySQL to HDFS.
# Reads connection settings from db.conf and passes them to datax.py as -D
# parameters that substitute the ${...} placeholders in the job JSON.
# NOTE: paths are relative, so this script must be run from sh_start/.

# Config file path (KEY=VALUE lines).
config_file=../db_conf/db.conf
# Source system key prefix (mysql_username, mysql_ip, ...).
src_system=mysql
# Target system key prefix (hadoop102_ip, hadoop102_port).
tag_system=hadoop102

# Extract one value from the config file: whole-word match on the key,
# then take the text after '='.
get_conf() {
    grep -w "$1" "${config_file}" | awk -F '=' '{print $2}'
}

export in_username=$(get_conf "${src_system}_username")
export in_password=$(get_conf "${src_system}_password")
export in_ip=$(get_conf "${src_system}_ip")
export in_port=$(get_conf "${src_system}_port")
export in_sid=$(get_conf "${src_system}_sid")
export in_hdfs_ip=$(get_conf "${tag_system}_ip")
export in_hdfs_port=$(get_conf "${tag_system}_port")

# Yesterday's date, for partitioned/incremental variants of this job.
pre_day=$(date -d '-1 day' +%Y%m%d)
pre_day_mon=$(date -d '-1 day' +%Y%m)

# Echo everything we resolved so the scheduler log shows the actual values.
echo "${in_username}"
echo "${in_password}"
echo "${in_ip}"
echo "${in_port}"
echo "${in_sid}"
echo "${in_hdfs_ip}"
echo "${in_hdfs_port}"
echo "${pre_day}"
echo "${pre_day_mon}"

# Full load: empty the target before re-importing.
# NOTE(review): this truncates test_hive.stu, but the DataX job writes to
# ods.ods_t_yybzb_tgh_bankinfo_di - confirm which table is intended.
hive -e "truncate table test_hive.stu;"
python ../../datax.py -p"-Dusername=$in_username -Dpassword=$in_password -Dip=$in_ip -Dport=$in_port -Dsid=$in_sid -Dhdfs_ip=$in_hdfs_ip -Dhdfs_port=$in_hdfs_port" ../json_conf/bank_name.json
(1)创建主文件夹datax_text
(2)创建三个子文件夹
sh_start
json_conf
db_conf
(3)将脚本分别上传至对应文件夹中
① start.sh上传至sh_start
② bank_name.json上传至json_conf
③db.conf上传至db_conf文件夹
(1)创建工作流
(2)配置sql+shell控件节点
truncate table ods.ods_t_yybzb_tgh_bankinfo_di
(3)保存shell节点作为工作流
(4)调度运行
(5)查看运行状态
(1)JSON脚本编写
{
  "job": {
    "content": [
      {
        "reader": {
          "name": "mysqlreader",
          "parameter": {
            "connection": [
              {
                "jdbcUrl": ["jdbc:mysql://${ip}:${port}/${sid}?useSSL=false"],
                "querySql": ["select id,bank_id,bank_name from T_YYBZB_TGH_BANKINFO"]
              }
            ],
            "password": "${password}",
            "username": "${username}"
          }
        },
        "writer": {
          "name": "hdfswriter",
          "parameter": {
            "defaultFS": "hdfs://${hdfs_ip}:${hdfs_port}",
            "fileType": "text",
            "path": "/user/hive/warehouse/ods.db/ods_t_yybzb_tgh_bankinfo_di",
            "fileName": "ods_t_yybzb_tgh_bankinfo_di",
            "column": [
              {"name": "id", "type": "int"},
              {"name": "bank_id", "type": "int"},
              {"name": "bank_name", "type": "string"}
            ],
            "writeMode": "append",
            "fieldDelimiter": "\t",
            "encoding": "utf-8"
          }
        }
      }
    ],
    "setting": {
      "speed": {
        "channel": "1"
      }
    }
  }
}
(2)组件部署
truncate table ods.ods_t_yybzb_tgh_bankinfo_di
(4)上线运行
(5)查看运行结果
[INFO] 2021-12-07 16:04:20.697 - [taskAppId=TASK-10-23-85]:[115] - create dir success /tmp/dolphinscheduler/exec/process/4/10/23/85
[INFO] 2021-12-07 16:04:20.750 - [taskAppId=TASK-10-23-85]:[112] - sql task params {"postStatements":[],"connParams":"","receiversCc":"","udfs":"","type":"HIVE","title":"","sql":"truncate table ods.ods_t_yybzb_tgh_bankinfo_di","preStatements":[],"sqlType":"1","sendEmail":false,"receivers":"","datasource":7,"displayRows":10,"limit":10000,"showType":"TABLE","localParams":[]}
[INFO] 2021-12-07 16:04:20.755 - [taskAppId=TASK-10-23-85]:[128] - Full sql parameters: SqlParameters{type='HIVE', datasource=7, sql='truncate table ods.ods_t_yybzb_tgh_bankinfo_di', sqlType=1, sendEmail=false, displayRows=10, limit=10000, udfs='', showType='TABLE', connParams='', title='', receivers='', receiversCc='', preStatements=[], postStatements=[]}
[INFO] 2021-12-07 16:04:20.755 - [taskAppId=TASK-10-23-85]:[129] - sql type : HIVE, datasource : 7, sql : truncate table ods.ods_t_yybzb_tgh_bankinfo_di , localParams : [],udfs : ,showType : TABLE,connParams : , query max result limit : 10000
[INFO] 2021-12-07 16:04:20.765 - [taskAppId=TASK-10-23-85]:[549] - after replace sql , preparing : truncate table ods.ods_t_yybzb_tgh_bankinfo_di
[INFO] 2021-12-07 16:04:20.765 - [taskAppId=TASK-10-23-85]:[558] - Sql Params are replaced sql , parameters:
[INFO] 2021-12-07 16:04:20.767 - [taskAppId=TASK-10-23-85]:[52] - can't find udf function resource
[INFO] 2021-12-07 16:04:20.974 - [taskAppId=TASK-10-23-85]:[458] - prepare statement replace sql : org.apache.hive.jdbc.HivePreparedStatement@43ecab0e
2021-12-07 16:04:36.123 [job-0] INFO JobContainer - PerfTrace not enable!
2021-12-07 16:04:36.123 [job-0] INFO StandAloneJobContainerCommunicator - Total 15 records, 134 bytes | Speed 13B/s, 1 records/s | Error 0 records, 0 bytes | All Task WaitWriterTime 0.000s | All Task WaitReaderTime 0.000s | Percentage 100.00%
2021-12-07 16:04:36.124 [job-0] INFO JobContainer -
任务启动时刻 : 2021-12-07 16:04:25
任务结束时刻 : 2021-12-07 16:04:36
任务总计耗时 : 11s
任务平均流量 : 13B/s
记录写入速度 : 1rec/s
读出记录总数 : 15
读写失败总数 : 0
(1)查看运行日志
(2)任务下线
(3)调整任务
(4)重新上线并运行
(1)问题如下
(2)问题原因
masterserver服务挂掉
(3)重启服务
./stop-all.sh
./start-all.sh
(1)hive的多用户配置
hive本身不创建用户,用户就是linux的用户。
(2)打开hive的server服务
./hiveserver2
beeline
# 2、连接hive2
!connect jdbc:hive2://192.168.6.102:10000
# 3、输入用户名和密码
Enter username for jdbc:hive2://192.168.6.102:10000: atguigu
Enter password for jdbc:hive2://192.168.6.102:10000: ******
(3)海豚调度器创建数据源
(1)问题现象
(2)问题原因
# 1、检查hiveserver是否启动
netstat -anp |grep 10000
(3)解决方式
启动hiveserver2
current cpu load average 0.35 is too high or available memory 0.14G is too low, under max.cpuload.avg=16.0 and reserved.memory=0.3G
[WARN] 2021-12-06 17:17:45.031 org.apache.dolphinscheduler.server.master.dispatch.host.LowerWeightHostManager:[159] - worker 192.168.6.102:1234 current cpu load average 0.42 is too high or available memory 0.14G is too low
[WARN] 2021-12-06 17:17:45.032 org.apache.dolphinscheduler.server.master.dispatch.host.LowerWeightHostManager:[159] - worker 192.168.6.102:1234 current cpu load average 0.42 is too high or available memory 0.14G is too low
[WARN] 2021-12-06 17:17:50.032 org.apache.dolphinscheduler.server.master.dispatch.host.LowerWeightHostManager:[159] - worker 192.168.6.102:1234 current cpu load average 0.42 is too high or available memory 0.14G is too low
[WARN] 2021-12-06 17:17:50.032 org.apache.dolphinscheduler.server.master.dispatch.host.LowerWeightHostManager:[159] - worker 192.168.6.102:1234 current cpu load average 0.42 is too high or available memory 0.14G is too low
[WARN] 2021-12-06 17:17:51.317 org.apache.dolphinscheduler.server.registry.HeartBeatTask:[80] - current cpu load average 0.3 is too high or available memory 0.14G is too low, under max.cpuload.avg=16.0 and reserved.memory=0.3G
花个一亿提升电脑配置
(1)问题现象
(2)问题原因
(3)解决方案
(2)问题原因
未找到,待解决,吐槽日志…
(3)方案结果
放弃shell调度datax的方式
(1)问题现象
(2)问题原因
(3)解决方案
dolphinscheduler_env.sh
# dolphinscheduler_env.sh - environment sourced by DolphinScheduler workers
# so task shells can find java/hadoop/hive/datax on PATH.
#JAVA_HOME
export JAVA_HOME=/opt/module/jdk1.8.0_212
export PATH=$PATH:$JAVA_HOME/bin
#HADOOP_HOME
export HADOOP_HOME=/opt/module/hadoop-3.1.3
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
#HIVE_HOME
export HIVE_HOME=/opt/module/hive
export PATH=$PATH:$HIVE_HOME/bin
#KAFKA_HOME
export KAFKA_HOME=/opt/module/kafka
export PATH=$PATH:$KAFKA_HOME/bin
#HBASE_HOME
export HBASE_HOME=/opt/module/hbase
export PATH=$PATH:$HBASE_HOME/bin
#FLINK_HOME
export FLINK_HOME=/opt/module/flink-1.10.1
export PATH=$PATH:$FLINK_HOME/bin
#SPARK_HOME
export SPARK_HOME=/opt/module/spark-3.1.2
export PATH=$PATH:$SPARK_HOME/bin
# Bug fix: this line previously exported "PATN" (typo), so Spark's sbin
# directory never actually reached PATH.
export PATH=$PATH:$SPARK_HOME/sbin
#DATAX_HOME
export DATAX_HOME=/opt/module/datax
export PATH=$PATH:$DATAX_HOME/bin
xsync dolphinscheduler_env.sh
(1)问题现状
(2)问题原因
(3)解决方案
(1)问题现状
java.lang.Exception: DataX无法连接对应的数据库,可能原因是:1) 配置的ip/port/database/jdbc错误,无法连接。2) 配置的username/password错误,鉴权失败。请和DBA确认该数据库的连接信息是否正确。
(2)问题原因
(3)解决方案
在jdbc的连接配置中加上useSSL=false
"jdbcUrl": ["jdbc:mysql://$ip:$port/$sid?useSSL=false"],