mysql2mysql.sh
[root@azkaban testDB]# cat mysql2mysql.sh
#Get the start time of this run
incre_stime=`date -d "today" +"%Y-%m-%d %H:%M:%S"`
#Source table info
source_table_info="192.168.108.140:3306-jiakesong-person"
#Destination table info
destination_table_info="192.168.108.140:3306-jiakesong-person_copy"
# Incremental column
incre_column='id'
#Connect to MySQL and get the current max id in the destination table
temp=(`mysql -h 192.168.108.140 -umysql -pmysql -e"use jiakesong;
SELECT max(id) FROM person_copy;"`)
#>/home/test1/testDB/incre_mmysql.txt
declare -i incre_column_value=${temp[1]}
echo "历史最大id:$incre_column_value"
#Get the number of incremental rows
temp1=(`mysql -h 192.168.108.140 -umysql -pmysql -e"use jiakesong;
SELECT count(*)num FROM person where id>${incre_column_value};"`)
declare -i incre_num=${temp1[1]}
echo "增量条数:${incre_num}"
#Compute the expected max incremental value after this run
declare -i curMax=$incre_column_value+$incre_num
echo "本次执行结束后的最大增量值:$curMax"
#declare -i maxid=`sed -n '2p' incre_mmysql.txt`
#Set the DataX parameters and run the incremental transfer
python /opt/datax/bin/datax.py -p"\
-Dchannel=4 \
-Drname=mysqlreader \
-DrjdbcUrl='jdbc:mysql://192.168.108.140:3306/jiakesong' \
-DrquerySql='select id,name,age from person where id>${incre_column_value};' \
-Drusername=mysql \
-Drpassword=mysql \
-Dwname=mysqlwriter \
-Dwcolumns='id\",\"name\",\"age' \
-DwjdbcUrl='jdbc:mysql://192.168.108.140:3306/jiakesong' \
-Dwtable=person_copy \
-Dwusername=mysql \
-Dwpassword=mysql \
-DwriteMode=insert" ./mysql2mysql.json >./err1.txt 2>&1
#Filter the error log ("可能原因是" is the Chinese "possible cause is" phrase emitted in the DataX log)
grep -w "可能原因是" err1.txt>./err.txt
rm -rf ./err1.txt
#If err.txt is not empty, the run failed
if [ -s err.txt ];then
#if [ $err != "" ];then
status=failed
# reason=echo `cat err.txt`
reason=`grep -w "Exception" err.txt`
curMax=$incre_column_value
incre_num=0
else
status=succeed
reason=""
fi
echo "status: ${status},reason:${reason} "
#Get the end time of this run
incre_etime=`date -d "today" +"%Y-%m-%d %H:%M:%S"`
#Write the run log and incremental info into the incre_record tracking table
#sqlTxt holds a literal double quote, used to wrap string values in the INSERT below
sqlTxt="\""
item_name="test_incre_exportData_mysqltomysql"
mysql -h 192.168.108.140 -umysql -pmysql -e"set names utf8;use jiakesong;
insert into incre_record(item_name,source_table,destination_table,incre_column,incre_column_value,status,reason,incre_num,incre_stime,incre_etime)
values($sqlTxt$item_name$sqlTxt,$sqlTxt$source_table_info$sqlTxt,$sqlTxt$destination_table_info$sqlTxt,$sqlTxt$incre_column$sqlTxt,$sqlTxt$curMax$sqlTxt,$sqlTxt$status$sqlTxt,$sqlTxt$reason$sqlTxt,$sqlTxt$incre_num$sqlTxt,$sqlTxt$incre_stime$sqlTxt,$sqlTxt$incre_etime$sqlTxt);"
#Remove the temporary error file
rm -rf ./err.txt
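
The INSERT at the end of the script assumes an incre_record tracking table already exists in the jiakesong database. That table is not shown in the original listing; a minimal sketch, with column names taken from the INSERT statement and all types assumed (every value is inserted as a quoted string, so varchar is the safest guess), could be created like this:

mysql -h 192.168.108.140 -umysql -pmysql -e"use jiakesong;
CREATE TABLE IF NOT EXISTS incre_record (
  item_name          varchar(128),  -- job name
  source_table       varchar(128),  -- source as host:port-db-table
  destination_table  varchar(128),  -- destination as host:port-db-table
  incre_column       varchar(64),   -- incremental column name
  incre_column_value varchar(64),   -- max incremental value after the run
  status             varchar(32),   -- succeed / failed
  reason             varchar(1024), -- failure reason filtered from the DataX log
  incre_num          varchar(32),   -- number of rows transferred
  incre_stime        varchar(32),   -- run start time
  incre_etime        varchar(32)    -- run end time
);"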
mysql2mysql.json
[root@azkaban testDB]# cat mysql2mysql.json
{
"job": {
"content": [
{
"reader": {
"name": "$rname",
"parameter": {
"column": [],
"connection": [
{
"jdbcUrl": ["$rjdbcUrl"],
"querySql": [
"$rquerySql"
]
}
],
"password": "$rpassword",
"username": "$rusername",
"where": ""
}
},
"writer": {
"name": "$wname",
"parameter": {
"column": ["$wcolumns"],
"connection": [
{
"jdbcUrl": "$wjdbcUrl",
"table": ["$wtable"]
}
],
"password": "$wpassword",
"preSql": [],
"session": [],
"username": "$wusername",
"writeMode": "$writeMode"
}
}
}
],
"setting": {
"speed": {
"channel": "4"
}
}
}
}
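
In this setup, each -Dname=value passed through datax.py's -p option in the shell script is substituted into the matching $name placeholder of the JSON template above, so the same job file can be reused for other tables by changing only the shell variables. A typical run, followed by a quick look at the tracking table (host and credentials as in the script), might look like this:

bash ./mysql2mysql.sh
mysql -h 192.168.108.140 -umysql -pmysql -e"use jiakesong;
SELECT item_name,status,incre_num,incre_stime,incre_etime FROM incre_record ORDER BY incre_etime DESC LIMIT 1;"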
[root@gpmaster01 etl]# find / -name datax.py
Run: python /home/etl/datax/bin/datax.py ./hive2pg.json
[root@gpmaster01 etl]# cat hive2pg.json
{
"job": {
"setting": {
"speed": {
"byte":1048576,
"channel":"4"
}
},
"content": [
{
"reader": {
"name": "hdfsreader",
"parameter": {
"path": "/user/hive/warehouse/student/student.txt",
"defaultFS": "hdfs://cdhmaster01.unisinsight.com:8020",
"column": [
{
"index": 0,
"type": "long"
},
{
"index": 1,
"type": "string"
},
{
"index": 2,
"type": "long"
},
{
"index": 3,
"type": "date"
}
],
"fileType": "text",
"encoding": "UTF-8",
"fieldDelimiter": ","
}
},
"writer": {
"name": "postgresqlwriter",
"parameter": {
"print": true,
"encoding": "UTF-8",
"username": "postgres",
"password": "passwd",
"column": [
"id","name","age","recordtime"
],
"connection": [
{
"jdbcUrl": "jdbc:postgresql://192.168.111.64:5432/dumptest",
"table": ["public.student"]
}
]
}
}
}
]
}
}
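
After the hive2pg job finishes, the rows read from /user/hive/warehouse/student/student.txt should land in public.student. A quick sanity check on the PostgreSQL side (connection details taken from the jdbcUrl above; psql will prompt for the password) could be:

psql -h 192.168.111.64 -p 5432 -U postgres -d dumptest -c "SELECT count(*) FROM public.student;"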