Download URL: http://datax-opensource.oss-cn-hangzhou.aliyuncs.com/datax.tar.gz
After downloading, upload the archive to the server and extract it; DataX can be used directly with no further installation:
tar -zxvf datax.tar.gz
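To confirm the deployment works before writing a real job, you can run the stream-to-stream sample job that ships with DataX (a quick sanity check; the path assumes the archive was extracted in place):
cd datax
python bin/datax.py job/job.json
If the run ends with a job statistics summary, the installation is good.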
--Create the test Hive table
create table if not exists adm_test (
bizdate string comment 'business date'
,name string comment 'name'
) partitioned by (dt string);
--Since this is a test, insert a single row
insert into adm_test partition (dt = '20200629')
select '20200629' bizdate
,'张三' name;
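A quick query confirms the row landed in the expected partition:
--Verify the inserted row
select * from adm_test where dt = '20200629';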
--Note that MySQL and Hive column types differ; the MySQL target table uses varchar
create table if not exists adm_test (
bizdate varchar(32)
,name varchar(32)
);
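Since the job's preSql (shown in the configs below) deletes by bizdate before every load, an optional index on that column keeps reruns cheap; a minimal sketch (the index name is illustrative):
--Optional: speed up the preSql delete on reruns
alter table adm_test add index idx_bizdate (bizdate);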
| DataX internal type | MySQL data type |
| --- | --- |
| Long | int, tinyint, smallint, mediumint, bigint, year |
| Double | float, double, decimal |
| String | varchar, char, tinytext, text, mediumtext, longtext |
| Date | date, datetime, timestamp, time |
| Boolean | bit, bool |
| Bytes | tinyblob, mediumblob, blob, longblob, varbinary |
Config file location:
vim /alidata1/admin/datax/conf/adm_test.json
##Two configurations are shown below: one for Hadoop HA and one for single-node Hadoop.
##Note: for a partitioned table, the reader's "path" must point all the way into the partition directory.
{
"job": {
"setting": {
"speed": {
"channel": 1
}
},
"content": [{
"reader": {
"name": "hdfsreader",
"parameter": {
"path": "/user/hive/warehouse/test.db/adm_test/dt=${bizdate} 文件存放位置有分区一定要指定到分区里面",
"defaultFS": "hdfs://端口号或者域名/",
"hadoopConfig": {
"dfs.nameservices": "随便给个名字:nameser",
"dfs.ha.namenodes.nameser": "cdh0,cdh2",
"dfs.namenode.rpc-address.nameser.cdh0": "cdh0:8020",
"dfs.namenode.rpc-address.nameser.cdh2": "cdh2:8020",
"dfs.client.failover.proxy.provider.nameser": "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider"
},
"partition": [
"dt='${bizdate}'"
],
"column": [{
"index": 0,
"type": "string"
}, {
"index": 1,
"type": "string"
}
],
"fileType": "text",
"encoding": "UTF-8",
"fieldDelimiter": "\u0001"
}
},
"writer": {
"name": "mysqlwriter",
"parameter": {
"column": [
"bizdate",
"name"
],
"preSql": [
"delete from adm_test where bizdate = '${bizdate}' ; "
],
"writeMode": "update",
"username": "****",
"password": "****",
"connection": [{
"jdbcUrl": "jdbc:mysql://localhost:3306/test?useUnicode=true&characterEncoding=utf8",
"table": [
"adm_test"
]
}]
}
}
}]
}
}
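Note that fieldDelimiter \u0001 (Ctrl-A) is Hive's default field separator for text tables created without an explicit ROW FORMAT clause, which matches the DDL above; if your table declares its own delimiter, mirror it in the reader. You can confirm the delimiter from Hive:
--Inspect the table's storage format and delimiter
show create table adm_test;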
##Single-node
{
"job": {
"content": [{
"writer": {
"parameter": {
"username": "****",
"writeMode": "insert",
"column": [
"bizdate_date",
"name"
],
"connection": [{
"table": [
"adm_test"
],
"jdbcUrl": "jdbc:localhost:3306/test?useUnicode=true&characterEncoding=utf8"
}],
"preSql": [
"delete from adm_test where bizdate_date = '${bizdate}' ; "
],
"password": "****"
},
"name": "mysqlwriter"
},
"reader": {
"parameter": {
"encoding": "UTF-8",
"column": [{
"index": 0,
"type": "string"
},
{
"index": 1,
"type": "string"
}
],
"fieldDelimiter": "\u0001",
"fileType": "text",
"partition": [
"dt='${bizdate}'"
],
"path": "/user/hive/warehouse/test.db/adm_test/dt=${bizdate}",
"defaultFS": "hdfs://cdh0:8020"
},
"name": "hdfsreader"
}
}],
"setting": {
"speed": {
"channel": 1
}
}
}
}
Run the job:
/alidata1/admin/datax/bin/datax.py -p "-Dbizdate=20200629" /alidata1/admin/datax/conf/adm_test.json
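In production the business date usually comes from a scheduler rather than being hard-coded; a minimal wrapper script, assuming GNU date and the paths used above:
#!/bin/bash
#Take the business date from the first argument, defaulting to yesterday (GNU date syntax)
bizdate=${1:-$(date -d "-1 day" +%Y%m%d)}
python /alidata1/admin/datax/bin/datax.py -p "-Dbizdate=${bizdate}" /alidata1/admin/datax/conf/adm_test.json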