1. DataX 说明
DataX 是阿里巴巴集团内被广泛使用的离线数据同步工具/平台,实现包括 MySQL、Oracle、SQL Server、PostgreSQL、HDFS、Hive、ADS、HBase、OTS、ODPS 等各种异构数据源之间高效的数据同步功能。
2. 下载
使用git
git clone git@github.com:alibaba/DataX.git
使用wget
wget http://datax-opensource.oss-cn-hangzhou.aliyuncs.com/datax.tar.gz
3. 安装
> git 源码编译
参考:https://github.com/alibaba/DataX/wiki/compile-datax
> wget 安装
参考 https://github.com/alibaba/DataX/wiki/Quick-Start
下载后解压至本地某个目录,修改权限为755,进入bin目录,即可运行样例同步作业:
$ tar zxvf datax.tar.gz
$ sudo chmod -R 755 {YOUR_DATAX_HOME}
$ cd {YOUR_DATAX_HOME}/bin
$ python datax.py ../job/job.json
4. 配置
4.1 job json 文件配置
mysql数据同步到odps(mysql2odps.json):
{
"job": {
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"username": "szy",
"password": "yP0tfUTHvP5q",
"column": ["uid","date_time"],
"connection": [
{
"table": [
"datax_user"
],
"jdbcUrl": [
"jdbc:mysql://127.0.0.1:3306/ztjy"
]
}
]
}
},
"writer": {
"name": "odpswriter",
"parameter": {
"accessId": "06xxZ11yge6LekDx",
"accessKey": "PMbyrM2DZC3QIu586BTyGQCBS5l3nt",
"column": ["uid","date_time"],
"odpsServer": "http://service.odps.aliyun.com/api",
"partition": "",
"project": "odps_testproject001",
"table": "datax_test_user01",
"truncate": true
}
}
}
],
"setting": {
"speed": {
"channel": 5
}
}
}
}
odps同步到mysql(odps2mysql.json):
{
"job": {
"content": [
{
"reader": {
"name": "odpsreader",
"parameter": {
"accessId": "LTAIRFDnrN660HWe",
"accessKey": "T0mFczTQwziIItyySdZ9QMMvANEgc6",
"project": "xmszysample",
"table": "stat_user_tag",
"column": [
"user_id",
"province",
"city",
"county",
"birthday",
"baby_id",
"baby_birthday",
"model"
],
"packageAuthorizedProject": "xmszysample",
"splitMode": "record",
"odpsServer": "http://service.odps.aliyun.com/api"
}
},
"writer": {
"name": "mysqlwriter",
"parameter": {
"writeMode": "insert",
"username": "szy",
"password": "yP0tfUTHvP5q",
"column": [
"user_id",
"province",
"city",
"county",
"birthday",
"baby_id",
"baby_birthday",
"model"
],
"session": [
"set session sql_mode='ANSI'"
],
"preSql": [
"truncate log_stat_user_tag"
],
"connection": [
{
"jdbcUrl": "jdbc:mysql://172.16.200.17:3306/ztjy_xm?useUnicode=true&characterEncoding=utf8",
"table": [
"log_stat_user_tag"
]
}
]
}
}
}
],
"setting": {
"speed": {
"channel": 5
}
}
}
}
5. 执行测试
mysql同步到odps(在 {YOUR_DATAX_HOME}/bin 目录下执行):
$ python datax.py ../job/mysql2odps.json
odps同步到mysql(在 {YOUR_DATAX_HOME}/bin 目录下执行):
$ python datax.py ../job/odps2mysql.json