最近有需求将数据从mysql同步到oracle,之前有使用kettle将表从oracle同步到mysql,这里使用的插件依然是阿里的dataX
这里也是根据阿里云的一篇帖子来进行验证
https://yq.aliyun.com/articles/715393?spm=a2c4e.11155472.0.0.3aef4dc1IWo28F
1 环境需要
JDK(1.8以上,推荐1.8)
Python(推荐Python2.6.X)
Apache Maven 3.x (Compile DataX)
2 #服务器配置
[root@yhq ~]#yum search java|grep jdk
[root@yhq ~]#yum install java-1.8.0-openjdk.x86_64 -y
[root@yhq ~]#java -version
openjdk version "1.8.0_242"OpenJDK Runtime Environment (build1.8.0_242-b07)
OpenJDK64-Bit Server VM (build 25.242-b07, mixed mode)
[root@yhq ~]#python --version
Python 2.6.6下载地址:https://maven.apache.org/download.cgi
[root@yhq soft]#ls apache-maven-3.6.3-bin.tar.gz
apache-maven-3.6.3-bin.tar.gz
[root@yhq soft]#tar -zxvf apache-maven-3.6.3-bin.tar.gz
[root@yhq soft]#mkdir /opt/maven
[root@yhq soft]#mv apache-maven-3.6.3/* /opt/maven/
[root@yhq soft]#ln -s /opt/maven/b
bin/ boot/[root@yhq soft]#ln -s /opt/maven/bin/mvn /usr/bin/mvn
[root@yhq soft]#vim /etc/profile.d/maven.sh
export M2_HOME=/opt/maven
export PATH=${M2_HOME}/bin:${PATH}[root@yhq soft]#mvn -v
Apache Maven 3.6.3(cecedd343002696d0abb50b32b541b8a6ba2883f)
Maven home:/opt/maven
Java version:1.8.0_242, vendor: Oracle Corporation, runtime: /usr/lib/jvm/java-1.8.0-openjdk-1.8.0.242.b07-1.el6_10.x86_64/jreDefault locale: en_US, platform encoding: UTF-8OS name:"linux", version: "2.6.32-754.el6.x86_64", arch: "amd64", family: "unix"
#安装datax
[root@yhq soft]#tar -zxvf datax.tar.gz
[root@yhq soft]#ls datax
bin conf job lib plugin script tmp#自检测试脚本
[root@yhq soft]#python datax/bin/datax.py datax/job/job.json
DataX (DATAX-OPENSOURCE-3.0), From Alibaba !Copyright (C)2010-2017, Alibaba Group. All Rights Reserved.2020-02-09 23:27:35.717 [main] INFO VMInfo - VMInfo#operatingSystem class => sun.management.OperatingSystemImpl
2020-02-09 23:27:35.763 [main] INFO Engine - the machine info =>osInfo: Oracle Corporation1.8 25.242-b07
jvmInfo: Linux amd642.6.32-754.el6.x86_64
cpu num:2
2020-02-09 23:27:46.974 [job-0] INFO JobContainer - PerfTrace not enable!
2020-02-09 23:27:46.975 [job-0] INFO StandAloneJobContainerCommunicator - Total 100000 records, 2600000 bytes | Speed 253.91KB/s, 10000 records/s | Error 0 records, 0 bytes | All Task WaitWriterTime 0.020s | All Task WaitReaderTime 0.121s | Percentage 100.00%
2020-02-09 23:27:46.976 [job-0] INFO JobContainer -任务启动时刻 :2020-02-09 23:27:35任务结束时刻 :2020-02-09 23:27:46任务总计耗时 : 10s
任务平均流量 :253.91KB/s
记录写入速度 : 10000rec/s
读出记录总数 :100000读写失败总数 :0
#测试使用
mysql 表
CREATE TABLE `datax_test` (
`id` int(11) NOT NULL,
`name` varchar(20) DEFAULTNULL,
`birthday` dateDEFAULTNULL,
`memo` varchar(32) DEFAULTNULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;#插入10w数据
#在oracle里面可以先创建好表结构,或者用kettle同步表
create table datax_test
(id number,
name varchar2(20),
birthday date,
memo varchar2(32));
#创建配置文件
#可以通过命令查看配置模板: python datax.py -r {YOUR_READER} -w {YOUR_WRITER}
#python datax.py -r streamreader -w streamwriter
[root@yhq job]#vim job1.json
{"job": {"setting": {"speed": {"channel": 5}
},"content": [
{"reader": {"name": "mysqlreader","parameter": {"username": "system","password": "****","column": ["*"],"connection": [
{"table": ["datax_test"],"jdbcUrl": ["jdbc:mysql://ip:3306/vision"]
}
]
}
},"writer": {"name": "oraclewriter","parameter": {"username": "vision","password": "***","column": ["*"],"connection": [
{"jdbcUrl": "jdbc:oracle:thin:@ip:1521:bol","table": ["datax_test"]
}
]
}
}
}
]
}
}
#启动数据同步
[root@yhq job]#python /data/soft/datax/bin/datax.py job1.json
DataX (DATAX-OPENSOURCE-3.0), From Alibaba !Copyright (C)2010-2017, Alibaba Group. All Rights Reserved.2020-02-10 00:12:03.796 [main] INFO VMInfo - VMInfo#operatingSystem class => sun.management.OperatingSystemImpl
2020-02-10 00:12:03.807 [main] INFO Engine - the machine info =>osInfo: Oracle Corporation1.8 25.242-b07
jvmInfo: Linux amd642.6.32-754.el6.x86_64
cpu num:2totalPhysicalMemory:-0.00G
freePhysicalMemory:-0.00G
maxFileDescriptorCount:-1currentOpenFileDescriptorCount:-1GC Names [PS MarkSweep, PS Scavenge]
MEMORY_NAME| allocation_size |init_size
PS Eden Space| 256.00MB | 256.00MB
Code Cache| 240.00MB | 2.44MB
Compressed Class Space| 1,024.00MB | 0.00MB
PS Survivor Space| 42.50MB | 42.50MB
PS Old Gen| 683.00MB | 683.00MB
Metaspace| -0.00MB | 0.00MB2020-02-10 00:12:03.869 [main] INFO Engine -
2020-02-10 00:13:48.061 [job-0] INFO JobContainer - PerfTrace not enable!
2020-02-10 00:13:48.061 [job-0] INFO StandAloneJobContainerCommunicator - Total 99999 records, 1888875 bytes | Speed 21.96KB/s, 1190 records/s | Error 0 records, 0 bytes | All Task WaitWriterTime 42.323s | All Task WaitReaderTime 29.424s | Percentage 100.00%
2020-02-10 00:13:48.069 [job-0] INFO JobContainer -任务启动时刻 :2020-02-10 00:12:05任务结束时刻 :2020-02-10 00:13:48任务总计耗时 : 102s
任务平均流量 :21.96KB/s
记录写入速度 : 1190rec/s
读出记录总数 :99999读写失败总数 :0
#在oracle查看数据
select count(*) from datax_test; #同步正常
#剩下的增量同步,在crontab中定时任务就行。
$ crontab -e
#会进入已有crontab文件编辑界面,继续增加定时任务即可,本示例增加以下内容,并保存
0,10,20,30,40,50 * * * * python /data/soft/datax/bin/datax.py /data/soft/datax/job/mysql2odps.json >>/tmp/log.`date +\%Y\%m\%d\%H\%M\%S` 2>&1
测试环境
#测试环境
==mysql db 5.7.27 centos 6.10create database vision;==oracle db 11.2.0.4 centos 6.10select file#,name from v$datafile;
create tablespace vision
logging
datafile '/u01/app/oracle/oradata/bol/vision01.dbf'
size 100m
autoextend on
next 500m;
create user vision identified by ****defaulttablespace vision ;
grant connect,resource,dba to vision;==delimiter//create procedure insrt_t ()begindeclare i intdefault 1;
declare j intdefault 1;while i < 100000 do
#insert into test values (i,'a');
insert into datax_test
(id,name,birthday,memo)
VALUES
(i,'yhq',now(),'123');
set i=i+1;
set j=j+1;if j=5000then
set j=1;
commit;end if;end while;end
//delimiter ;