1.1 总体说明
Goldengate软件目录的属主为oracle:dba,权限为775;
1.2 添加环境变量
在oracle用户下增加环境变量:
源端:
export GG_HOME=/data/oggo
export PATH=$PATH:$GG_HOME
export LIBPATH=$GG_HOME:$ORACLE_HOME/lib
目标端bigdata:要求必须jdk1.7
export GG_HOME=/data/oggt
export PATH=$PATH:$GG_HOME
export LD_LIBRARY_PATH=$GG_HOME:/usr/lib/jvm/java-1.7.0-openjdk-1.7.0.45.x86_64/jre/lib/amd64/server
Platform Environment variable
IBM AIX LIBPATH
HP-UX SHLIB_PATH
Sun Solaris LD_LIBRARY_PATH
HP Tru64 (OSF/1) LD_LIBRARY_PATH
LINUX LD_LIBRARY_PATH
1.3 GG软件的安装(源和目标系统均需要安装)
1)
使用Oracle用户(如非此用户,请自行取代,下同)通过ftp上传GGS软件到安装目录下并解压缩:
unzip fbo_ggs_Linux_x64_shiphome.zip
修改参数文件:
cd fbo_ggs_Linux_x64_shiphome/Disk1/response
执行vi oggcore.rsp修改以下参数:
INSTALL_OPTION=ORA11g
SOFTWARE_LOCATION=/data/oggo
DATABASE_LOCATION=/usr/lib/oracle/u01/app/oracle
执行runInstaller开始安装:
./runInstaller -silent -nowait -responseFile /data/oggo/fbo_ggs_Linux_x64_shiphome/Disk1/response/oggcore.rsp
2)
在/goldengate目录下运行GGS命令行控制界面:
./ggsci
3)
在GGS命令行界面下创建子目录:
GGSCI> create subdirs
1.4 开启源端数据库附加日志
1)检查附加日志情况
使用以下sql语句检查数据库附加日志的打开状况:
Select
SUPPLEMENTAL_LOG_DATA_MIN
,SUPPLEMENTAL_LOG_DATA_PK
,SUPPLEMENTAL_LOG_DATA_UI
,SUPPLEMENTAL_LOG_DATA_FK
,SUPPLEMENTAL_LOG_DATA_ALL
, FORCE_LOGGING from v$database;
2)打开数据库附加日志
打开附加日志并切换日志(保证Online redo log和Archive log一致)
在mount状态下打开归档日志:
Alter database archivelog;
alter database add supplemental log data ;
或全部打开:
alter database add supplemental log data(primary key,unique) columns;
alter database force logging;
3)创建Goldengate用户
create tablespace ogg datafile '/data/oradata/orcl/ogg.dbf' size 1G;
create user ogg identified by ogg default tablespace ogg;
GRANT execute on DBMS_SQL to ogg;
grant EXEMPT ACCESS POLICY to ogg;
grant create table to ogg;
grant create sequence to ogg;
alter user ogg quota unlimited on ogg;
GRANT CONNECT TO ogg;
GRANT RESOURCE TO ogg;
GRANT DBA TO ogg;
GRANT CREATE SESSION TO ogg;
GRANT ALTER SESSION TO ogg;
GRANT SELECT ANY DICTIONARY TO ogg;
GRANT SELECT ANY TABLE TO ogg;
GRANT FLASHBACK ANY TABLE TO ogg;
GRANT ALTER ANY TABLE TO ogg;
GRANT ALTER SYSTEM TO ogg;
GRANT SELECT on dba_clusters TO ogg;
GRANT EXECUTE ON utl_file TO ogg;
BEGIN
DBMS_GOLDENGATE_AUTH.GRANT_ADMIN_PRIVILEGE(
Grantee => 'OGG',
privilege_type => 'CAPTURE',
grant_select_privileges => TRUE,
do_grants => TRUE);
END;
/
4)打开表级附加日志
alter table ogg.test add supplemental log data(all) columns;
alter table SMODEL_JLJ.CM_PORT add supplemental log data(all) columns;
alter table SMODEL_JLJ.CM_LINK add supplemental log data(all) columns;
1.5 数据库11.2.0.4版本参数修改
从数据库11.2.0.4版本开始,需要在源和目标端数据库上同时修改ENABLE_GOLDENGATE_REPLICATION参数:
sqlplus / as sysdba
alter system set ENABLE_GOLDENGATE_REPLICATION=true scope=both;
1.6 配置GLOBALS
编辑GLOBALS:
EDIT PARAMS ./GLOBALS
GGSCHEMA ogg
1.7 配置定义文件
./ggsci
1.8 建立OGG进程和队列
(1)
MGR参数和进程(源和目标均需要配置)
cd $GG_HOME
./ggsci
ggsci>edit param mgr --源端
PORT 7809
DYNAMICPORTLIST 7840-7914
--AUTORESTART EXTRACT *, RETRIES 5, WAITMINUTES 3, RESETMINUTES 60
PURGEOLDEXTRACTS ./dirdat/*, usecheckpoints, minkeepdays 1
LAGREPORTHOURS 1
LAGINFOMINUTES 30
LAGCRITICALMINUTES 45
ggsci>edit param mgr --目标端
PORT 17809
DYNAMICPORTLIST 17810-17820
--AUTORESTART REPLICAT *, RETRIES 5, WAITMINUTES 3, RESETMINUTES 60
PURGEOLDEXTRACTS ./dirdat/*, usecheckpoints, minkeepdays 1
LAGREPORTHOURS 1
LAGINFOMINUTES 30
LAGCRITICALMINUTES 45
配置完成后,源和目标均启动:
ggsci> start mgr
(2)
源端配置extract和pump进程(根据用户实际环境填写)
配置服务器A捕获进程
cd $GG_HOME
./ggsci
ggsci>edit param ext_jl
具体ext_jl.prm内容如下:
EXTRACT ext_jl
SETENV (NLS_LANG="AMERICAN_AMERICA.ZHS16GBK")
USERID ogg, PASSWORD ogg
DISCARDFILE ./dirrpt/ext_jl.dsc, APPEND, MEGABYTES 100
TRANLOGOPTIONS dblogreader
DBOPTIONS ALLOWUNUSEDCOLUMN
WARNLONGTRANS 2h,CHECKINTERVAL 3m
FETCHOPTIONS NOUSESNAPSHOT
TRANLOGOPTIONS CONVERTUCS2CLOBS
--ddl include mapped
EXTTRAIL ./dirdat/jl
--TRANLOGOPTIONS ALTARCHIVELOGDEST PRIMARY INSTANCE oraXPAD1 /gg/sarch1, ALTARCHIVELOGDEST INSTANCE oraXPAD2 /gg/sarch2
WILDCARDRESOLVE DYNAMIC
dynamicresolution
GETUPDATEBEFORES
NOCOMPRESSUPDATES
NOCOMPRESSDELETES
gettruncates
TABLE SMODEL_JLJ.*, tokens (INDICATOR=@GETENV('GGHEADER','BEFOREAFTERINDICATOR'));
TABLE gg.tcustmer, tokens (INDICATOR=@GETENV('GGHEADER','BEFOREAFTERINDICATOR'));
配置传输进程
cd $GG_HOME
./ggsci
ggsci>edit param dpe_jl
具体dpe_jl.prm内容如下:
EXTRACT dpe_jl
PASSTHRU
RMTHOST 10.142.96.16, MGRPORT 17809
RMTTRAIL ./dirdat/jl
DYNAMICRESOLUTION
GETTRUNCATES
TABLE SMODEL_JLJ.*;
table gg.tcustmer;
增加ext和pump进程以及队列:
cd $GG_HOME
./ggsci
ADD EXTRACT ext_jl, TRANLOG, BEGIN NOW
ADD EXTTRAIL ./dirdat/jl, EXTRACT ext_jl, MEGABYTES 200
ADD EXTRACT dpe_jl, EXTTRAILSOURCE ./dirdat/jl
ADD RMTTRAIL ./dirdat/jl, EXTRACT dpe_jl, MEGABYTES 200
(3)
配置hdfs投递进程
创建hdfs目录:
su - hjpt
hadoop fs -mkdir /data/hjpt/itf/hqit/oss/jl
hadoop fs -ls /data/hjpt/itf/hqit/oss/jl
hadoop fs -chmod -R +w /data/hjpt/itf/hqit/oss/jl
设置HDFS_HOME环境变量
./ggsci
ggsci>edit param hive_jl
具体 hive_jl.prm内容如下:
REPLICAT hive_jl
TARGETDB LIBFILE libggjava.so SET property=dirprm/hdfs_jl.props
GETUPDATEBEFORES
gettruncates
DISCARDFILE ./dirrpt/hive_jl.dsc, purge
REPORTCOUNT EVERY 1 MINUTES, RATE
GROUPTRANSOPS 2000
MAXTRANSOPS 5000
MAP gg.*, TARGET gg.*;
MAP SMODEL_JLJ.*, TARGET SMODEL_JLJ.*;
配置hdfs_jl.props内容如下:
gg.handlerlist=hdfs
gg.handler.hdfs.type=hdfs
gg.handler.hdfs.includeTokens=true
gg.handler.hdfs.maxFileSize=256m
gg.handler.hdfs.rootFilePath=/data/hjpt/itf/hqit/oss/jl
gg.handler.hdfs.fileRollInterval=0
gg.handler.hdfs.inactivityRollInterval=0
gg.handler.hdfs.fileSuffix=.txt
gg.handler.hdfs.partitionByTable=true
gg.handler.hdfs.rollOnMetadataChange=true
gg.handler.hdfs.authType=none
gg.handler.hdfs.format=delimitedtext
gg.handler.hdfs.format.includeColumnNames=false
gg.handler.hdfs.mode=tx
#
gg.handler.hdfs.format.includeOpTimestamp=true
gg.handler.hdfs.format.includeCurrentTimestamp=true
gg.handler.hdfs.format.insertOpKey=I
gg.handler.hdfs.format.updateOpKey=U
gg.handler.hdfs.format.deleteOpKey=D
gg.handler.hdfs.format.truncateOpKey=T
gg.handler.hdfs.format.encoding=UTF-8
gg.handler.hdfs.format.fieldDelimiter=CDATA[\u0001]
#gg.handler.hdfs.format.fieldDelimiter='|'
gg.handler.hdfs.format.lineDelimiter=CDATA[\n]
gg.handler.hdfs.format.includeTableName=true
gg.handler.hdfs.format.keyValueDelimiter=CDATA[=]
#gg.handler.hdfs.format.keyValuePairDelimiter=CDATA[,]
gg.handler.hdfs.format.pkUpdateHandling=update
gg.handler.hdfs.format.nullValueRepresentation=
gg.handler.hdfs.format.missingValueRepresentation=CDATA[]
gg.handler.hdfs.format.includePosition=false
#
goldengate.userexit.timestamp=utc
goldengate.userexit.writers=javawriter
javawriter.stats.display=TRUE
javawriter.stats.full=TRUE
gg.log=log4j
gg.log.level=INFO
gg.report.time=30sec
gg.classpath=/etc/hadoop/conf.empty:/usr/lib/hadoop/*:/usr/lib/hadoop-hdfs/lib/*:/usr/lib/hadoop-hdfs/*:/usr/lib/hadoop/lib/*
javawriter.bootoptions=-Xmx8200m -Xms8192m -Djava.class.path=ggjava/ggjava.jar
增加目标端hive投递进程:
cd $GG_HOME
./ggsci
GGSCI> add replicat hive_jl, EXTTRAIL ./dirdat/jl