Databus Relays主要负责以下两个工作:
目前支持Oracle和Mysql两种数据源的抓取。
抓取Oracle数据是通过给源表添加一个触发器,在新增和修改的时候记录SCN号作为查询的依据,通过relay定期的查询获取变化的数据。删除和查询不受影响。
首次部署databus,主要有以下几个步骤(非首次部署,只需要从步骤3开始):
创建databus表空间、用户、赋权限
-- 创建databus表空间,databus用户,并给databus赋权限(参见createUser.sql),注意需要指定datafile,如果表空间名字修改了,则需要修改tablespace文件
-- Data tablespace for the DATABUS schema. ${DBDIR} must be substituted
-- with the database's datafile directory before running.
create tablespace TBS_DATABUS
    datafile '${DBDIR}/tbs_databus_01.dbf' size 50M reuse
    autoextend on next 50M maxsize unlimited
    extent management local uniform size 2M;

-- Separate tablespace for DATABUS indexes.
create tablespace TBS_DATABUS_IDX
    datafile '${DBDIR}/tbs_databus_idx_01.dbf' size 50M reuse
    autoextend on next 50M maxsize unlimited
    extent management local uniform size 2M;

-- Schema owner (username DATABUS, password DATABUS).
-- NOTE(review): assumes a temporary tablespace named temp1 exists -- confirm.
create user DATABUS identified by DATABUS
    default tablespace TBS_DATABUS
    temporary tablespace temp1;

-- Object-creation privileges needed to install the Databus schema.
grant create session, create table, create view, create sequence,
      create procedure, create trigger, create type, create job
   to DATABUS;
grant query rewrite to DATABUS;

-- SYS package grants used by the capture framework: alerts, locks,
-- Advanced Queuing, and SCN lookups via v$database.
grant execute on dbms_alert to DATABUS;
grant execute on sys.dbms_lock to DATABUS;
grant select on sys.v_$database to DATABUS;
grant execute on sys.dbms_aq to DATABUS;
grant execute on sys.dbms_aqadm to DATABUS;
grant execute on sys.dbms_aqin to DATABUS;
grant execute on sys.dbms_aq_bqview to DATABUS;

-- Unlimited quota on both DATABUS tablespaces.
alter user DATABUS quota unlimited on TBS_DATABUS;
alter user DATABUS quota unlimited on TBS_DATABUS_IDX;
|
创建databus核心包、表、存储过程等必须的东西,使用databus用户,执行oracle/bin/createSchema.sh databus/databus@posp_boss ${Dir}/BOOK
#
#
# Copyright
2013
LinkedIn Corp. All rights reserved
#
# Licensed under the Apache License, Version
2.0
(the
"License"
);
# you may not use
this
file except in compliance with the License.
# You may obtain a copy of the License at
#
# http:
//www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
#
"AS IS"
BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License
for
the
# specific language governing permissions and limitations
# under the License.
#
#
usage() {
echo
"Usage: $0
}
# Database Connection Info
DB=$
1
# Log Directory
SRC_VIEW_DIR=$
2
# Check that DB is set
if
[
"x$DB"
=
"x"
]
then
usage
exit
1
fi
user=`echo $DB | perl -lane
'{my $a = $_; $a =~ s/([^\/]*)\/.*/$1/; print $a;}'
`;
password=`echo $DB | perl -lane
'{my $a = $_; $a =~ s/[^\/]*\/([^@]*)\@.*/$1/; print $a;}'
`;
sid=`echo $DB | perl -lane
'{my $a = $_; $a =~ s/[^\/]*\/[^@]*\@(.*)/$1/; print $a;}'
`;
if
[
"x$user"
=
"x"
] || [
"x$password"
=
"x"
] || [
"x$sid"
=
"x"
]
then
echo
"First argument not in format
usage
exit
1
fi
if
[ ! -d
"$SRC_VIEW_DIR"
]
then
echo
"Src view directory not found or not a directory"
usage
exit
1
fi
if
[ ! -f
"$SRC_VIEW_DIR/tablespace"
]
then
echo
"Tablespace file ($SRC_VIEW_DIR/tablespace) not provided"
usage
exit
1
fi
tbs=`cat $SRC_VIEW_DIR/tablespace`;
tbs_uc=`echo $tbs | tr
'[A-Z]'
'[a-z]'
`
tbs_lc=`echo $tbs | tr
'[a-z]'
'[A-Z]'
`
echo
"User : $user"
echo
"Password : $password"
echo
"SID : $sid"
echo
"DB : $DB"
echo
"TBS : $tbs_uc"
#
# create sequenes,tables
#
../fwk/schema/ddl_sqs.sh
"$DB"
../fwk/schema/ddl_tab.sh
"$DB"
"$tbs_uc"
# add column txn
#
for
t in ${SRC_VIEW_DIR}/*\.tab;
#
do
# echo
"Processing Table definition File : $t"
# table=`echo $t | perl -lane
'{ $a = $_; $a =~ s/.*\/(.*)\.tab/$1/; print $a; }'
`
# echo
"Creating Table $table. File is $t"
# echo
"sqlplus ${DB} "
@$t
" ${sid} ${user} ${password} ${tbs_lc}"
# sqlplus ${DB} << __EOF__
#
@$t
${sid} ${user} ${password} ${tbs_lc}
#__EOF__
#done
#
# create contains/indexes
#
../fwk/schema/ddl_con.sh
"$DB"
"$tbs_uc"
../fwk/schema/ddl_ind.sh
"$DB"
"$tbs_uc"
#
# crete procedure/packages/functions
#
for
t in ${SRC_VIEW_DIR}/*\.tab;
do
table=`echo $t | perl -lane
'{ $a = $_; $a =~ s/.*\/(.*)\.tab/$1/; print $a; }'
`
echo
"Setting up Alerts for $table. File is $t"
../fwk/schema/ddl_prc.sh
"$DB"
"$table"
done
#
# create databus views
#
../fwk/schema/ddl_vw.sh
"$DB"
#
# create business table's view
#
#
for
t in ${SRC_VIEW_DIR}/*\.view;
#
do
# echo
"Processing View definition File : $t"
# view=`echo $t | perl -lane
'{ $a = $_; $a =~ s/.*\/(.*)\.view/$1/; print $a; }'
`
# echo
"Creating view sy\$$view"
# pattern=
"sy\\$"
;
# pattern+=
"${view}"
;
# wc=`grep -ic
"${pattern}"
$t`
#
if
[ $wc -lt
1
];
# then
# echo
"View names should start with sy\$. Offending view file ($t)"
;
# exit
1
# fi
# sqlplus ${DB} << __EOF__
#
@$t
${sid} ${user} ${password} ${tbs_lc};
#__EOF__
#done
#
# create sync_alert
#
## execute rest
for
t in ${SRC_VIEW_DIR}/*\.tab;
do
table=`echo $t | perl -lane
'{ $a = $_; $a =~ s/.*\/(.*)\.tab/$1/; print $a; }'
`
echo
"Creating trigger for source $table"
../fwk/schema/ddl_prc1.sh
"$DB"
"$table"
done
#
# create business table's trigger
#
#
for
t in ${SRC_VIEW_DIR}/*\.tab;
#
do
# table=`echo $t | perl -lane
'{ $a = $_; $a =~ s/.*\/(.*)\.tab/$1/; print $a; }'
`
# echo
"Creating trigger for source $table"
# ../fwk/schema/ddl_trg.sh
"$DB"
"$table"
#done
echo
"INFO: compiling types"
sqlplus $DB << __EOF__
exec compile_allobjects;
__EOF__
echo
"INFO: Sync_core seeding "
sqlplus $DB << __EOF__
insert into sync_core_settings values (
'N'
);
commit;
show errors;
__EOF__
#
# sources setting
#
#echo
"INFO: sy\$SOURCES setup"
#i=
0
;
#
for
t in ${SRC_VIEW_DIR}/*\.tab;
#
do
# echo
"Processing Table definition File : $t"
# table=`echo $t | perl -lane
'{ $a = $_; $a =~ s/.*\/(.*)\.tab/$1/; print $a; }'
`
# echo
"Creating Table $table"
# sqlplus $DB << __EOF__
# insert into sy\$sources values(
'$table'
,$i);
#__EOF__
# ((i++))
#done
|
修改表结构,增加一列 TXN NUMBER(posp_boss)
-- 创建表,添加TXN列
-- Source table. The trailing TXN column is required by Databus: the
-- capture trigger stamps each inserted/updated row's txn number into it.
create table posp_boss.person
(
    id          number primary key,
    first_name  varchar(120) not null,
    last_name   varchar(120) not null,
    birth_date  date,
    deleted     varchar(5) default 'false' not null,
    txn         number
);
|
将源表权限赋给databus(posp_boss)
-- Let the DATABUS user read and write the source table (run as posp_boss).
grant insert, update, select on posp_boss.PERSON to databus;
|
给posp_boss赋databus.sync_core包的执行权限(databus)
-- Let the source schema execute databus.sync_core -- the capture trigger
-- below calls sync_core.getTxn (run as DATABUS).
grant execute on databus.sync_core to posp_boss;
|
创建索引(posp_boss)
-- 创建索引(posp_boss)
-- Index the TXN column: the relay polls for changed rows by txn, so this
-- avoids full scans of PERSON.
-- NOTE(review): assumes a tablespace named index_data exists -- confirm.
create index posp_boss.PERSON_txn
    on POSP_BOSS.PERSON (txn) tablespace index_data;
|
创建表视图,注意一定要把TXN列包括进去,并且要把ID映射为KEY
-- 主键ID 映射为 KEY
-- Databus source view over the business table. It must expose TXN and
-- alias the primary-key column ID as KEY.
CREATE OR REPLACE FORCE VIEW sy$person AS
SELECT
    txn,
    id key,
    first_name,
    last_name,
    birth_date,
    deleted
FROM posp_boss.person;
|
新增sy$sources表配置,注意value的值必须小于等于125
-- 注意sourceName区分大小写,对应到上面触发器里面的sync_core.getTxn('POSP_BOSS.PERSON')的POSP_BOSS.PERSON
-- Register the source. The source name is case-sensitive and must match
-- the sync_core.getTxn('POSP_BOSS.PERSON') argument in the trigger; the
-- numeric value must be <= 125 and must match the relay config's id.
insert into databus.sy$sources
values ('POSP_BOSS.PERSON', 1);
|
创建触发器
-- 注意和sy$sources插入的值一致
-- Capture trigger: stamps every inserted/updated row with a txn number
-- from databus.sync_core. On update, a negative incoming :new.txn is
-- simply negated back to positive instead of drawing a new txn
-- (NOTE(review): presumably sync_core's "already stamped" convention --
-- confirm against the sync_core package).
CREATE TRIGGER PERSON_TRG
before insert or update on POSP_BOSS.PERSON
referencing old as old new as new
for each row
begin
  if (updating and :new.txn < 0) then
    :new.txn := -:new.txn;
  else
    :new.txn := databus.sync_core.getTxn('POSP_BOSS.PERSON');
  end if;
end;
|
至此,针对于Oracle的数据抓取数据端的配置就全部配置完毕了。
Mysql的数据抓取比较简单
设置binlog日志格式为ROW,默认是STATEMENT。binlog_format = ROW ,只有ROW模式才会记录受影响的行数,Databus默认只获取影响行数的事件
server-id = 1
log_bin = mysql-bin
expire_logs_days = 10
max_binlog_size = 100M
binlog_format = ROW
|
配置数据源,注意sources的id必须与sy$sources中的value一致
{
    "name" : "boss",
    "id" : 1,
    "uri" : "mysql://repl%2F123456@localhost:3306/1/mysql-bin",
    "slowSourceQueryThreshold" : 2000,
    "sources" : [
        {
            "id" : 1,
            "name" : "com.linkedin.events.example.person.Person",
            "uri" : "lijiang.person",
            "partitionFunction" : "constant:1"
        }
    ]
}
|
uri的格式为:mysql://用户%2F密码@host:port/serverID/binlog文件名称
另外需要注意sources里对应数据源的uri,必需带上数据库名称,格式为 db.table
配置relay sources,sources的id必须与sy$sources的value一致。注意oracle和mysql的配置是不一样的。
# oracle
# oracle
{
    "name" : "person",
    "id" : 1,
    "slowSourceQueryThreshold" : 2000,
    "sources" : [
        {
            "id" : 1,
            "name" : "com.linkedin.events.example.person.Person",
            "uri" : "lijiang.person",
            "partitionFunction" : "constant:1"
        }
    ]
}
# mysql
{
    "name" : "boss",
    "id" : 1,
    "uri" : "mysql://repl%2F123456@localhost:3306/1/mysql-bin",
    "slowSourceQueryThreshold" : 2000,
    "sources" : [
        {
            "id" : 1,
            "name" : "com.linkedin.events.example.person.Person",
            "uri" : "lijiang.person",
            "partitionFunction" : "constant:1"
        }
    ]
}
|
添加 avro 配置文件至schemas_registry文件夹中,关于avro的详细介绍参见Apache Avro
{
    "name" : "Person_V1",
    "doc" : "Auto-generated Avro schema for sy$person. Generated at Dec 04, 2012 05:07:05 PM PST",
    "type" : "record",
    "meta" : "dbFieldName=sy$person;pk=key;",
    "namespace" : "com.linkedin.events.example.person",
    "fields" : [ {
        "name" : "txn",
        "type" : [ "long", "null" ],
        "meta" : "dbFieldName=TXN;dbFieldPosition=0;"
    }, {
        "name" : "key",
        "type" : [ "long", "null" ],
        "meta" : "dbFieldName=KEY;dbFieldPosition=1;"
    }, {
        "name" : "firstName",
        "type" : [ "string", "null" ],
        "meta" : "dbFieldName=FIRST_NAME;dbFieldPosition=2;"
    }, {
        "name" : "lastName",
        "type" : [ "string", "null" ],
        "meta" : "dbFieldName=LAST_NAME;dbFieldPosition=3;"
    }, {
        "name" : "birthDate",
        "type" : [ "long", "null" ],
        "meta" : "dbFieldName=BIRTH_DATE;dbFieldPosition=4;"
    }, {
        "name" : "deleted",
        "type" : [ "string", "null" ],
        "meta" : "dbFieldName=DELETED;dbFieldPosition=5;"
    } ]
}
|
启动relay
./bin/startup.sh relay
|
至此,Relay和数据库都已经配置和部署完成!