- 下载DataX
直接下载DataX工具包:DataX下载地址(见 https://github.com/alibaba/DataX 项目README中的下载链接)
- 解压并生成json模板:
tar -zxvf datax.tar.gz
[hdfs@192-168-16-71 datax]$ cd bin
[hdfs@192-168-16-71 bin]$ ll
total 40
-rwxrwxr-x 1 hdfs hdfs 8993 Nov 24 2017 datax.py
-rwxrwxr-x 1 hdfs hdfs 6906 Nov 24 2017 dxprof.py
-rwxrwxr-x 1 hdfs hdfs 16897 Nov 24 2017 perftrace.py
[hdfs@192-168-16-71 bin]$ python datax.py -r hbase11xreader -w txtfilewriter
DataX (DATAX-OPENSOURCE-3.0), From Alibaba !
Copyright (C) 2010-2017, Alibaba Group. All Rights Reserved.
Please refer to the hbase11xreader document:
https://github.com/alibaba/DataX/blob/master/hbase11xreader/doc/hbase11xreader.md
Please refer to the txtfilewriter document:
https://github.com/alibaba/DataX/blob/master/txtfilewriter/doc/txtfilewriter.md
Please save the following configuration as a json file and use
python {DATAX_HOME}/bin/datax.py {JSON_FILE_NAME}.json
to run the job.
{
"job": {
"content": [
{
"reader": {
"name": "hbase11xreader",
"parameter": {
"column": [],
"encoding": "",
"hbaseConfig": {},
"mode": "",
"range": {
"endRowkey": "",
"isBinaryRowkey": true,
"startRowkey": ""
},
"table": ""
}
},
"writer": {
"name": "txtfilewriter",
"parameter": {
"dateFormat": "",
"fieldDelimiter": "",
"fileName": "",
"path": "",
"writeMode": ""
}
}
}
],
"setting": {
"speed": {
"channel": ""
}
}
}
}
- 创建测试表:
hbase(main):046:0> create 'LXW','CF'
0 row(s) in 1.2120 seconds
=> Hbase::Table - LXW
hbase(main):047:0> put 'LXW','row1','CF:NAME','lxw'
0 row(s) in 0.0120 seconds
hbase(main):048:0> put 'LXW','row1','CF:AGE','18'
0 row(s) in 0.0080 seconds
hbase(main):049:0> put 'LXW','row1','CF:ADDRESS','BeijingYiZhuang'
0 row(s) in 0.0070 seconds
hbase(main):050:0> put 'LXW','row2','CF:ADDRESS','BeijingYiZhuang2'
0 row(s) in 0.0060 seconds
hbase(main):051:0> put 'LXW','row2','CF:AGE','18'
0 row(s) in 0.0050 seconds
hbase(main):052:0> put 'LXW','row2','CF:NAME','lxw2'
0 row(s) in 0.0040 seconds
hbase(main):053:0> exit
- 根据模板编写JSON文件:lxw.json
{
"job": {
"content": [
{
"reader": {
"name": "hbase11xreader",
"parameter": {
"hbaseConfig": {
"hbase.zookeeper.quorum": "192.168.16.70:2181"
},
"table": "LXW",
"encoding": "utf-8",
"mode": "normal",
"column": [
{
"name":"rowkey",
"type":"string"
},
{
"name":"CF:NAME",
"type":"string"
},
{
"name":"CF:AGE",
"type":"string"
},
{
"name":"CF:ADDRESS",
"type":"string"
}
],
"range": {
"endRowkey": "",
"isBinaryRowkey": false,
"startRowkey": ""
}
}
},
"writer": {
"name": "txtfilewriter",
"parameter": {
"dateFormat": "yyyy-MM-dd",
"fieldDelimiter": "\t",
"fileName": "LXW",
"path": "/export/test_datax/",
"writeMode": "truncate"
}
}
}
],
"setting": {
"speed": {
"channel": 5
}
}
}
}
- 执行抽取命令:
[hdfs@192-168-16-71 bin]$ python datax.py ./lxw.json
# 输出日志太长,这里只贴出最后一段信息
2018-11-07 15:50:05.846 [job-0] INFO StandAloneJobContainerCommunicator - Total 2 records, 50 bytes | Speed 5B/s, 0 records/s | Error 0 records, 0 bytes | All Task WaitWriterTime 0.000s | All Task WaitReaderTime 0.049s | Percentage 100.00%
2018-11-07 15:50:05.847 [job-0] INFO JobContainer -
任务启动时刻 : 2018-11-07 15:49:54
任务结束时刻 : 2018-11-07 15:50:05
任务总计耗时 : 10s
任务平均流量 : 5B/s
记录写入速度 : 0rec/s
读出记录总数 : 2
读写失败总数 : 0
- 查看导出结果:
[root@192-168-16-71 test_datax]# cat LXW__36bd388f_0969_49ba_9757_3d8978ab2ac0
row1 lxw 18 BeijingYiZhuang
row2 lxw2 18 BeijingYiZhuang2
- 当需要指定rowkey范围时,修改reader中的range配置:
"range": {
"endRowkey": "row2",
"isBinaryRowkey": false,
"startRowkey": "row1"
}
- 其执行结果(rowkey范围为左闭右开区间:包含startRowkey,不包含endRowkey,因此只导出row1):
[root@192-168-16-71 test_datax]# cat LXW__0aabbffc_65c6_4e0d_a173_414d469a18e2
row1 lxw 18 BeijingYiZhuang