2019独角兽企业重金招聘Python工程师标准>>>
DataX是一个在异构的数据库/文件系统之间高速交换数据的工具,实现了任意数据处理系统(RDBMS/HDFS/Local filesystem)之间的数据交换。
DataX产品说明:
http://code.taobao.org/p/datax/wiki/DataX%E4%BA%A7%E5%93%81%E8%AF%B4%E6%98%8E/
从oracle读取数据打印到控制台
//启动脚本
#!/bin/bash
# Launch a DataX job: load the user's shell environment first, then hand the
# job description file to the DataX Python launcher.

# Pull in the settings defined in the user's ~/.bashrc.
. ~/.bashrc

# Location of the DataX launcher and of the job definition to run.
DATAX_LAUNCHER=/home/hadoop/ceshi/datax/bin/datax.py
JOB_FILE=/home/hadoop/test/jobJson/test2.json

python "$DATAX_LAUNCHER" "$JOB_FILE"
//任务json
{
  "job": {
    "setting": {
      "speed": {
        "channel": 5
      }
    },
    "content": [
      {
        "reader": {
          "name": "oraclereader",
          "parameter": {
            "username": "****",
            "password": "****",
            "connection": [
              {
                "querySql": [
                  "select callingtel,calledtel from trecord where calledtel <= 100 group by callingtel,calledtel"
                ],
                "jdbcUrl": [
                  "jdbc:oracle:thin:@192.168.140.30:1521:TEST"
                ]
              }
            ]
          }
        },
        "writer": {
          "name": "streamwriter",
          "parameter": {
            "print": true,
            "encoding": "UTF-8"
          }
        }
      }
    ]
  }
}
从oracle导出数据到csv文件(用于neo4j数据导入)
//任务json; 启动命令与以上类似 【需要注意channel的不同?】
{
  "job": {
    "setting": {
      "speed": {
        "channel": 10
      }
    },
    "content": [
      {
        "reader": {
          "name": "oraclereader",
          "parameter": {
            "username": "test",
            "password": "test",
            "connection": [
              {
                "querySql": [
                  "SELECT CALLINGTEL AS START_ID, (CASE DATATYPE WHEN 0 THEN 'voice'WHEN 3 THEN 'sms'ELSE ''END ) calltype, (BEGINTIME - TO_DATE ('1970-01-01', 'yyyy-mm-dd') ) * 24 * 60 * 60 * 1000 AS BeginTime, ((BEGINTIME - TO_DATE ('1970-01-01', 'yyyy-mm-dd') ) * 24 * 60 * 60 * 1000 ) + (SPAN * 1000) AS EndTime, SPAN AS Span, CALLEDTEL AS END_ID, (CASE DATATYPE WHEN 0 THEN 'voice'WHEN 3 THEN 'sms'ELSE ''END ) TYPE FROM TRECORD WHERE CALLINGTEL != CALLEDTEL AND CALLINGTEL IS NOT NULL AND CALLEDTEL IS NOT NULL"
                ],
                "jdbcUrl": [
                  "jdbc:oracle:thin:@10.1.140.30:1521:TEST"
                ]
              }
            ]
          }
        },
        "writer": {
          "name": "txtfilewriter",
          "parameter": {
            "path": "/home/hadoop/test/data/",
            "fileName": "rel",
            "fileFormat": "csv",
            "fieldDelimiter": ",",
            "writeMode": "append"
          }
        }
      }
    ]
  }
}
fz用oracle导出数据到hdfs
//querySql模式,parameter-column可以不用指定
{
  "job": {
    "setting": {
      "speed": {
        "channel": 2
      }
    },
    "content": [
      {
        "reader": {
          "name": "oraclereader",
          "parameter": {
            "username": "****",
            "password": "****",
            "connection": [
              {
                "querySql": [
                  "select callingtel,calledtel from trecord where to_char(rectime,'yyyy-mm-dd')=to_char(sysdate - 1,'yyyy-mm-dd') group by callingtel,calledtel"
                ],
                "jdbcUrl": [
                  "jdbc:oracle:thin:@192.168.140.30:1521:TEST"
                ]
              }
            ]
          }
        },
        "writer": {
          "name": "hdfswriter",
          "parameter": {
            "defaultFS": "hdfs://192.168.140.11:9000",
            "path": "/user/test/data/",
            "fileName": "trecord",
            "fileType": "text",
            "fieldDelimiter": " ",
            "compress": "",
            "writeMode": "append",
            "column": [
              {
                "name": "callingtel",
                "type": "INT"
              },
              {
                "name": "calledtel",
                "type": "INT"
              }
            ]
          }
        }
      }
    ]
  }
}