DataX does not depend on any other services: simply upload the DataX package, extract it, and configure the environment variables.
Official installation package download address
Write the job script and save it as MySQLToHive.json.
Note: the field delimiter of the Hive table must match the "fieldDelimiter": "," setting in the script, as in the example table DDL below.
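A Hive table definition consistent with this job might look like the following (a sketch only; the column types mirror the writer's column list below, and the comma delimiter matches fieldDelimiter):

create table if not exists student.student (
    id       int,
    name     string,
    age      int,
    gender   string,
    clazz    string,
    last_mod timestamp
)
row format delimited fields terminated by ','
stored as textfile;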
The complete job script is as follows:
{
"job":
{
"setting":
{
"speed":
{
"channel": 3
}
},
"content":
[
{
"reader":
{
"name": "mysqlreader",
"parameter":
{
"username": "root",
"password": "123456",
"column":
[
"id",
"name",
"age",
"gender",
"clazz",
"last_mod"
],
"splitPk": "id",
"connection":
[
{
"table":
[
"student"
],
"jdbcUrl":
[
"jdbc:mysql://master:3306/student?useUnicode=true&characterEncoding=utf8"
]
}
]
}
},
"writer":
{
"name": "hdfswriter",
"parameter":
{
"defaultFS": "hdfs://master:9000",
"fileType": "text",
"path": "/user/hive/warehouse/student.db/student",
"fileName": "test",
"column":
[
{
"name": "id",
"type": "int"
},
{
"name": "name",
"type": "string"
},
{
"name": "age",
"type": "int"
},
{
"name": "gender",
"type": "string"
},
{
"name": "clazz",
"type": "string"
},
{
"name": "last_mod",
"type": "TIMESTAMP"
}
],
"writeMode": "append",
"fieldDelimiter": ","
}
}
}
]
}
}
datax.py MySQLToHive.json
After the job finishes, query the data in Hive to verify the results.
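For example, a quick check from the Hive CLI (assuming the database and table from the job above):

select * from student.student limit 10;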
Next, export the same MySQL table to HBase. Write the script and save it as MySQLToHbase.json. In the writer configuration, the hbaseConfig block specifies the ZooKeeper quorum address used to connect to HBase:
"hbaseConfig":
{
"hbase.zookeeper.quorum": "master:2181,node1:2181,node2:2181"
}
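Note that hbase11xwriter writes into an existing table and does not create it. If the student table with column family cf1 does not exist yet, create it first from the HBase shell (a minimal sketch with default table settings):

create 'student', 'cf1'

With the table in place, the complete job script is as follows: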
{
"job":
{
"setting":
{
"speed":
{
"channel": 3
}
},
"content":
[
{
"reader":
{
"name": "mysqlreader",
"parameter":
{
"username": "root",
"password": "123456",
"column":
[
"id",
"name",
"age",
"gender",
"clazz",
"last_mod"
],
"splitPk": "id",
"connection":
[
{
"table":
[
"student"
],
"jdbcUrl":
[
"jdbc:mysql://master:3306/student?useUnicode=true&characterEncoding=utf8"
]
}
]
}
},
"writer":
{
"name": "hbase11xwriter",
"parameter":
{
"hbaseConfig":
{
"hbase.zookeeper.quorum": "master:2181,node1:2181,node2:2181"
},
"table": "student",
"mode": "normal",
"rowkeyColumn":
[
{
"index": 0,
"type": "string"
}
],
"column":
[
{
"index": 1,
"name": "cf1:name",
"type": "string"
},
{
"index": 2,
"name": "cf1:age",
"type": "int"
},
{
"index": 3,
"name": "cf1:gender",
"type": "string"
},
{
"index": 4,
"name": "cf1:clazz",
"type": "string"
},
{
"index": 5,
"name": "cf1:last_mod",
"type": "string"
}
],
"encoding": "utf-8"
}
}
}
]
}
}
datax.py MySQLToHbase.json
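After the job completes, the import can be verified from the HBase shell, for example:

scan 'student', {LIMIT => 5}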
Pay close attention to the data types in these scripts. For example, a column whose Hive type is timestamp should be declared as date in the hdfsreader column list when reading it back; otherwise the rows will be treated as dirty data. Next, write the script for exporting from Hive back to MySQL and save it as HiveToMySQL.json.
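mysqlwriter with "writeMode": "insert" inserts into an existing table. If the target table is missing, a DDL along these lines would create it (the varchar lengths here are assumptions; adjust to your actual schema):

create table if not exists student.student (
    id       int,
    name     varchar(50),   -- assumed length
    age      int,
    gender   varchar(10),   -- assumed length
    clazz    varchar(50),   -- assumed length
    last_mod timestamp
);

The complete job script is as follows: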
{
"job":
{
"setting":
{
"speed":
{
"channel": 3
}
},
"content":
[
{
"reader":
{
"name": "hdfsreader",
"parameter":
{
"path": "/user/hive/warehouse/student.db/student/*",
"defaultFS": "hdfs://master:9000",
"column":
[
{
"index": 0,
"type": "long"
},
{
"index": 1,
"type": "string"
},
{
"index": 2,
"type": "long"
},
{
"index": 3,
"type": "string"
},
{
"index": 4,
"type": "string"
},
{
"index": 5,
"type": "date"
}
],
"fileType": "text",
"encoding": "UTF-8",
"fieldDelimiter": ","
}
},
"writer":
{
"name": "mysqlwriter",
"parameter":
{
"writeMode": "insert",
"username": "root",
"password": "123456",
"column":
[
"id",
"name",
"age",
"gender",
"clazz",
"last_mod"
],
"connection":
[
{
"jdbcUrl": "jdbc:mysql://master:3306/student?useUnicode=true&characterEncoding=utf8",
"table":
[
"student"
]
}
]
}
}
}
]
}
}
datax.py HiveToMySQL.json
Finally, check the student data in MySQL; if the data is complete, the import finished successfully.
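For example, a simple completeness check:

select count(*) from student.student;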