程序目的:将已有的 MySQL 数据导入到 HDFS 中;采用 Java 编程方式直接调用 Sqoop,避免在本地安装 Sqoop 环境的麻烦。
-- Schema setup: the test database and the vote_record table that the Java
-- Sqoop importer below reads from.
-- IF NOT EXISTS makes the script safe to re-run without dropping anything.
CREATE DATABASE IF NOT EXISTS test;
USE test;

CREATE TABLE IF NOT EXISTS `vote_record` (
    `id`          INT(11)     NOT NULL AUTO_INCREMENT,
    `user_id`     VARCHAR(20) NOT NULL,
    `vote_id`     INT(11)     NOT NULL,
    `group_id`    INT(11)     NOT NULL,
    `create_time` DATETIME    NOT NULL,
    PRIMARY KEY (`id`),
    -- NOTE: InnoDB silently converts USING HASH to a BTREE index;
    -- kept as written so the DDL matches the original intent.
    KEY `index_user_id` (`user_id`) USING HASH
)
    ENGINE = INNODB
    AUTO_INCREMENT = 1
    DEFAULT CHARSET = utf8;
-- rand_string(n): returns a random alphanumeric string of length n.
-- Used by add_vote_record to fabricate user_id values.
--
-- BUG FIX: without a DELIMITER change, the mysql client terminates the
-- CREATE FUNCTION statement at the first ';' inside the BEGIN...END body,
-- so the original script fails to create the function.
-- NOTE: uses RAND(), so with binlog enabled the server requires
-- log_bin_trust_function_creators = TRUE (see the note at the end of the file).
DELIMITER $$
CREATE FUNCTION `rand_string`(n INT)
    RETURNS VARCHAR(255) CHARSET latin1
BEGIN
    DECLARE chars_str  VARCHAR(100) DEFAULT 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789';
    DECLARE return_str VARCHAR(255) DEFAULT '';
    DECLARE i          INT          DEFAULT 0;
    -- Append one random character per iteration. CHAR_LENGTH(chars_str)
    -- replaces the magic constant 62 so the bound cannot drift out of sync
    -- with the alphabet above; FLOOR(1 + RAND() * len) yields an index in [1, len].
    WHILE i < n DO
        SET return_str = CONCAT(return_str,
                                SUBSTRING(chars_str, FLOOR(1 + RAND() * CHAR_LENGTH(chars_str)), 1));
        SET i = i + 1;
    END WHILE;
    RETURN return_str;
END$$
DELIMITER ;
-- add_vote_record(n): bulk-loads n fake rows into vote_record; the CALL
-- below generates the 1,000,000-row fixture for the Sqoop import test.
--
-- BUG FIX: as with rand_string, the DELIMITER change is required or the
-- mysql client splits the procedure body at the first interior ';'.
-- NOTE(review): each INSERT autocommits; for large n this is slow — consider
-- wrapping the loop in START TRANSACTION / COMMIT when load time matters.
DELIMITER $$
CREATE PROCEDURE `add_vote_record`(IN n INT)
BEGIN
    DECLARE i INT DEFAULT 1;
    WHILE (i <= n) DO
        INSERT INTO vote_record (user_id, vote_id, group_id, create_time)
        VALUES (rand_string(20), FLOOR(RAND() * 1000), FLOOR(RAND() * 100), NOW());
        SET i = i + 1;
    END WHILE;
END$$
DELIMITER ;

CALL add_vote_record(1000000);
<!-- Maven dependencies for the standalone Sqoop import client.
     BUG FIX: every closing tag in the original fragment had lost its "</"
     (e.g. "junitgroupId>"), so the XML was not well-formed; repaired here.
     NOTE: the Hadoop artifact versions (2.9.0) must be compatible with the
     running cluster, and sqoop 1.4.7 uses the hadoop260 classifier. -->
<dependencies>
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.11</version>
        <scope>test</scope>
    </dependency>
    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>5.1.46</version>
    </dependency>
    <dependency>
        <groupId>org.apache.sqoop</groupId>
        <artifactId>sqoop</artifactId>
        <version>1.4.7</version>
        <classifier>hadoop260</classifier>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>2.9.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>2.9.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>2.9.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-mapreduce-client-common</artifactId>
        <version>2.9.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.avro</groupId>
        <artifactId>avro</artifactId>
        <version>1.8.2</version>
    </dependency>
</dependencies>
package cn.hackcoder;
import org.apache.hadoop.conf.Configuration;
import org.apache.sqoop.Sqoop;
import org.apache.sqoop.tool.SqoopTool;
import org.apache.sqoop.util.OptionsFileUtil;
/**
 * Programmatic Sqoop client: imports the MySQL table {@code vote_record}
 * into HDFS without requiring a local sqoop installation.
 *
 * Created by linzhichao on 2018/5/15.
 */
public class SqoopUtils {

    /**
     * Imports the {@code vote_record} table from the local MySQL instance
     * into HDFS at {@code /user/root/import_vote_record} using one map task.
     *
     * @return the Sqoop exit code (0 on success, non-zero on failure)
     * @throws Exception if option expansion or tool setup fails
     */
    private static int importDataFromMysql() throws Exception {
        // NOTE(review): connection credentials are hard-coded for the demo;
        // externalize them before any real use.
        String[] args = new String[]{
                "--connect", "jdbc:mysql://127.0.0.1:3306/test",
                "--driver", "com.mysql.jdbc.Driver",
                "--username", "root",                       // double-dash form, consistent with the other long options
                "--password", "system",
                "--table", "vote_record",
                "-m", "1",                                  // single map task: no --split-by needed
                "--target-dir", "/user/root/import_vote_record"
        };
        String[] expandArguments = OptionsFileUtil.expandArguments(args);

        SqoopTool tool = SqoopTool.getTool("import");

        Configuration conf = new Configuration();
        // fs.defaultFS is the supported key; fs.default.name has been
        // deprecated since Hadoop 2.x.
        conf.set("fs.defaultFS", "hdfs://127.0.0.1:9000"); // HDFS service address
        conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
        conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());

        Configuration loadPlugins = SqoopTool.loadPlugins(conf);
        Sqoop sqoop = new Sqoop((com.cloudera.sqoop.tool.SqoopTool) tool, loadPlugins);
        return Sqoop.runSqoop(sqoop, expandArguments);
    }

    /**
     * Lists the tables of the {@code test} database, mirroring
     * {@code sqoop list-tables} on the command line.
     *
     * @return the Sqoop exit code (0 on success, non-zero on failure)
     * @throws Exception if option expansion or tool setup fails
     */
    private static int listTablesFromMysql() throws Exception {
        String[] args = new String[]{
                "--connect", "jdbc:mysql://127.0.0.1:3306/test",
                "--username", "root",
                "--password", "system",
        };
        String[] expandArguments = OptionsFileUtil.expandArguments(args);

        Configuration pluginConf = SqoopTool.loadPlugins(new Configuration());
        com.cloudera.sqoop.tool.SqoopTool tool =
                (com.cloudera.sqoop.tool.SqoopTool) SqoopTool.getTool("list-tables");
        if (null == tool) {
            // BUG FIX: the original printed this error and then fell through
            // to new Sqoop(tool, ...), throwing a NullPointerException.
            // Fail fast with a non-zero exit code instead.
            System.err.println("No such sqoop tool: list-tables See 'sqoop help'.");
            return 1;
        }
        Sqoop sqoop = new Sqoop(tool, pluginConf);
        return Sqoop.runSqoop(sqoop, expandArguments);
    }

    public static void main(String[] args) throws Exception {
        System.out.println(listTablesFromMysql());
        // BUG FIX: the import's exit code was silently discarded; surface it
        // so failures are visible to the caller.
        System.out.println(importDataFromMysql());
    }
}
- 使用中需要特别注意hadoop的版本需要与maven中hadoop配置的版本兼容。
- 使用mysql开启binlog模式时,创建mysql函数需要执行
set global log_bin_trust_function_creators=TRUE;