1.测试MySQL连接
bin/sqoop list-databases --connect jdbc:mysql://192.168.1.187:3306/trade_dev --username 'mysql' --password '111111'
2.检验SQL语句
bin/sqoop eval --connect jdbc:mysql://192.168.1.187:3306/trade_dev --username 'mysql' --password '111111' --query "SELECT * FROM TB_REGION WHERE REGION_ID = '00A1719A489D4F49906A8CA9661CCBE8'"
3.导入hdfs
3.1 导入
bin/sqoop import --connect jdbc:mysql://192.168.1.187:3306/trade_dev --username 'mysql' --password '111111' --table TB_REGION --target-dir /sqoop/mysql/trade_dev/tb_region -m 5 --columns "code,name,category,farthercode,visible,regionlevel,region_id" --direct
3.2验证
hdfs dfs -cat /sqoop/mysql/trade_dev/tb_region/*01
4.导入hbase
4.1新建hbase表
hbase shell
create 'mysql_trade_dev', 'region'
4.2导入mysql数据到hbase
bin/sqoop import --connect jdbc:mysql://192.168.1.187:3306/trade_dev --username 'mysql' --password '111111' --table TB_REGION --hbase-table mysql_trade_dev --hbase-row-key REGION_ID --column-family region
4.3验证
scan 'mysql_trade_dev'
count 'mysql_trade_dev'
5.导入hive
bin/sqoop import --connect jdbc:mysql://192.168.1.187:3306/trade_dev --username 'mysql' --password '111111' --table TB_REGION --hive-import --create-hive-table --target-dir /user/hive/warehouse/tb_region --hive-table tb_region
6.增量hive
6.1 初始化导入hdfs
bin/sqoop import --connect jdbc:mysql://192.168.1.187:3306/trade_dev --username mysql --password 111111 --table TB_DICTIONARY -m 1 --target-dir /sqoop/mysql/trade_dev/tb_dic --incremental append --check-column DIC_ID
返回数据:
16/09/07 10:27:06 INFO tool.ImportTool: --incremental append
16/09/07 10:27:06 INFO tool.ImportTool: --check-column DIC_ID
16/09/07 10:27:06 INFO tool.ImportTool: --last-value 287
16/09/07 10:27:06 INFO tool.ImportTool: (Consider saving this with 'sqoop job --create')
6.2 创建hive外部表
-- External Hive table laid over the HDFS directory that the Sqoop import of
-- TB_DICTIONARY writes to (--target-dir /sqoop/mysql/trade_dev/tb_dic).
-- The import commands in these notes set no field delimiter, so this relies on
-- Sqoop's default comma-separated text output.
-- The MySQL datetime/date columns are declared as STRING here.
CREATE EXTERNAL TABLE tb_dic (
    DIC_ID             INT,
    DOMAIN_ID          STRING,
    DIC_TYPE_ID        INT,
    DESCRIPTION        STRING,
    CODE               INT,
    NAME               STRING,
    MNEMONIC           STRING,
    ATTRIBUTE          STRING,
    MARK_FOR_DEFAULT   INT,
    MARK_FOR_DELETE    INT,
    OPT_COUNTER        INT,
    CREATE_DATE        STRING,
    CREATE_BY          STRING,
    LAST_MODIFIED_DATE STRING,
    LAST_MODIFIED_BY   STRING,
    ATTRIBUTE1         INT,
    ATTRIBUTE2         INT,
    ATTRIBUTE3         STRING,
    ATTRIBUTE4         STRING,
    ATTRIBUTE5         STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
STORED AS TEXTFILE
LOCATION '/sqoop/mysql/trade_dev/tb_dic';
mysql建表语句
-- Rebuilds TB_DICTIONARY from scratch: an existing table (and its rows) is dropped first.
DROP TABLE IF EXISTS `TB_DICTIONARY`;
-- Dictionary table that is the MySQL source of the incremental Sqoop import in section 6.
-- The Chinese COMMENT '...' clauses are stored in the schema as column/table comments.
CREATE TABLE `TB_DICTIONARY` (
-- Auto-increment primary key; section 6 passes it to Sqoop as --check-column for --incremental append.
`DIC_ID` int(11) NOT NULL AUTO_INCREMENT COMMENT '字典ID',
`DOMAIN_ID` varchar(45) NOT NULL DEFAULT 'domain1' COMMENT '服务域区分的ID',
`DIC_TYPE_ID` int(11) NOT NULL COMMENT '字典类型ID-外键-TB_DICTIONARY_TYPE',
`DESCRIPTION` varchar(1024) NOT NULL COMMENT '转义码解释',
`CODE` tinyint(2) NOT NULL COMMENT '转义码',
`NAME` varchar(45) NOT NULL COMMENT '转义码对应含义',
`MNEMONIC` varchar(45) DEFAULT NULL COMMENT '助记码',
`ATTRIBUTE` varchar(45) DEFAULT NULL COMMENT '当前字典属性:如计量单位的量纲类型',
-- Per its COMMENT text: 1 = default entry, 0 = not default.
`MARK_FOR_DEFAULT` tinyint(2) NOT NULL DEFAULT '0' COMMENT '默认标记(1为默认,0为非默认)',
-- Per its COMMENT text: 1 = valid, 0 = invalid, i.e. the value reads inverted relative to the column name.
`MARK_FOR_DELETE` tinyint(2) NOT NULL DEFAULT '1' COMMENT '是否有效,1:有效;0:无效',
`OPT_COUNTER` int(5) DEFAULT NULL COMMENT '版本管理标志',
`CREATE_DATE` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建日期',
`CREATE_BY` varchar(45) NOT NULL DEFAULT 'admin' COMMENT '创建人ID',
`LAST_MODIFIED_DATE` datetime DEFAULT NULL COMMENT '修改日期',
`LAST_MODIFIED_BY` varchar(45) DEFAULT NULL COMMENT '修改人ID',
-- Generic attribute columns without a COMMENT; their purpose is not stated in these notes.
`ATTRIBUTE1` int(11) DEFAULT NULL,
`ATTRIBUTE2` int(11) DEFAULT NULL,
`ATTRIBUTE3` varchar(45) DEFAULT NULL,
`ATTRIBUTE4` varchar(45) DEFAULT NULL,
`ATTRIBUTE5` date DEFAULT NULL,
PRIMARY KEY (`DIC_ID`)
-- AUTO_INCREMENT=290 makes 290 the next generated DIC_ID.
) ENGINE=InnoDB AUTO_INCREMENT=290 DEFAULT CHARSET=utf8 COMMENT='字典表-李思宇';
-- Re-enables foreign-key checking. NOTE(review): the matching "SET FOREIGN_KEY_CHECKS = 0"
-- is not shown in these notes (presumably trimmed from the dump header) -- confirm.
SET FOREIGN_KEY_CHECKS = 1;
6.3 更新增量数据
bin/sqoop job --create incjob -- import --connect jdbc:mysql://192.168.1.187:3306/trade_dev --username mysql --password 111111 --table TB_DICTIONARY -m 1 --target-dir /sqoop/mysql/trade_dev/tb_dic --incremental append --check-column DIC_ID --last-value 287
bin/sqoop job --exec incjob
6.4 验证
select count(*) from tb_dic;
返回数据:
第一次
Time taken: 0.068 seconds, Fetched: 489 row(s)
第二次
Time taken: 0.068 seconds, Fetched: 490 row(s)
7.整库导入测试
7.1新建hbase表
hbase shell
create 'new_table','data'
7.2导入mysql数据到hbase
bin/sqoop import-all-tables --connect jdbc:mysql://192.168.1.187:3306/new_schema --username mysql --password 111111 --hbase-create-table --hbase-table new_table --column-family data --hbase-bulkload
注意
整库导入要求每个表都有主键,不然会报错
16/09/08 15:03:50 INFO orm.CompilationManager: Writing jar file: /tmp/sqoop-xdata/compile/070fa1eda1e77fc70eaa0c532cfa94b8/nopk.jar
16/09/08 15:03:50 ERROR tool.ImportAllTablesTool: Error during import: No primary key could be found for table nopk. Please specify one with --split-by or perform a sequential import with '-m 1'.
或者保证每个表都有同一个可以做rowkey的字段(如id),并在导入命令中增加 --hbase-row-key id
7.3验证
scan 'new_table'
count 'new_table'
参考
sqoop incremental import in hive i get error message hive not support append mode how to solve that
sqoop incremental import working fine ,now i want know how to update value move into hive table