tar -zxvf <path-to-sqoop-tarball> -C <extraction-path>
cp <extraction-path>/conf/sqoop-env-template.sh <extraction-path>/conf/sqoop-env.sh
#export HADOOP_COMMON_HOME=
export HADOOP_COMMON_HOME=/home/dong/hadoop/hadoop
#Set path to where hadoop-*-core.jar is available
#export HADOOP_MAPRED_HOME=
export HADOOP_MAPRED_HOME=/home/dong/hadoop/hadoop
#set the path to where bin/hbase is available
#export HBASE_HOME=
export HBASE_HOME=/home/dong/hadoop/hbase
#Set the path to where bin/hive is available
#export HIVE_HOME=
export HIVE_HOME=/home/dong/hadoop/hive
#Set the path for where zookeper config dir is
#export ZOOCFGDIR=
export ZOOKEEPER_HOME=/home/dong/hadoop/zookeeper
export ZOOCFGDIR=/home/dong/hadoop/zookeeper
vim ~/.bashrc
export SQOOP_HOME=<path-to-sqoop>
export PATH=$PATH:$SQOOP_HOME/bin
export CLASSPATH=$CLASSPATH:$SQOOP_HOME/lib
source ~/.bashrc
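At this point you can sanity-check the installation; the exact version banner depends on your release:
sqoop version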
# Test: if everything is working, this lists all databases in MySQL
sqoop list-databases --connect jdbc:mysql://127.0.0.1:3306/ --username root --password rt123@RT456
If it fails with an error like
# ERROR manager.CatalogQueryManager: Failed to list databases
# com.mysql.cj.jdbc.exceptions.CommunicationsException: Communications link failure
# you need to remove MySQL's IP binding
sudo vim /etc/mysql/mysql.conf.d/mysqld.cnf
comment out the line
bind-address = 127.0.0.1
and restart the MySQL service.
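A minimal way to do that on a systemd-based Ubuntu install (the service may be named mysqld on other distributions), then re-run the check:
sudo systemctl restart mysql
sqoop list-databases --connect jdbc:mysql://127.0.0.1:3306/ --username root --password rt123@RT456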
Prepare some test data in MySQL:
create database sqoop_test;
use sqoop_test;
create table student(id int,name char(5),age int);
insert into student values (1,"zhao",10),(2,"qian",11),(3,"sun",9),(4,"li",17),(5,"zhou",14);
Full-table import
sqoop import \
--connect jdbc:mysql://127.0.0.1:3306/sqoop_test \
--username root \
--password rt123@RT456 \
--table student \
--target-dir /sqoop_test/ \
--delete-target-dir \
--num-mappers 1 \
--fields-terminated-by "\t"
Parameter notes:
--target-dir: the HDFS directory to import into; if the path does not exist it is created automatically
--delete-target-dir: delete the target-dir path first if it already exists
--num-mappers: number of mappers to launch, i.e. the degree of parallelism
--fields-terminated-by: field delimiter for the HDFS output files
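To confirm the import, you can inspect the generated files; with a single mapper the output is typically one part file (the exact file name below is an assumption):
hdfs dfs -ls /sqoop_test/
hdfs dfs -cat /sqoop_test/part-m-00000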
Query-based import
sqoop import \
--connect jdbc:mysql://127.0.0.1:3306/sqoop_test \
--username root \
--password rt123@RT456 \
--target-dir /sqoop_test/ \
--delete-target-dir \
--num-mappers 1 \
--fields-terminated-by "\t" \
--query 'select * from student where id >= 1 and $CONDITIONS'
Parameter notes:
--query: supply a SQL query; Sqoop imports the query result into the given HDFS path
The SQL must be wrapped in single quotes '', and the WHERE clause must include and $CONDITIONS; $CONDITIONS is a placeholder that Sqoop replaces with its own split conditions so the query can be partitioned across mappers. If you wrap the SQL in double quotes "" instead, you must escape it as \$CONDITIONS so the shell does not expand it.
With --query there is no need to specify --table.
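For reference, a sketch of the same import with the SQL in double quotes, showing the escaped \$CONDITIONS:
sqoop import \
--connect jdbc:mysql://127.0.0.1:3306/sqoop_test \
--username root \
--password rt123@RT456 \
--target-dir /sqoop_test/ \
--delete-target-dir \
--num-mappers 1 \
--fields-terminated-by "\t" \
--query "select * from student where id >= 1 and \$CONDITIONS"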
Importing specific columns
sqoop import \
--connect jdbc:mysql://127.0.0.1:3306/sqoop_test \
--username root \
--password rt123@RT456 \
--table student \
--columns id,name \
--target-dir /sqoop_test/ \
--delete-target-dir \
--num-mappers 1 \
--fields-terminated-by "\t" \
--where "id=1"
Parameter notes:
--columns: the MySQL columns to import
--where: a filter condition; unlike --query you do not have to write a full SQL statement
--query cannot be used together with --where
Import into Hive
sqoop import \
--connect jdbc:mysql://localhost:3306/sqoop_test \
--username root \
--password rt123@RT456 \
--table student \
--num-mappers 1 \
--hive-import \
--fields-terminated-by "\t" \
--hive-overwrite \
--hive-table student_hive
Under the hood, the data is first imported from MySQL into HDFS and then loaded into Hive.
--hive-partition-key / --hive-partition-value: specify the Hive partition column and the partition value to load into
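A sketch of a partitioned Hive import; the partition column dt, its value, and the table name student_hive_part are made-up examples:
sqoop import \
--connect jdbc:mysql://localhost:3306/sqoop_test \
--username root \
--password rt123@RT456 \
--table student \
--num-mappers 1 \
--hive-import \
--fields-terminated-by "\t" \
--hive-table student_hive_part \
--hive-partition-key dt \
--hive-partition-value "2020-11-29"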
Import into HBase
sqoop import \
--connect jdbc:mysql://localhost:3306/sqoop_test \
--username root \
--password rt123@RT456 \
--table student \
--columns "id,name,age" \
--column-family "info" \
--hbase-create-table \
--hbase-row-key "id" \
--hbase-table "student_hbase" \
--num-mappers 1 \
--split-by id
Under the hood, the data goes directly from MySQL into HBase.
--hbase-row-key: specifies the HBase row key; if it is omitted, Sqoop uses the --split-by column as the row key by default
If --hbase-row-key needs to cover several columns, separate them with commas, e.g. "column1,column2,..."; there must be no space before a column name, otherwise the space becomes part of the name and you get a "row key is null" error
Even when --hbase-row-key lists several columns, the HBase table still has a single row key: the column values are concatenated with "_"
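A minimal sketch of such a composite row key, reusing the sample table and assuming id and name together identify a row (note there is no space after the comma; the table name student_hbase2 is just an example):
sqoop import \
--connect jdbc:mysql://localhost:3306/sqoop_test \
--username root \
--password rt123@RT456 \
--table student \
--columns "id,name,age" \
--column-family "info" \
--hbase-create-table \
--hbase-row-key "id,name" \
--hbase-table "student_hbase2" \
--num-mappers 1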
Export from HDFS/Hive back to MySQL
sqoop export \
--connect jdbc:mysql://localhost:3306/sqoop_test \
--username root \
--password rt123@RT456 \
--table student \
--num-mappers 1 \
--export-dir /user/hive/warehouse/student_hive \
--input-fields-terminated-by "\t"
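The MySQL table receiving the export must already exist; by default Sqoop inserts rows into it. A quick sanity check after the job finishes (the count depends on your data):
mysql -u root -p -e "select count(*) from sqoop_test.student;"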
Alternatively, the parameters can be placed in an options file:
vim xxx.opt
export
--connect
jdbc:mysql://localhost:3306/sqoop_test
--username
root
--password
rt123@RT456
--table
student
--num-mappers
1
--export-dir
/user/hive/warehouse/student_hive
--input-fields-terminated-by
"\t"
# Note: with sqoop 1.4.7, each parameter and its value must be on separate lines, otherwise the job fails;
# values do not need quotes, e.g. for --query simply writing SELECT * FROM xxx WHERE $CONDITIONS on the value line is enough
Run it with: sqoop --options-file xxx.opt
Or wrap the same command in a shell script: vim xxx.sh
#!/bin/bash
sqoop export \
--connect jdbc:mysql://localhost:3306/sqoop_test \
--username root \
--password rt123@RT456 \
--table student \
--num-mappers 1 \
--export-dir /user/hive/warehouse/student_hive \
--input-fields-terminated-by "\t"
Run it with: sh xxx.sh
Incremental import can be done in two ways:
1. --query / --where: filter out the new rows yourself
2. --incremental: let Sqoop track the last imported value
append mode
sqoop import \
--connect jdbc:mysql://localhost:3306/sqoop_test \
--username root \
--password rt123@RT456 \
--table student \
--driver com.mysql.jdbc.Driver \
--target-dir /user/hive/student_hive/ \
--split-by id \
--check-column id \
--incremental append \
--last-value 0 \
--fields-terminated-by '\t' \
--null-string '\\N' \
--null-non-string '0'
--check-column: the column used to detect new rows for incremental import; it must be increasing and must not be a char-type column
--last-value: the largest --check-column value imported by the previous run; the current run imports rows whose --check-column value is greater than last-value
--incremental append: import in append (auto-increment) mode
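For example, after the initial load of the five sample rows, the next manual run would bump --last-value to the largest id already imported:
sqoop import \
--connect jdbc:mysql://localhost:3306/sqoop_test \
--username root \
--password rt123@RT456 \
--table student \
--driver com.mysql.jdbc.Driver \
--target-dir /user/hive/student_hive/ \
--split-by id \
--check-column id \
--incremental append \
--last-value 5 \
--fields-terminated-by '\t' \
--null-string '\\N' \
--null-non-string '0'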
Updating --last-value by hand before every run is tedious; a saved sqoop job updates it automatically:
sqoop job --create X -- import \
--connect jdbc:mysql://localhost:3306/sqoop_test \
--username root \
--table student \
--target-dir /user/hive/student_hive/ \
--split-by id \
--check-column id \
--incremental append \
--last-value 0 \
--fields-terminated-by '\t' \
--null-string '\\N' \
--null-non-string '0' \
--password-file /sqoop/pwd/sqoopPWD.pwd
Run the command above once to create the job; after that, each incremental load is simply sqoop job --exec X
Running a job with sqoop job --exec may prompt for the password each time; the fix is to store the MySQL password in a file on HDFS and reference it with --password-file, as in the command above.
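A minimal sketch of creating that password file; the path matches the --password-file value above, and echo -n avoids a trailing newline that would otherwise become part of the password:
echo -n "rt123@RT456" > sqoopPWD.pwd
hdfs dfs -mkdir -p /sqoop/pwd
hdfs dfs -put sqoopPWD.pwd /sqoop/pwd/
hdfs dfs -chmod 400 /sqoop/pwd/sqoopPWD.pwd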
lastmodified mode
sqoop job --create X -- import \
--connect jdbc:mysql://localhost:3306/sqoop_test \
--username root \
--table student \
--target-dir /user/hive/student_hive/ \
--split-by id \
--check-column date_column \
--incremental lastmodified \
--append \
--last-value 0 \
--fields-terminated-by '\t' \
--null-string '\\N' \
--null-non-string '0' \
--password-file /sqoop/pwd/sqoopPWD.pwd
Imports data based on a date/timestamp column: each run picks up rows whose --check-column value lies between last-value and the time the job starts.
Compared with the append approach above, the differences are the parameters --incremental lastmodified and --append.
Common errors
ERROR hive.HiveConfig: Could not load org.apache.hadoop.hive.conf.HiveConf. Make sure HIVE_CONF_DIR is set correctly.
Solution
vim ~/.bashrc
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$HIVE_HOME/lib/*
source ~/.bashrc
Or copy $HIVE_HOME/lib/hive-common* to $SQOOP_HOME/lib
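In command form (the exact jar names depend on the Hive version installed):
cp $HIVE_HOME/lib/hive-common*.jar $SQOOP_HOME/lib/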
2020-11-29 15:09:49,490 ERROR tool.ImportTool: Import failed: java.io.IOException: Exception thrown in Hive
at org.apache.sqoop.hive.HiveImport.executeScript(HiveImport.java:358)
at org.apache.sqoop.hive.HiveImport.importTable(HiveImport.java:241)
at org.apache.sqoop.tool.ImportTool.importTable(ImportTool.java:537)
at org.apache.sqoop.tool.ImportTool.run(ImportTool.java:628)
at org.apache.sqoop.Sqoop.run(Sqoop.java:147)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:76)
at org.apache.sqoop.Sqoop.runSqoop(Sqoop.java:183)
at org.apache.sqoop.Sqoop.runTool(Sqoop.java:234)
at org.apache.sqoop.Sqoop.runTool(Sqoop.java:243)
at org.apache.sqoop.Sqoop.main(Sqoop.java:252)
Caused by: java.lang.reflect.InvocationTargetException
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.sqoop.hive.HiveImport.executeScript(HiveImport.java:331)
... 9 more
Caused by: java.lang.NoSuchMethodError: com.lmax.disruptor.dsl.Disruptor.<init>(Lcom/lmax/disruptor/EventFactory;ILjava/util/concurrent/ThreadFactory;Lcom/lmax/disruptor/dsl/ProducerType;Lcom/lmax/disruptor/WaitStrategy;)V
Solution
Copy $HIVE_HOME/lib/disruptor* to $SQOOP_HOME/lib (the jar name is lowercase)
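For example (the disruptor version differs between Hive releases):
cp $HIVE_HOME/lib/disruptor-*.jar $SQOOP_HOME/lib/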
Error: java.lang.RuntimeException: Could not access HBase table student_hbase
at org.apache.sqoop.hbase.HBasePutProcessor.setConf(HBasePutProcessor.java:114)
at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:77)
at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:137)
at org.apache.sqoop.mapreduce.DelegatingOutputFormat$DelegatingRecordWriter.<init>(DelegatingOutputFormat.java:107)
at org.apache.sqoop.mapreduce.DelegatingOutputFormat.getRecordWriter(DelegatingOutputFormat.java:82)
at org.apache.hadoop.mapred.MapTask$NewDirectOutputCollector.<init>(MapTask.java:659)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:779)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:347)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:174)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1730)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:168)
Caused by: java.io.IOException: java.lang.reflect.InvocationTargetException
at org.apache.hadoop.hbase.client.ConnectionFactory.createConnection(ConnectionFactory.java:240)
at org.apache.hadoop.hbase.client.ConnectionManager.createConnection(ConnectionManager.java:439)
at org.apache.hadoop.hbase.client.ConnectionManager.createConnection(ConnectionManager.java:432)
at org.apache.hadoop.hbase.client.ConnectionManager.getConnectionInternal(ConnectionManager.java:310)
at org.apache.hadoop.hbase.client.HTable.<init>(HTable.java:185)
at org.apache.hadoop.hbase.client.HTable.<init>(HTable.java:151)
at org.apache.sqoop.hbase.HBasePutProcessor.setConf(HBasePutProcessor.java:112)
... 12 more
Caused by: java.lang.reflect.InvocationTargetException
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.hadoop.hbase.client.ConnectionFactory.createConnection(ConnectionFactory.java:238)
... 18 more
Caused by: java.lang.NoClassDefFoundError: org/apache/commons/lang/ArrayUtils
at org.apache.hadoop.hbase.HConstants.<clinit>(HConstants.java:1111)
at org.apache.hadoop.hbase.client.ConnectionUtils.getPauseTime(ConnectionUtils.java:63)
at org.apache.hadoop.hbase.client.AsyncProcess.<init>(AsyncProcess.java:410)
at org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.createAsyncProcess(ConnectionManager.java:2423)
at org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.<init>(ConnectionManager.java:700)
at org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.<init>(ConnectionManager.java:647)
... 23 more
Caused by: java.lang.ClassNotFoundException: org.apache.commons.lang.ArrayUtils
at java.net.URLClassLoader.findClass(URLClassLoader.java:382)
at java.lang.ClassLoader.loadClass(ClassLoader.java:418)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:352)
at java.lang.ClassLoader.loadClass(ClassLoader.java:351)
... 29 more
Solution
Copy $HBASE_HOME/lib/commons-lang-xxx.jar to $SQOOP_HOME/lib
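For example (commons-lang 2.x provides org.apache.commons.lang.ArrayUtils; the version number depends on your HBase distribution):
cp $HBASE_HOME/lib/commons-lang-*.jar $SQOOP_HOME/lib/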
ERROR sqoop.Sqoop: Got exception running Sqoop: java.lang.NullPointerException
java.lang.NullPointerException
at org.json.JSONObject.<init>(JSONObject.java:144)
at org.apache.sqoop.util.SqoopJsonUtil.getJsonStringforMap(SqoopJsonUtil.java:43)
at org.apache.sqoop.SqoopOptions.writeProperties(SqoopOptions.java:785)
at org.apache.sqoop.metastore.hsqldb.HsqldbJobStorage.createInternal(HsqldbJobStorage.java:399)
at org.apache.sqoop.metastore.hsqldb.HsqldbJobStorage.create(HsqldbJobStorage.java:379)
at org.apache.sqoop.tool.JobTool.createJob(JobTool.java:181)
at org.apache.sqoop.tool.JobTool.run(JobTool.java:294)
at org.apache.sqoop.Sqoop.run(Sqoop.java:147)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:76)
at org.apache.sqoop.Sqoop.runSqoop(Sqoop.java:183)
at org.apache.sqoop.Sqoop.runTool(Sqoop.java:234)
at org.apache.sqoop.Sqoop.runTool(Sqoop.java:243)
at org.apache.sqoop.Sqoop.main(Sqoop.java:252)
Solution
Download java-json.jar and put it in $SQOOP_HOME/lib
Jar download link:
Running a job with sqoop job --exec requires entering the password by hand
Solution: store the MySQL password in a file on HDFS and pass it with --password-file, as described in the incremental import section above.