环境说明
10.176.2.101 master
10.176.2.103 zjx03
10.176.2.105 zjx05
CentOS 6.5
zookeeper cdh 3.4.5
hadoop apache 2.7.7
mysql 5.17
jdk 1.8.191
sqoop 1.4.7 (2.x不稳定,使用1.x)
sqoop安装
本人将sqoop部署在master机器上,mysql安装在zjx03上
1 下载sqoop应用包
mkdir -p /opt/softwares/
cd /opt/softwares
wget https://mirrors.tuna.tsinghua.edu.cn/apache/sqoop/1.4.7/sqoop-1.4.7.bin__hadoop-2.6.0.tar.gz
tar -xzvf sqoop-1.4.7.bin__hadoop-2.6.0.tar.gz -C /opt/softwares
rm -rf sqoop-1.4.7.bin__hadoop-2.6.0.tar.gz
修改配置文件
cd /opt/softwares/sqoop-1.4.7.bin__hadoop-2.6.0/conf
cp sqoop-env-template.sh sqoop-env.sh
vim sqoop-env.sh
export HADOOP_COMMON_HOME=/opt/softwares/hadoop-2.7.7
#Set path to where hadoop-*-core.jar is available
export HADOOP_MAPRED_HOME=/opt/softwares/hadoop-2.7.7
#set the path to where bin/hbase is available
#export HBASE_HOME=
#Set the path to where bin/hive is available
#export HIVE_HOME=
#Set the path for where zookeper config dir is
export ZOOCFGDIR=/opt/softwares/zookeeper-3.4.5-cdh5.12.2/conf
配置环境变量
vim /etc/profile (root用户或者使用vim ~/.bashrc)
export SQOOP_HOME=/opt/softwares/sqoop-1.4.7.bin__hadoop-2.6.0
export PATH=$PATH:$SQOOP_HOME/bin
export CLASSPATH=$CLASSPATH:${SQOOP_HOME}/lib
source /etc/profile
mysql环境准备
卸载系统自带的mysql
执行以下命令查看是否有自带mysql:
rpm -qa | grep mysql
如果发现有输出结果,那么执行以下命令依次删除:
rpm -e --nodeps 包名   (将"包名"替换为上一步查询到的每个包名,逐个执行)
开始安装mysql
首先安装mysql客户端:
yum install mysql
然后安装mysql服务端:
yum install mysql-server
yum install mysql-devel
至此mysql相关组件已经安装成功!
修改字符集
mysql的配置文件路径为:
/etc/my.cnf
在配置文件中加入如下配置来修改字符集(本机加上字符集有问题,暂将其注释):
vim /etc/my.cnf
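# 注意:配置项必须写在对应的段落下;服务端([mysqld])应使用character-set-server,
# default-character-set只适用于客户端段落,写在[mysqld]下在新版本mysql中会导致服务无法启动
[client]
default-character-set=utf8
[mysqld]
character-set-server=utf8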
启动和停止mysql
service mysqld start
service mysqld status
service mysqld stop
将mysql加入开机启动,下次重启机器后mysql服务会自动启动,无需再手动启动
chkconfig --level 345 mysqld on
chkconfig --list | grep mysql
配置初始密码
mysqladmin -u root password root (其中设置密码为root)
登录mysql
mysql -uroot -proot
允许远程登录
sql:
GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' IDENTIFIED BY 'root' WITH GRANT OPTION;
FLUSH PRIVILEGES;
完成mysql环境准备
mysql安装参考:
https://www.jianshu.com/p/7a3656ec6927
mysql数据库创建相关参考:
https://www.cnblogs.com/jiangxiaobo/p/7089345.html
复制sqoop相关依赖包
下载mysql驱动jar包mysql-connector-java-5.1.33-bin.jar并放入${SQOOP_HOME}/lib;如果本地已有该jar包,直接复制即可:
cp ./mysql-connector-java-5.1.33-bin.jar ${SQOOP_HOME}/lib
cp $HADOOP_HOME/share/hadoop/common/hadoop-common-2.7.7.jar ${SQOOP_HOME}/lib
修改$SQOOP_HOME/bin/configure-sqoop
注释掉HCatalog,Accumulo检查(除非你准备使用HCatalog,Accumulo等HADOOP上的组件)
##Moved to be a runtime check in sqoop.
#if [ ! -d "${HCAT_HOME}" ]; then
#  echo "Warning: $HCAT_HOME does not exist! HCatalog jobs will fail."
#  echo 'Please set $HCAT_HOME to the root of your HCatalog installation.'
#fi
#if [ ! -d "${ACCUMULO_HOME}" ]; then
#  echo "Warning: $ACCUMULO_HOME does not exist! Accumulo imports will fail."
#  echo 'Please set $ACCUMULO_HOME to the root of your Accumulo installation.'
#fi
#Add HCatalog to dependency list
#if [ -e "${HCAT_HOME}/bin/hcat" ]; then
#  TMP_SQOOP_CLASSPATH=${SQOOP_CLASSPATH}:`${HCAT_HOME}/bin/hcat -classpath`
#  if [ -z "${HIVE_CONF_DIR}" ]; then
#    TMP_SQOOP_CLASSPATH=${TMP_SQOOP_CLASSPATH}:${HIVE_CONF_DIR}
#  fi
#  SQOOP_CLASSPATH=${TMP_SQOOP_CLASSPATH}
#fi
#Add Accumulo to dependency list
#if [ -e "$ACCUMULO_HOME/bin/accumulo" ]; then
#  for jn in `$ACCUMULO_HOME/bin/accumulo classpath | grep file:.*accumulo.*jar | cut -d':' -f2`; do
#    SQOOP_CLASSPATH=$SQOOP_CLASSPATH:$jn
#  done
#  for jn in `$ACCUMULO_HOME/bin/accumulo classpath | grep file:.*zookeeper.*jar | cut -d':' -f2`; do
#    SQOOP_CLASSPATH=$SQOOP_CLASSPATH:$jn
#  done
#fi
测试与mysql是否连通
sqoop list-databases --connect jdbc:mysql://10.176.2.103:3306/ --username root -P
#或者
sqoop list-databases --connect jdbc:mysql://10.176.2.103:3306/ --username root --password root
#同上述连通显示所有数据库
sqoop list-databases --username root --password root --connect jdbc:mysql://10.176.2.103:3306/
主机zjx03(10.176.2.103)登录mysql
mysql -uroot -proot
sql:
show databases;
# 数据库创建(指定字符集和排序规则)
CREATE DATABASE IF NOT EXISTS db_zjx default charset utf8 COLLATE utf8_general_ci;
# 数据库创建简单操作(与上一条二选一;数据库已存在时再次执行会报错)
create database db_zjx;
use db_zjx;
show tables;
create table tbl_sqoop(name varchar(11),age int ,sex varchar(11));
desc tbl_sqoop;
insert into tbl_sqoop value('zs',11,'girl');
insert into tbl_sqoop value('ls',12,'girl');
insert into tbl_sqoop value('zjx',25,'boy');
select * from tbl_sqoop;
exit;
主机master节点(10.176.2.101)进行sqoop操作,linux:
#列出数据库下的所有表
sqoop list-tables --username root --password root --connect jdbc:mysql://10.176.2.103:3306/db_zjx
sqoop导入导出操作练习
#将tbl_sqoop表中的数据导入到HDFS的/tmp/zhoujixiang/sqoop_data目录中(该目录自动创建)
sqoop import --username root --password root --connect jdbc:mysql://10.176.2.103:3306/db_zjx --query "select * from tbl_sqoop where \$CONDITIONS" --target-dir /tmp/zhoujixiang/sqoop_data/ --fields-terminated-by ',' --split-by age -m 1
hadoop fs -ls /tmp/
#查看导入HDFS的数据文件
hadoop fs -ls /tmp/zhoujixiang/
hadoop fs -ls /tmp/zhoujixiang/sqoop_data/
hadoop fs -cat /tmp/zhoujixiang/sqoop_data/*
# result
zs,11,girl
ls,12,girl
zjx,25,boy
安装部署sqoop参考链接:
https://www.cnblogs.com/harrymore/p/9056863.html